parser.c revision 1f40d68043320c5e1b1c72a74e3945df91094c0b
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <string.h> 44#include <libxml/xmlmemory.h> 45#include <libxml/threads.h> 46#include <libxml/globals.h> 47#include <libxml/tree.h> 48#include <libxml/parser.h> 49#include <libxml/parserInternals.h> 50#include <libxml/valid.h> 51#include <libxml/entities.h> 52#include <libxml/xmlerror.h> 53#include <libxml/encoding.h> 54#include <libxml/xmlIO.h> 55#include <libxml/uri.h> 56#ifdef LIBXML_CATALOG_ENABLED 57#include <libxml/catalog.h> 58#endif 59 60#ifdef HAVE_CTYPE_H 61#include <ctype.h> 62#endif 63#ifdef HAVE_STDLIB_H 64#include <stdlib.h> 65#endif 66#ifdef HAVE_SYS_STAT_H 67#include <sys/stat.h> 68#endif 69#ifdef HAVE_FCNTL_H 70#include <fcntl.h> 71#endif 72#ifdef HAVE_UNISTD_H 73#include <unistd.h> 74#endif 75#ifdef HAVE_ZLIB_H 76#include <zlib.h> 77#endif 78 79/** 80 * MAX_DEPTH: 81 * 82 * arbitrary depth limit for the XML documents that we allow to 83 * process. This is not a limitation of the parser but a safety 84 * boundary feature. 
85 */ 86#define MAX_DEPTH 1024 87 88#define SAX2 1 89 90#define XML_PARSER_BIG_BUFFER_SIZE 300 91#define XML_PARSER_BUFFER_SIZE 100 92 93#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 94 95/* 96 * List of XML prefixed PI allowed by W3C specs 97 */ 98 99static const char *xmlW3CPIs[] = { 100 "xml-stylesheet", 101 NULL 102}; 103 104/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 105xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 106 const xmlChar **str); 107 108static int 109xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 110 xmlSAXHandlerPtr sax, 111 void *user_data, int depth, const xmlChar *URL, 112 const xmlChar *ID, xmlNodePtr *list); 113 114static void 115xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 116 xmlNodePtr lastNode); 117 118static int 119xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 120 const xmlChar *string, void *user_data, xmlNodePtr *lst); 121 122/************************************************************************ 123 * * 124 * Some factorized error routines * 125 * * 126 ************************************************************************/ 127 128/** 129 * xmlErrMemory: 130 * @ctxt: an XML parser context 131 * @extra: extra informations 132 * 133 * Handle a redefinition of attribute error 134 */ 135static void 136xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 137{ 138 if (ctxt != NULL) { 139 ctxt->errNo = XML_ERR_NO_MEMORY; 140 ctxt->instate = XML_PARSER_EOF; 141 ctxt->disableSAX = 1; 142 } 143 if ((ctxt != NULL) && (ctxt->sax != NULL) 144 && (ctxt->sax->error != NULL)) { 145 if (extra) 146 ctxt->sax->error(ctxt->userData, 147 "Memory allocation failed : %s\n", extra); 148 else 149 ctxt->sax->error(ctxt->userData, 150 "Memory allocation failed !\n"); 151 } else { 152 if (extra) 153 xmlGenericError(xmlGenericErrorContext, 154 "Memory allocation failed : %s\n", extra); 155 else 156 xmlGenericError(xmlGenericErrorContext, 157 
"Memory allocation failed !\n"); 158 } 159} 160 161/** 162 * xmlErrAttributeDup: 163 * @ctxt: an XML parser context 164 * @prefix: the attribute prefix 165 * @localname: the attribute localname 166 * 167 * Handle a redefinition of attribute error 168 */ 169static void 170xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 171 const xmlChar * localname) 172{ 173 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 175 if (prefix == NULL) 176 ctxt->sax->error(ctxt->userData, 177 "Attribute %s redefined\n", localname); 178 else 179 ctxt->sax->error(ctxt->userData, 180 "Attribute %s:%s redefined\n", prefix, 181 localname); 182 } 183 ctxt->wellFormed = 0; 184 if (ctxt->recovery == 0) 185 ctxt->disableSAX = 1; 186} 187 188/** 189 * xmlFatalErr: 190 * @ctxt: an XML parser context 191 * @error: the error number 192 * @extra: extra information string 193 * 194 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 195 */ 196static void 197xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char * info) 198{ 199 const char *errmsg; 200 201 if (ctxt == NULL) { 202 xmlGenericError(xmlGenericErrorContext, 203 "xmlFatalErr: no context !\n"); 204 return; 205 } 206 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL)) 207 return; 208 switch (error) { 209 case XML_ERR_INVALID_HEX_CHARREF: 210 errmsg = "CharRef: invalid hexadecimal value\n"; 211 break; 212 case XML_ERR_INVALID_DEC_CHARREF: 213 errmsg = "CharRef: invalid decimal value\n"; 214 break; 215 case XML_ERR_INVALID_CHARREF: 216 errmsg = "CharRef: invalid value\n"; 217 break; 218 case XML_ERR_INTERNAL_ERROR: 219 errmsg = "internal error"; 220 break; 221 case XML_ERR_PEREF_AT_EOF: 222 errmsg = "PEReference at end of document\n"; 223 break; 224 case XML_ERR_PEREF_IN_PROLOG: 225 errmsg = "PEReference in prolog\n"; 226 break; 227 case XML_ERR_PEREF_IN_EPILOG: 228 errmsg = "PEReference in epilog\n"; 229 break; 230 case 
XML_ERR_PEREF_NO_NAME: 231 errmsg = "PEReference: no name\n"; 232 break; 233 case XML_ERR_PEREF_SEMICOL_MISSING: 234 errmsg = "PEReference: expecting ';'\n"; 235 break; 236 case XML_ERR_ENTITY_LOOP: 237 errmsg = "Detected an entity reference loop\n"; 238 break; 239 case XML_ERR_ENTITY_NOT_STARTED: 240 errmsg = "EntityValue: \" or ' expected\n"; 241 break; 242 case XML_ERR_ENTITY_PE_INTERNAL: 243 errmsg = "PEReferences forbidden in internal subset\n"; 244 break; 245 case XML_ERR_ENTITY_NOT_FINISHED: 246 errmsg = "EntityValue: \" or ' expected\n"; 247 break; 248 case XML_ERR_ATTRIBUTE_NOT_STARTED: 249 errmsg = "AttValue: \" or ' expected\n"; 250 break; 251 case XML_ERR_LT_IN_ATTRIBUTE: 252 errmsg = "Unescaped '<' not allowed in attributes values\n"; 253 break; 254 case XML_ERR_LITERAL_NOT_STARTED: 255 errmsg = "SystemLiteral \" or ' expected\n"; 256 break; 257 case XML_ERR_LITERAL_NOT_FINISHED: 258 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 259 break; 260 case XML_ERR_MISPLACED_CDATA_END: 261 errmsg = "Sequence ']]>' not allowed in content\n"; 262 break; 263 case XML_ERR_URI_REQUIRED: 264 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 265 break; 266 case XML_ERR_PUBID_REQUIRED: 267 errmsg = "PUBLIC, the Public Identifier is missing\n"; 268 break; 269 case XML_ERR_HYPHEN_IN_COMMENT: 270 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 271 break; 272 case XML_ERR_PI_NOT_STARTED: 273 errmsg = "xmlParsePI : no target name\n"; 274 break; 275 case XML_ERR_RESERVED_XML_NAME: 276 errmsg = "Invalid PI name\n"; 277 break; 278 case XML_ERR_NOTATION_NOT_STARTED: 279 errmsg = "NOTATION: Name expected here\n"; 280 break; 281 case XML_ERR_NOTATION_NOT_FINISHED: 282 errmsg = "'>' required to close NOTATION declaration\n"; 283 break; 284 case XML_ERR_VALUE_REQUIRED: 285 errmsg = "Entity value required\n"; 286 break; 287 case XML_ERR_URI_FRAGMENT: 288 errmsg = "Fragment not allowed"; 289 break; 290 case XML_ERR_ATTLIST_NOT_STARTED: 291 errmsg = "'(' 
required to start ATTLIST enumeration\n"; 292 break; 293 case XML_ERR_NMTOKEN_REQUIRED: 294 errmsg = "NmToken expected in ATTLIST enumeration\n"; 295 break; 296 case XML_ERR_ATTLIST_NOT_FINISHED: 297 errmsg = "')' required to finish ATTLIST enumeration\n"; 298 break; 299 case XML_ERR_MIXED_NOT_STARTED: 300 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 301 break; 302 case XML_ERR_PCDATA_REQUIRED: 303 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 304 break; 305 case XML_ERR_ELEMCONTENT_NOT_STARTED: 306 errmsg = "ContentDecl : Name or '(' expected\n"; 307 break; 308 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 309 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 310 break; 311 case XML_ERR_PEREF_IN_INT_SUBSET: 312 errmsg = "PEReference: forbidden within markup decl in internal subset\n"; 313 break; 314 case XML_ERR_GT_REQUIRED: 315 errmsg = "expected '>'\n"; 316 break; 317 case XML_ERR_CONDSEC_INVALID: 318 errmsg = "XML conditional section '[' expected\n"; 319 break; 320 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 321 errmsg = "Content error in the external subset\n"; 322 break; 323 case XML_ERR_CONDSEC_INVALID_KEYWORD: 324 errmsg = "conditional section INCLUDE or IGNORE keyword expected\n"; 325 break; 326 case XML_ERR_CONDSEC_NOT_FINISHED: 327 errmsg = "XML conditional section not closed\n"; 328 break; 329 case XML_ERR_XMLDECL_NOT_STARTED: 330 errmsg = "Text declaration '<?xml' required\n"; 331 break; 332 case XML_ERR_XMLDECL_NOT_FINISHED: 333 errmsg = "parsing XML declaration: '?>' expected\n"; 334 break; 335 case XML_ERR_EXT_ENTITY_STANDALONE: 336 errmsg = "external parsed entities cannot be standalone\n"; 337 break; 338 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 339 errmsg = "EntityRef: expecting ';'\n"; 340 break; 341 case XML_ERR_DOCTYPE_NOT_FINISHED: 342 errmsg = "DOCTYPE improperly terminated\n"; 343 break; 344 case XML_ERR_LTSLASH_REQUIRED: 345 errmsg = "EndTag: '</' not found\n"; 346 break; 347 case XML_ERR_EQUAL_REQUIRED: 348 errmsg = "expected '='\n"; 
349 break; 350 case XML_ERR_STRING_NOT_CLOSED: 351 errmsg = "String not closed expecting \" or '\n"; 352 break; 353 case XML_ERR_STRING_NOT_STARTED: 354 errmsg = "String not started expecting ' or \"\n"; 355 break; 356 case XML_ERR_ENCODING_NAME: 357 errmsg = "Invalid XML encoding name\n"; 358 break; 359 case XML_ERR_STANDALONE_VALUE: 360 errmsg = "standalone accepts only 'yes' or 'no'\n"; 361 break; 362 case XML_ERR_DOCUMENT_EMPTY: 363 errmsg = "Document is empty\n"; 364 break; 365 case XML_ERR_DOCUMENT_END: 366 errmsg = "Extra content at the end of the document\n"; 367 break; 368 case XML_ERR_NOT_WELL_BALANCED: 369 errmsg = "chunk is not well balanced\n"; 370 break; 371 case XML_ERR_EXTRA_CONTENT: 372 errmsg = "extra content at the end of well balanced chunk\n"; 373 break; 374 case XML_ERR_VERSION_MISSING: 375 errmsg = "Malformed declaration expecting version\n"; 376 break; 377#if 0 378 case : 379 errmsg = "\n"; 380 break; 381#endif 382 default: 383 errmsg = "Unregistered error message\n"; 384 } 385 ctxt->errNo = error; 386 if (info == NULL) { 387 ctxt->sax->error(ctxt->userData, errmsg); 388 } else { 389 ctxt->sax->error(ctxt->userData, "%s: %s", errmsg, info); 390 } 391 ctxt->wellFormed = 0; 392 if (ctxt->recovery == 0) 393 ctxt->disableSAX = 1; 394} 395 396/** 397 * xmlFatalErrMsg: 398 * @ctxt: an XML parser context 399 * @error: the error number 400 * @msg: the error message 401 * 402 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 403 */ 404static void 405xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg) 406{ 407 if (ctxt == NULL) { 408 xmlGenericError(xmlGenericErrorContext, 409 "xmlFatalErr: no context !\n"); 410 return; 411 } 412 ctxt->errNo = error; 413 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL)) 414 return; 415 ctxt->sax->error(ctxt->userData, msg); 416 ctxt->wellFormed = 0; 417 if (ctxt->recovery == 0) 418 ctxt->disableSAX = 1; 419} 420 421/** 422 * xmlFatalErrMsgInt: 423 * @ctxt: an XML parser context 424 * @error: the error number 425 * @msg: the error message 426 * @val: an integer value 427 * 428 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 429 */ 430static void 431xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 432 const char *msg, int val) 433{ 434 if (ctxt == NULL) { 435 xmlGenericError(xmlGenericErrorContext, 436 "xmlFatalErr: no context !\n"); 437 return; 438 } 439 ctxt->errNo = error; 440 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL)) 441 return; 442 ctxt->sax->error(ctxt->userData, msg, val); 443 ctxt->wellFormed = 0; 444 if (ctxt->recovery == 0) 445 ctxt->disableSAX = 1; 446} 447 448/** 449 * xmlFatalErrMsgStr: 450 * @ctxt: an XML parser context 451 * @error: the error number 452 * @msg: the error message 453 * @val: a string value 454 * 455 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 456 */ 457static void 458xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 459 const char *msg, const xmlChar *val) 460{ 461 if (ctxt == NULL) { 462 xmlGenericError(xmlGenericErrorContext, 463 "xmlFatalErr: no context !\n"); 464 return; 465 } 466 ctxt->errNo = error; 467 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL)) 468 return; 469 ctxt->sax->error(ctxt->userData, msg, val); 470 ctxt->wellFormed = 0; 471 if (ctxt->recovery == 0) 472 ctxt->disableSAX = 1; 473} 474 475/** 476 * xmlNsErr: 477 * @ctxt: an XML parser context 478 * @error: the error number 479 * @msg: the message 480 * @info1: extra information string 481 * @info2: extra information string 482 * 483 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 484 */ 485static void 486xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 487 const char *msg, 488 const xmlChar *info1, const xmlChar *info2, const xmlChar *info3) 489{ 490 if (ctxt == NULL) 491 return; 492 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL)) 493 return; 494 495 ctxt->errNo = error; 496 if (info1 == NULL) { 497 ctxt->sax->error(ctxt->userData, msg); 498 } else if (info2 == NULL) { 499 ctxt->sax->error(ctxt->userData, msg, info1); 500 } else if (info3 == NULL) { 501 ctxt->sax->error(ctxt->userData, msg, info1, info2); 502 } else { 503 ctxt->sax->error(ctxt->userData, msg, info1, info2, info3); 504 } 505 ctxt->nsWellFormed = 0; 506} 507 508/************************************************************************ 509 * * 510 * SAX2 defaulted attributes handling * 511 * * 512 ************************************************************************/ 513 514/** 515 * xmlDetectSAX2: 516 * @ctxt: an XML parser context 517 * 518 * Do the SAX2 detection and specific intialization 519 */ 520static void 521xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 522 if (ctxt == NULL) return; 523 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 524 
((ctxt->sax->startElementNs != NULL) || 525 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 526 527 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 528 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 529 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 530} 531 532#ifdef SAX2 533typedef struct _xmlDefAttrs xmlDefAttrs; 534typedef xmlDefAttrs *xmlDefAttrsPtr; 535struct _xmlDefAttrs { 536 int nbAttrs; /* number of defaulted attributes on that element */ 537 int maxAttrs; /* the size of the array */ 538 const xmlChar *values[4]; /* array of localname/prefix/values */ 539}; 540#endif 541 542/** 543 * xmlAddDefAttrs: 544 * @ctxt: an XML parser context 545 * @fullname: the element fullname 546 * @fullattr: the attribute fullname 547 * @value: the attribute value 548 * 549 * Add a defaulted attribute for an element 550 */ 551static void 552xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 553 const xmlChar *fullname, 554 const xmlChar *fullattr, 555 const xmlChar *value) { 556 xmlDefAttrsPtr defaults; 557 int len; 558 const xmlChar *name; 559 const xmlChar *prefix; 560 561 if (ctxt->attsDefault == NULL) { 562 ctxt->attsDefault = xmlHashCreate(10); 563 if (ctxt->attsDefault == NULL) 564 goto mem_error; 565 } 566 567 /* 568 * plit the element name into prefix:localname , the string found 569 * are within the DTD and hen not associated to namespace names. 
570 */ 571 name = xmlSplitQName3(fullname, &len); 572 if (name == NULL) { 573 name = xmlDictLookup(ctxt->dict, fullname, -1); 574 prefix = NULL; 575 } else { 576 name = xmlDictLookup(ctxt->dict, name, -1); 577 prefix = xmlDictLookup(ctxt->dict, fullname, len); 578 } 579 580 /* 581 * make sure there is some storage 582 */ 583 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 584 if (defaults == NULL) { 585 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 586 12 * sizeof(const xmlChar *)); 587 if (defaults == NULL) 588 goto mem_error; 589 defaults->maxAttrs = 4; 590 defaults->nbAttrs = 0; 591 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); 592 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 593 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 594 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); 595 if (defaults == NULL) 596 goto mem_error; 597 defaults->maxAttrs *= 2; 598 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); 599 } 600 601 /* 602 * plit the element name into prefix:localname , the string found 603 * are within the DTD and hen not associated to namespace names. 
604 */ 605 name = xmlSplitQName3(fullattr, &len); 606 if (name == NULL) { 607 name = xmlDictLookup(ctxt->dict, fullattr, -1); 608 prefix = NULL; 609 } else { 610 name = xmlDictLookup(ctxt->dict, name, -1); 611 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 612 } 613 614 defaults->values[4 * defaults->nbAttrs] = name; 615 defaults->values[4 * defaults->nbAttrs + 1] = prefix; 616 /* intern the string and precompute the end */ 617 len = xmlStrlen(value); 618 value = xmlDictLookup(ctxt->dict, value, len); 619 defaults->values[4 * defaults->nbAttrs + 2] = value; 620 defaults->values[4 * defaults->nbAttrs + 3] = value + len; 621 defaults->nbAttrs++; 622 623 return; 624 625mem_error: 626 xmlErrMemory(ctxt, NULL); 627 return; 628} 629 630/** 631 * xmlAddSpecialAttr: 632 * @ctxt: an XML parser context 633 * @fullname: the element fullname 634 * @fullattr: the attribute fullname 635 * @type: the attribute type 636 * 637 * Register that this attribute is not CDATA 638 */ 639static void 640xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 641 const xmlChar *fullname, 642 const xmlChar *fullattr, 643 int type) 644{ 645 if (ctxt->attsSpecial == NULL) { 646 ctxt->attsSpecial = xmlHashCreate(10); 647 if (ctxt->attsSpecial == NULL) 648 goto mem_error; 649 } 650 651 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 652 (void *) (long) type); 653 return; 654 655mem_error: 656 xmlErrMemory(ctxt, NULL); 657 return; 658} 659 660/************************************************************************ 661 * * 662 * Parser stacks related functions and macros * 663 * * 664 ************************************************************************/ 665 666xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 667 const xmlChar ** str); 668 669#ifdef SAX2 670/** 671 * nsPush: 672 * @ctxt: an XML parser context 673 * @prefix: the namespace prefix or NULL 674 * @URL: the namespace name 675 * 676 * Pushes a new parser namespace on top of the ns stack 677 * 678 * Returns -1 in case of error, 
the index in the stack otherwise 679 */ 680static int 681nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 682{ 683 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 684 ctxt->nsMax = 10; 685 ctxt->nsNr = 0; 686 ctxt->nsTab = (const xmlChar **) 687 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 688 if (ctxt->nsTab == NULL) { 689 xmlErrMemory(ctxt, NULL); 690 ctxt->nsMax = 0; 691 return (-1); 692 } 693 } else if (ctxt->nsNr >= ctxt->nsMax) { 694 ctxt->nsMax *= 2; 695 ctxt->nsTab = (const xmlChar **) 696 xmlRealloc(ctxt->nsTab, 697 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 698 if (ctxt->nsTab == NULL) { 699 xmlErrMemory(ctxt, NULL); 700 ctxt->nsMax /= 2; 701 return (-1); 702 } 703 } 704 ctxt->nsTab[ctxt->nsNr++] = prefix; 705 ctxt->nsTab[ctxt->nsNr++] = URL; 706 return (ctxt->nsNr); 707} 708/** 709 * nsPop: 710 * @ctxt: an XML parser context 711 * @nr: the number to pop 712 * 713 * Pops the top @nr parser prefix/namespace from the ns stack 714 * 715 * Returns the number of namespaces removed 716 */ 717static int 718nsPop(xmlParserCtxtPtr ctxt, int nr) 719{ 720 int i; 721 722 if (ctxt->nsTab == NULL) return(0); 723 if (ctxt->nsNr < nr) { 724 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 725 nr = ctxt->nsNr; 726 } 727 if (ctxt->nsNr <= 0) 728 return (0); 729 730 for (i = 0;i < nr;i++) { 731 ctxt->nsNr--; 732 ctxt->nsTab[ctxt->nsNr] = NULL; 733 } 734 return(nr); 735} 736#endif 737 738static int 739xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 740 const xmlChar **atts; 741 int *attallocs; 742 int maxatts; 743 744 if (ctxt->atts == NULL) { 745 maxatts = 55; /* allow for 10 attrs by default */ 746 atts = (const xmlChar **) 747 xmlMalloc(maxatts * sizeof(xmlChar *)); 748 if (atts == NULL) goto mem_error; 749 ctxt->atts = atts; 750 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 751 if (attallocs == NULL) goto mem_error; 752 ctxt->attallocs = attallocs; 753 ctxt->maxatts = maxatts; 754 } else if (nr + 5 > ctxt->maxatts) 
{ 755 maxatts = (nr + 5) * 2; 756 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 757 maxatts * sizeof(const xmlChar *)); 758 if (atts == NULL) goto mem_error; 759 ctxt->atts = atts; 760 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 761 (maxatts / 5) * sizeof(int)); 762 if (attallocs == NULL) goto mem_error; 763 ctxt->attallocs = attallocs; 764 ctxt->maxatts = maxatts; 765 } 766 return(ctxt->maxatts); 767mem_error: 768 xmlErrMemory(ctxt, NULL); 769 return(-1); 770} 771 772/** 773 * inputPush: 774 * @ctxt: an XML parser context 775 * @value: the parser input 776 * 777 * Pushes a new parser input on top of the input stack 778 * 779 * Returns 0 in case of error, the index in the stack otherwise 780 */ 781extern int 782inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 783{ 784 if (ctxt->inputNr >= ctxt->inputMax) { 785 ctxt->inputMax *= 2; 786 ctxt->inputTab = 787 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 788 ctxt->inputMax * 789 sizeof(ctxt->inputTab[0])); 790 if (ctxt->inputTab == NULL) { 791 xmlErrMemory(ctxt, NULL); 792 return (0); 793 } 794 } 795 ctxt->inputTab[ctxt->inputNr] = value; 796 ctxt->input = value; 797 return (ctxt->inputNr++); 798} 799/** 800 * inputPop: 801 * @ctxt: an XML parser context 802 * 803 * Pops the top parser input from the input stack 804 * 805 * Returns the input just removed 806 */ 807extern xmlParserInputPtr 808inputPop(xmlParserCtxtPtr ctxt) 809{ 810 xmlParserInputPtr ret; 811 812 if (ctxt->inputNr <= 0) 813 return (0); 814 ctxt->inputNr--; 815 if (ctxt->inputNr > 0) 816 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 817 else 818 ctxt->input = NULL; 819 ret = ctxt->inputTab[ctxt->inputNr]; 820 ctxt->inputTab[ctxt->inputNr] = 0; 821 return (ret); 822} 823/** 824 * nodePush: 825 * @ctxt: an XML parser context 826 * @value: the element node 827 * 828 * Pushes a new element node on top of the node stack 829 * 830 * Returns 0 in case of error, the index in the stack otherwise 831 */ 832extern int 
833nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 834{ 835 if (ctxt->nodeNr >= ctxt->nodeMax) { 836 ctxt->nodeMax *= 2; 837 ctxt->nodeTab = 838 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 839 ctxt->nodeMax * 840 sizeof(ctxt->nodeTab[0])); 841 if (ctxt->nodeTab == NULL) { 842 xmlErrMemory(ctxt, NULL); 843 return (0); 844 } 845 } 846#ifdef MAX_DEPTH 847 if (ctxt->nodeNr > MAX_DEPTH) { 848 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 849 "Excessive depth in document: change MAX_DEPTH = %d\n", 850 MAX_DEPTH); 851 ctxt->instate = XML_PARSER_EOF; 852 return(0); 853 } 854#endif 855 ctxt->nodeTab[ctxt->nodeNr] = value; 856 ctxt->node = value; 857 return (ctxt->nodeNr++); 858} 859/** 860 * nodePop: 861 * @ctxt: an XML parser context 862 * 863 * Pops the top element node from the node stack 864 * 865 * Returns the node just removed 866 */ 867extern xmlNodePtr 868nodePop(xmlParserCtxtPtr ctxt) 869{ 870 xmlNodePtr ret; 871 872 if (ctxt->nodeNr <= 0) 873 return (0); 874 ctxt->nodeNr--; 875 if (ctxt->nodeNr > 0) 876 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 877 else 878 ctxt->node = NULL; 879 ret = ctxt->nodeTab[ctxt->nodeNr]; 880 ctxt->nodeTab[ctxt->nodeNr] = 0; 881 return (ret); 882} 883/** 884 * nameNsPush: 885 * @ctxt: an XML parser context 886 * @value: the element name 887 * @prefix: the element prefix 888 * @URI: the element namespace name 889 * 890 * Pushes a new element name/prefix/URL on top of the name stack 891 * 892 * Returns -1 in case of error, the index in the stack otherwise 893 */ 894static int 895nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 896 const xmlChar *prefix, const xmlChar *URI, int nsNr) 897{ 898 if (ctxt->nameNr >= ctxt->nameMax) { 899 const xmlChar * *tmp; 900 void **tmp2; 901 ctxt->nameMax *= 2; 902 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 903 ctxt->nameMax * 904 sizeof(ctxt->nameTab[0])); 905 if (tmp == NULL) { 906 ctxt->nameMax /= 2; 907 goto mem_error; 908 } 909 ctxt->nameTab = tmp; 910 tmp2 = (void **) 
xmlRealloc((void * *)ctxt->pushTab, 911 ctxt->nameMax * 3 * 912 sizeof(ctxt->pushTab[0])); 913 if (tmp2 == NULL) { 914 ctxt->nameMax /= 2; 915 goto mem_error; 916 } 917 ctxt->pushTab = tmp2; 918 } 919 ctxt->nameTab[ctxt->nameNr] = value; 920 ctxt->name = value; 921 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 922 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 923 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 924 return (ctxt->nameNr++); 925mem_error: 926 xmlErrMemory(ctxt, NULL); 927 return (-1); 928} 929/** 930 * nameNsPop: 931 * @ctxt: an XML parser context 932 * 933 * Pops the top element/prefix/URI name from the name stack 934 * 935 * Returns the name just removed 936 */ 937static const xmlChar * 938nameNsPop(xmlParserCtxtPtr ctxt) 939{ 940 const xmlChar *ret; 941 942 if (ctxt->nameNr <= 0) 943 return (0); 944 ctxt->nameNr--; 945 if (ctxt->nameNr > 0) 946 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 947 else 948 ctxt->name = NULL; 949 ret = ctxt->nameTab[ctxt->nameNr]; 950 ctxt->nameTab[ctxt->nameNr] = NULL; 951 return (ret); 952} 953 954/** 955 * namePush: 956 * @ctxt: an XML parser context 957 * @value: the element name 958 * 959 * Pushes a new element name on top of the name stack 960 * 961 * Returns -1 in case of error, the index in the stack otherwise 962 */ 963extern int 964namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 965{ 966 if (ctxt->nameNr >= ctxt->nameMax) { 967 const xmlChar * *tmp; 968 ctxt->nameMax *= 2; 969 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 970 ctxt->nameMax * 971 sizeof(ctxt->nameTab[0])); 972 if (tmp == NULL) { 973 ctxt->nameMax /= 2; 974 goto mem_error; 975 } 976 ctxt->nameTab = tmp; 977 } 978 ctxt->nameTab[ctxt->nameNr] = value; 979 ctxt->name = value; 980 return (ctxt->nameNr++); 981mem_error: 982 xmlErrMemory(ctxt, NULL); 983 return (-1); 984} 985/** 986 * namePop: 987 * @ctxt: an XML parser context 988 * 989 * Pops the top element name from the name stack 990 * 991 * 
Returns the name just removed 992 */ 993extern const xmlChar * 994namePop(xmlParserCtxtPtr ctxt) 995{ 996 const xmlChar *ret; 997 998 if (ctxt->nameNr <= 0) 999 return (0); 1000 ctxt->nameNr--; 1001 if (ctxt->nameNr > 0) 1002 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1003 else 1004 ctxt->name = NULL; 1005 ret = ctxt->nameTab[ctxt->nameNr]; 1006 ctxt->nameTab[ctxt->nameNr] = 0; 1007 return (ret); 1008} 1009 1010static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1011 if (ctxt->spaceNr >= ctxt->spaceMax) { 1012 ctxt->spaceMax *= 2; 1013 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 1014 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1015 if (ctxt->spaceTab == NULL) { 1016 xmlErrMemory(ctxt, NULL); 1017 return(0); 1018 } 1019 } 1020 ctxt->spaceTab[ctxt->spaceNr] = val; 1021 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1022 return(ctxt->spaceNr++); 1023} 1024 1025static int spacePop(xmlParserCtxtPtr ctxt) { 1026 int ret; 1027 if (ctxt->spaceNr <= 0) return(0); 1028 ctxt->spaceNr--; 1029 if (ctxt->spaceNr > 0) 1030 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1031 else 1032 ctxt->space = NULL; 1033 ret = ctxt->spaceTab[ctxt->spaceNr]; 1034 ctxt->spaceTab[ctxt->spaceNr] = -1; 1035 return(ret); 1036} 1037 1038/* 1039 * Macros for accessing the content. Those should be used only by the parser, 1040 * and not exported. 1041 * 1042 * Dirty macros, i.e. one often need to make assumption on the context to 1043 * use them 1044 * 1045 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1046 * To be used with extreme caution since operations consuming 1047 * characters may move the input buffer to a different location ! 1048 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1049 * This should be used internally by the parser 1050 * only to compare to ASCII values otherwise it would break when 1051 * running with UTF-8 encoding. 
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * All of these macros expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP advances the cursor, the column and the character counter by val,
 * hands over to xmlParserHandlePEReference() when the new current char
 * is '%', and pops the input when it sits on a 0 byte and the buffer
 * cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK only acts outside of progressive mode, once more than
 * 2 * INPUT_CHUNK bytes have been consumed and fewer than 2 * INPUT_CHUNK
 * bytes remain. Note that it expands to a bare "if" statement which
 * already carries its own trailing semicolon.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);

/*
 * Out of line part of SHRINK: shrink the current input, then pop it if
 * it sits on a 0 byte and cannot be grown.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
  }

/*
 * GROW only acts outside of progressive mode, when fewer than INPUT_CHUNK
 * bytes remain after the cursor. Like SHRINK it expands to a bare "if"
 * statement which already carries its own trailing semicolon.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);

/*
 * Out of line part of GROW: grow the current input, then pop it if it
 * still sits on a 0 byte and a second grow attempt brings nothing.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
}

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1 moves one xmlChar forward without any newline or PE reference
 * handling and tries to grow the buffer when it lands on a 0 byte.
 * It expands to a plain brace block, not to a do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL updates line/col from the char being left, moves l xmlChars
 * forward and hands over to xmlParserHandlePEReference() on a '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF stores v as a single byte when l is 1 and goes through
 * xmlCopyCharMultiByte() otherwise. It expands to an unbraced if/else,
 * so it has to be used as a full statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 * This path walks the buffer directly: only the line counter is
	 * maintained (col is reset on '\n' but not advanced otherwise)
	 * and no PE reference handling takes place.
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK(*cur)) {
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    }
	    cur++;
	    res++;
	    if (*cur == 0) {
		/*
		 * publish the position before growing and reload it
		 * afterwards, the grow call gets to see and may change
		 * ctxt->input->cur
		 */
		ctxt->input->cur = cur;
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
	int cur;
	do {
	    cur = CUR;
	    while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
		NEXT;
		cur = CUR;
		res++;
	    }
	    /*
	     * at the end of a stacked input, fall back to the previous
	     * one(s), except while inside a comment
	     */
	    while ((cur == 0) && (ctxt->inputNr > 1) &&
		   (ctxt->instate != XML_PARSER_COMMENT)) {
		xmlPopInput(ctxt);
		cur = CUR;
	    }
	    /*
	     * Need to handle support of entities branching here
	     */
	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
	    /* loop again if popping an input exposed more blanks */
	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
    }
    return(res);
}

/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/

/**
 * xmlPopInput:
 * @ctxt:  an XML parser context
 *
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
1208 * 1209 * Returns the current xmlChar in the parser context 1210 */ 1211xmlChar 1212xmlPopInput(xmlParserCtxtPtr ctxt) { 1213 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 1214 if (xmlParserDebugEntities) 1215 xmlGenericError(xmlGenericErrorContext, 1216 "Popping input %d\n", ctxt->inputNr); 1217 xmlFreeInputStream(inputPop(ctxt)); 1218 if ((*ctxt->input->cur == 0) && 1219 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1220 return(xmlPopInput(ctxt)); 1221 return(CUR); 1222} 1223 1224/** 1225 * xmlPushInput: 1226 * @ctxt: an XML parser context 1227 * @input: an XML parser input fragment (entity, XML fragment ...). 1228 * 1229 * xmlPushInput: switch to a new input stream which is stacked on top 1230 * of the previous one(s). 1231 */ 1232void 1233xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 1234 if (input == NULL) return; 1235 1236 if (xmlParserDebugEntities) { 1237 if ((ctxt->input != NULL) && (ctxt->input->filename)) 1238 xmlGenericError(xmlGenericErrorContext, 1239 "%s(%d): ", ctxt->input->filename, 1240 ctxt->input->line); 1241 xmlGenericError(xmlGenericErrorContext, 1242 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 1243 } 1244 inputPush(ctxt, input); 1245 GROW; 1246} 1247 1248/** 1249 * xmlParseCharRef: 1250 * @ctxt: an XML parser context 1251 * 1252 * parse Reference declarations 1253 * 1254 * [66] CharRef ::= '&#' [0-9]+ ';' | 1255 * '&#x' [0-9a-fA-F]+ ';' 1256 * 1257 * [ WFC: Legal Character ] 1258 * Characters referred to using character references must match the 1259 * production for Char. 
1260 * 1261 * Returns the value parsed (as an int), 0 in case of error 1262 */ 1263int 1264xmlParseCharRef(xmlParserCtxtPtr ctxt) { 1265 unsigned int val = 0; 1266 int count = 0; 1267 1268 /* 1269 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 1270 */ 1271 if ((RAW == '&') && (NXT(1) == '#') && 1272 (NXT(2) == 'x')) { 1273 SKIP(3); 1274 GROW; 1275 while (RAW != ';') { /* loop blocked by count */ 1276 if (count++ > 20) { 1277 count = 0; 1278 GROW; 1279 } 1280 if ((RAW >= '0') && (RAW <= '9')) 1281 val = val * 16 + (CUR - '0'); 1282 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 1283 val = val * 16 + (CUR - 'a') + 10; 1284 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 1285 val = val * 16 + (CUR - 'A') + 10; 1286 else { 1287 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1288 val = 0; 1289 break; 1290 } 1291 NEXT; 1292 count++; 1293 } 1294 if (RAW == ';') { 1295 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1296 ctxt->input->col++; 1297 ctxt->nbChars ++; 1298 ctxt->input->cur++; 1299 } 1300 } else if ((RAW == '&') && (NXT(1) == '#')) { 1301 SKIP(2); 1302 GROW; 1303 while (RAW != ';') { /* loop blocked by count */ 1304 if (count++ > 20) { 1305 count = 0; 1306 GROW; 1307 } 1308 if ((RAW >= '0') && (RAW <= '9')) 1309 val = val * 10 + (CUR - '0'); 1310 else { 1311 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 1312 val = 0; 1313 break; 1314 } 1315 NEXT; 1316 count++; 1317 } 1318 if (RAW == ';') { 1319 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1320 ctxt->input->col++; 1321 ctxt->nbChars ++; 1322 ctxt->input->cur++; 1323 } 1324 } else { 1325 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1326 } 1327 1328 /* 1329 * [ WFC: Legal Character ] 1330 * Characters referred to using character references must match the 1331 * production for Char. 
1332 */ 1333 if (IS_CHAR(val)) { 1334 return(val); 1335 } else { 1336 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1337 "xmlParseCharRef: invalid xmlChar value %d\n", 1338 val); 1339 } 1340 return(0); 1341} 1342 1343/** 1344 * xmlParseStringCharRef: 1345 * @ctxt: an XML parser context 1346 * @str: a pointer to an index in the string 1347 * 1348 * parse Reference declarations, variant parsing from a string rather 1349 * than an an input flow. 1350 * 1351 * [66] CharRef ::= '&#' [0-9]+ ';' | 1352 * '&#x' [0-9a-fA-F]+ ';' 1353 * 1354 * [ WFC: Legal Character ] 1355 * Characters referred to using character references must match the 1356 * production for Char. 1357 * 1358 * Returns the value parsed (as an int), 0 in case of error, str will be 1359 * updated to the current value of the index 1360 */ 1361static int 1362xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 1363 const xmlChar *ptr; 1364 xmlChar cur; 1365 int val = 0; 1366 1367 if ((str == NULL) || (*str == NULL)) return(0); 1368 ptr = *str; 1369 cur = *ptr; 1370 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 1371 ptr += 3; 1372 cur = *ptr; 1373 while (cur != ';') { /* Non input consuming loop */ 1374 if ((cur >= '0') && (cur <= '9')) 1375 val = val * 16 + (cur - '0'); 1376 else if ((cur >= 'a') && (cur <= 'f')) 1377 val = val * 16 + (cur - 'a') + 10; 1378 else if ((cur >= 'A') && (cur <= 'F')) 1379 val = val * 16 + (cur - 'A') + 10; 1380 else { 1381 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1382 val = 0; 1383 break; 1384 } 1385 ptr++; 1386 cur = *ptr; 1387 } 1388 if (cur == ';') 1389 ptr++; 1390 } else if ((cur == '&') && (ptr[1] == '#')){ 1391 ptr += 2; 1392 cur = *ptr; 1393 while (cur != ';') { /* Non input consuming loops */ 1394 if ((cur >= '0') && (cur <= '9')) 1395 val = val * 10 + (cur - '0'); 1396 else { 1397 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 1398 val = 0; 1399 break; 1400 } 1401 ptr++; 1402 cur = *ptr; 1403 } 1404 if (cur == ';') 1405 ptr++; 
1406 } else { 1407 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1408 return(0); 1409 } 1410 *str = ptr; 1411 1412 /* 1413 * [ WFC: Legal Character ] 1414 * Characters referred to using character references must match the 1415 * production for Char. 1416 */ 1417 if (IS_CHAR(val)) { 1418 return(val); 1419 } else { 1420 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1421 "xmlParseStringCharRef: invalid xmlChar value %d\n", 1422 val); 1423 } 1424 return(0); 1425} 1426 1427/** 1428 * xmlNewBlanksWrapperInputStream: 1429 * @ctxt: an XML parser context 1430 * @entity: an Entity pointer 1431 * 1432 * Create a new input stream for wrapping 1433 * blanks around a PEReference 1434 * 1435 * Returns the new input stream or NULL 1436 */ 1437 1438static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 1439 1440static xmlParserInputPtr 1441xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1442 xmlParserInputPtr input; 1443 xmlChar *buffer; 1444 size_t length; 1445 if (entity == NULL) { 1446 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 1447 "xmlNewBlanksWrapperInputStream entity\n"); 1448 return(NULL); 1449 } 1450 if (xmlParserDebugEntities) 1451 xmlGenericError(xmlGenericErrorContext, 1452 "new blanks wrapper for entity: %s\n", entity->name); 1453 input = xmlNewInputStream(ctxt); 1454 if (input == NULL) { 1455 return(NULL); 1456 } 1457 length = xmlStrlen(entity->name) + 5; 1458 buffer = xmlMallocAtomic(length); 1459 if (buffer == NULL) { 1460 xmlErrMemory(ctxt, NULL); 1461 return(NULL); 1462 } 1463 buffer [0] = ' '; 1464 buffer [1] = '%'; 1465 buffer [length-3] = ';'; 1466 buffer [length-2] = ' '; 1467 buffer [length-1] = 0; 1468 memcpy(buffer + 2, entity->name, length - 5); 1469 input->free = deallocblankswrapper; 1470 input->base = buffer; 1471 input->cur = buffer; 1472 input->length = length; 1473 input->end = &buffer[length]; 1474 return(input); 1475} 1476 1477/** 1478 * xmlParserHandlePEReference: 1479 * @ctxt: the parser context 1480 * 
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 *
 * The substitution is done in two steps: when the reference is met in
 * a regular input, a small wrapper input holding the same reference
 * surrounded by blanks is pushed; when the reference is then met again
 * inside that wrapper, the input for the entity itself is pushed.
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%') return;
    /*
     * Only the XML_PARSER_DTD state may fall out of this switch and
     * reach the substitution code, all the others return.
     */
    switch(ctxt->instate) {
	case XML_PARSER_CDATA_SECTION:
	    return;
        case XML_PARSER_COMMENT:
	    return;
	case XML_PARSER_START_TAG:
	    return;
	case XML_PARSER_END_TAG:
	    return;
        case XML_PARSER_EOF:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
	    return;
        case XML_PARSER_PROLOG:
	case XML_PARSER_START:
	case XML_PARSER_MISC:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
	    return;
	case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
	case XML_PARSER_SYSTEM_LITERAL:
	case XML_PARSER_PUBLIC_LITERAL:
	    /* we just ignore it there */
	    return;
        case XML_PARSER_EPILOG:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
	    return;
	case XML_PARSER_ENTITY_VALUE:
	    /*
	     * NOTE: in the case of entity values, we don't do the
	     *       substitution here since we need the literal
	     *       entity value to be able to save the internal
	     *       subset of the document.
	     *       This will be handled by xmlStringDecodeEntities
	     */
	    return;
        case XML_PARSER_DTD:
	    /*
	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
	     * In the internal DTD subset, parameter-entity references
	     * can occur only where markup declarations can occur, not
	     * within markup declarations.
	     * In that case this is handled in xmlParseMarkupDecl
	     */
	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
		return;
	    /* a '%' followed by a blank or by the end of the buffer
	     * is not handled as a reference */
	    if (IS_BLANK(NXT(1)) || NXT(1) == 0)
		return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    /* skip the '%' and read the name of the entity */
    NEXT;
    name = xmlParseName(ctxt);
    /*
     * NOTE(review): name is only checked against NULL after this debug
     * trace, so a NULL pointer can be handed to the "%s" conversion.
     */
    if (xmlParserDebugEntities)
	xmlGenericError(xmlGenericErrorContext,
		"PEReference: %s\n", name);
    if (name == NULL) {
	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
    } else {
	if (RAW == ';') {
	    NEXT;
	    /* the lookup of the entity is delegated to the SAX handler */
	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
	    if (entity == NULL) {

		/*
		 * [ WFC: Entity Declared ]
		 * In a document without any DTD, a document with only an
		 * internal DTD subset which contains no parameter entity
		 * references, or a document with "standalone='yes'", ...
		 * ... The declaration of a parameter entity must precede
		 * any reference to it...
		 */
		if ((ctxt->standalone == 1) ||
		    ((ctxt->hasExternalSubset == 0) &&
		     (ctxt->hasPErefs == 0))) {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
			 "PEReference: %%%s; not found\n", name);
	        } else {
		    /*
		     * [ VC: Entity Declared ]
		     * In a document with an external subset or external
		     * parameter entities with "standalone='no'", ...
		     * ... The declaration of a parameter entity must precede
		     * any reference to it...
		     */
		    /*
		     * reported through the validation error callback when
		     * validating, as a SAX warning otherwise; the document
		     * is flagged as not valid in both cases
		     */
		    if ((!ctxt->disableSAX) &&
			(ctxt->validate) && (ctxt->vctxt.error != NULL)) {
			ctxt->vctxt.error(ctxt->vctxt.userData,
			     "PEReference: %%%s; not found\n", name);
		    } else if ((!ctxt->disableSAX) &&
			(ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
			ctxt->sax->warning(ctxt->userData,
			 "PEReference: %%%s; not found\n", name);
		    ctxt->valid = 0;
		}
	    } else if (ctxt->input->free != deallocblankswrapper) {
		    /*
		     * first step: we are not inside a blanks wrapper yet,
		     * push one; xmlPushInput() ignores a NULL input
		     */
		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
		    xmlPushInput(ctxt, input);
	    } else {
		/*
		 * second step: the current input is a blanks wrapper,
		 * push the input of the entity itself
		 */
	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
		    xmlChar start[4];
		    xmlCharEncoding enc;

		    /*
		     * handle the extra spaces added before and after
		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
		     * this is done independently.
		     */
		    input = xmlNewEntityInputStream(ctxt, entity);
		    xmlPushInput(ctxt, input);

		    /*
		     * Get the 4 first bytes and decode the charset
		     * if enc != XML_CHAR_ENCODING_NONE
		     * plug some encoding conversion routines.
		     */
		    /* GROW carries its own trailing semicolon */
		    GROW
	            if (entity->length >= 4) {
			start[0] = RAW;
			start[1] = NXT(1);
			start[2] = NXT(2);
			start[3] = NXT(3);
			enc = xmlDetectCharEncoding(start, 4);
			if (enc != XML_CHAR_ENCODING_NONE) {
			    xmlSwitchEncoding(ctxt, enc);
			}
		    }

		    /* an external entity may start with a text declaration */
		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
			(RAW == '<') && (NXT(1) == '?') &&
			(NXT(2) == 'x') && (NXT(3) == 'm') &&
			(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
			xmlParseTextDecl(ctxt);
		    }
		} else {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
			     "PEReference: %s is not a parameter entity\n",
				      name);
		}
	    }
	} else {
	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
	}
    }
}

/*
 * Macro used to grow the current buffer.
1669 */ 1670#define growBuffer(buffer) { \ 1671 buffer##_size *= 2; \ 1672 buffer = (xmlChar *) \ 1673 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 1674 if (buffer == NULL) goto mem_error; \ 1675} 1676 1677/** 1678 * xmlStringDecodeEntities: 1679 * @ctxt: the parser context 1680 * @str: the input string 1681 * @len: the string length 1682 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 1683 * @end: an end marker xmlChar, 0 if none 1684 * @end2: an end marker xmlChar, 0 if none 1685 * @end3: an end marker xmlChar, 0 if none 1686 * 1687 * Takes a entity string content and process to do the adequate substitutions. 1688 * 1689 * [67] Reference ::= EntityRef | CharRef 1690 * 1691 * [69] PEReference ::= '%' Name ';' 1692 * 1693 * Returns A newly allocated string with the substitution done. The caller 1694 * must deallocate it ! 1695 */ 1696xmlChar * 1697xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 1698 int what, xmlChar end, xmlChar end2, xmlChar end3) { 1699 xmlChar *buffer = NULL; 1700 int buffer_size = 0; 1701 1702 xmlChar *current = NULL; 1703 const xmlChar *last; 1704 xmlEntityPtr ent; 1705 int c,l; 1706 int nbchars = 0; 1707 1708 if ((str == NULL) || (len < 0)) 1709 return(NULL); 1710 last = str + len; 1711 1712 if (ctxt->depth > 40) { 1713 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 1714 return(NULL); 1715 } 1716 1717 /* 1718 * allocate a translation buffer. 1719 */ 1720 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 1721 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 1722 if (buffer == NULL) goto mem_error; 1723 1724 /* 1725 * OK loop until we reach one of the ending char or a size limit. 1726 * we are operating on already parsed values. 
1727 */ 1728 if (str < last) 1729 c = CUR_SCHAR(str, l); 1730 else 1731 c = 0; 1732 while ((c != 0) && (c != end) && /* non input consuming loop */ 1733 (c != end2) && (c != end3)) { 1734 1735 if (c == 0) break; 1736 if ((c == '&') && (str[1] == '#')) { 1737 int val = xmlParseStringCharRef(ctxt, &str); 1738 if (val != 0) { 1739 COPY_BUF(0,buffer,nbchars,val); 1740 } 1741 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 1742 if (xmlParserDebugEntities) 1743 xmlGenericError(xmlGenericErrorContext, 1744 "String decoding Entity Reference: %.30s\n", 1745 str); 1746 ent = xmlParseStringEntityRef(ctxt, &str); 1747 if ((ent != NULL) && 1748 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 1749 if (ent->content != NULL) { 1750 COPY_BUF(0,buffer,nbchars,ent->content[0]); 1751 } else { 1752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1753 ctxt->sax->error(ctxt->userData, 1754 "internal error entity has no content\n"); 1755 } 1756 } else if ((ent != NULL) && (ent->content != NULL)) { 1757 xmlChar *rep; 1758 1759 ctxt->depth++; 1760 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1761 0, 0, 0); 1762 ctxt->depth--; 1763 if (rep != NULL) { 1764 current = rep; 1765 while (*current != 0) { /* non input consuming loop */ 1766 buffer[nbchars++] = *current++; 1767 if (nbchars > 1768 buffer_size - XML_PARSER_BUFFER_SIZE) { 1769 growBuffer(buffer); 1770 } 1771 } 1772 xmlFree(rep); 1773 } 1774 } else if (ent != NULL) { 1775 int i = xmlStrlen(ent->name); 1776 const xmlChar *cur = ent->name; 1777 1778 buffer[nbchars++] = '&'; 1779 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 1780 growBuffer(buffer); 1781 } 1782 for (;i > 0;i--) 1783 buffer[nbchars++] = *cur++; 1784 buffer[nbchars++] = ';'; 1785 } 1786 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 1787 if (xmlParserDebugEntities) 1788 xmlGenericError(xmlGenericErrorContext, 1789 "String decoding PE Reference: %.30s\n", str); 1790 ent = xmlParseStringPEReference(ctxt, &str); 1791 if (ent != 
NULL) { 1792 xmlChar *rep; 1793 1794 ctxt->depth++; 1795 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1796 0, 0, 0); 1797 ctxt->depth--; 1798 if (rep != NULL) { 1799 current = rep; 1800 while (*current != 0) { /* non input consuming loop */ 1801 buffer[nbchars++] = *current++; 1802 if (nbchars > 1803 buffer_size - XML_PARSER_BUFFER_SIZE) { 1804 growBuffer(buffer); 1805 } 1806 } 1807 xmlFree(rep); 1808 } 1809 } 1810 } else { 1811 COPY_BUF(l,buffer,nbchars,c); 1812 str += l; 1813 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 1814 growBuffer(buffer); 1815 } 1816 } 1817 if (str < last) 1818 c = CUR_SCHAR(str, l); 1819 else 1820 c = 0; 1821 } 1822 buffer[nbchars++] = 0; 1823 return(buffer); 1824 1825mem_error: 1826 xmlErrMemory(ctxt, NULL); 1827 return(NULL); 1828} 1829 1830/** 1831 * xmlStringDecodeEntities: 1832 * @ctxt: the parser context 1833 * @str: the input string 1834 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 1835 * @end: an end marker xmlChar, 0 if none 1836 * @end2: an end marker xmlChar, 0 if none 1837 * @end3: an end marker xmlChar, 0 if none 1838 * 1839 * Takes a entity string content and process to do the adequate substitutions. 1840 * 1841 * [67] Reference ::= EntityRef | CharRef 1842 * 1843 * [69] PEReference ::= '%' Name ';' 1844 * 1845 * Returns A newly allocated string with the substitution done. The caller 1846 * must deallocate it ! 
1847 */ 1848xmlChar * 1849xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 1850 xmlChar end, xmlChar end2, xmlChar end3) { 1851 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 1852 end, end2, end3)); 1853} 1854 1855/************************************************************************ 1856 * * 1857 * Commodity functions to handle xmlChars * 1858 * * 1859 ************************************************************************/ 1860 1861/** 1862 * xmlStrndup: 1863 * @cur: the input xmlChar * 1864 * @len: the len of @cur 1865 * 1866 * a strndup for array of xmlChar's 1867 * 1868 * Returns a new xmlChar * or NULL 1869 */ 1870xmlChar * 1871xmlStrndup(const xmlChar *cur, int len) { 1872 xmlChar *ret; 1873 1874 if ((cur == NULL) || (len < 0)) return(NULL); 1875 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar)); 1876 if (ret == NULL) { 1877 xmlErrMemory(NULL, NULL); 1878 return(NULL); 1879 } 1880 memcpy(ret, cur, len * sizeof(xmlChar)); 1881 ret[len] = 0; 1882 return(ret); 1883} 1884 1885/** 1886 * xmlStrdup: 1887 * @cur: the input xmlChar * 1888 * 1889 * a strdup for array of xmlChar's. Since they are supposed to be 1890 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1891 * a termination mark of '0'. 
1892 * 1893 * Returns a new xmlChar * or NULL 1894 */ 1895xmlChar * 1896xmlStrdup(const xmlChar *cur) { 1897 const xmlChar *p = cur; 1898 1899 if (cur == NULL) return(NULL); 1900 while (*p != 0) p++; /* non input consuming */ 1901 return(xmlStrndup(cur, p - cur)); 1902} 1903 1904/** 1905 * xmlCharStrndup: 1906 * @cur: the input char * 1907 * @len: the len of @cur 1908 * 1909 * a strndup for char's to xmlChar's 1910 * 1911 * Returns a new xmlChar * or NULL 1912 */ 1913 1914xmlChar * 1915xmlCharStrndup(const char *cur, int len) { 1916 int i; 1917 xmlChar *ret; 1918 1919 if ((cur == NULL) || (len < 0)) return(NULL); 1920 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar)); 1921 if (ret == NULL) { 1922 xmlErrMemory(NULL, NULL); 1923 return(NULL); 1924 } 1925 for (i = 0;i < len;i++) 1926 ret[i] = (xmlChar) cur[i]; 1927 ret[len] = 0; 1928 return(ret); 1929} 1930 1931/** 1932 * xmlCharStrdup: 1933 * @cur: the input char * 1934 * 1935 * a strdup for char's to xmlChar's 1936 * 1937 * Returns a new xmlChar * or NULL 1938 */ 1939 1940xmlChar * 1941xmlCharStrdup(const char *cur) { 1942 const char *p = cur; 1943 1944 if (cur == NULL) return(NULL); 1945 while (*p != '\0') p++; /* non input consuming */ 1946 return(xmlCharStrndup(cur, p - cur)); 1947} 1948 1949/** 1950 * xmlStrcmp: 1951 * @str1: the first xmlChar * 1952 * @str2: the second xmlChar * 1953 * 1954 * a strcmp for xmlChar's 1955 * 1956 * Returns the integer result of the comparison 1957 */ 1958 1959int 1960xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 1961 register int tmp; 1962 1963 if (str1 == str2) return(0); 1964 if (str1 == NULL) return(-1); 1965 if (str2 == NULL) return(1); 1966 do { 1967 tmp = *str1++ - *str2; 1968 if (tmp != 0) return(tmp); 1969 } while (*str2++ != 0); 1970 return 0; 1971} 1972 1973/** 1974 * xmlStrEqual: 1975 * @str1: the first xmlChar * 1976 * @str2: the second xmlChar * 1977 * 1978 * Check if both string are equal of have same content 1979 * Should be a bit more readable 
and faster than xmlStrEqual() 1980 * 1981 * Returns 1 if they are equal, 0 if they are different 1982 */ 1983 1984int 1985xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 1986 if (str1 == str2) return(1); 1987 if (str1 == NULL) return(0); 1988 if (str2 == NULL) return(0); 1989 do { 1990 if (*str1++ != *str2) return(0); 1991 } while (*str2++); 1992 return(1); 1993} 1994 1995/** 1996 * xmlStrQEqual: 1997 * @pref: the prefix of the QName 1998 * @name: the localname of the QName 1999 * @str: the second xmlChar * 2000 * 2001 * Check if a QName is Equal to a given string 2002 * 2003 * Returns 1 if they are equal, 0 if they are different 2004 */ 2005 2006int 2007xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) { 2008 if (pref == NULL) return(xmlStrEqual(name, str)); 2009 if (name == NULL) return(0); 2010 if (str == NULL) return(0); 2011 2012 do { 2013 if (*pref++ != *str) return(0); 2014 } while ((*str++) && (*pref)); 2015 if (*str++ != ':') return(0); 2016 do { 2017 if (*name++ != *str) return(0); 2018 } while (*str++); 2019 return(1); 2020} 2021 2022/** 2023 * xmlStrncmp: 2024 * @str1: the first xmlChar * 2025 * @str2: the second xmlChar * 2026 * @len: the max comparison length 2027 * 2028 * a strncmp for xmlChar's 2029 * 2030 * Returns the integer result of the comparison 2031 */ 2032 2033int 2034xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 2035 register int tmp; 2036 2037 if (len <= 0) return(0); 2038 if (str1 == str2) return(0); 2039 if (str1 == NULL) return(-1); 2040 if (str2 == NULL) return(1); 2041 do { 2042 tmp = *str1++ - *str2; 2043 if (tmp != 0 || --len == 0) return(tmp); 2044 } while (*str2++ != 0); 2045 return 0; 2046} 2047 2048static const xmlChar casemap[256] = { 2049 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 2050 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 2051 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 2052 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 2053 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 2054 
0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 2055 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 2056 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 2057 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 2058 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 2059 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 2060 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 2061 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 2062 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 2063 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 2064 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 2065 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 2066 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 2067 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 2068 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 2069 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 2070 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 2071 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 2072 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 2073 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 2074 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 2075 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 2076 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 2077 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 2078 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 2079 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 2080 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 2081}; 2082 2083/** 2084 * xmlStrcasecmp: 2085 * @str1: the first xmlChar * 2086 * @str2: the second xmlChar * 2087 * 2088 * a strcasecmp for xmlChar's 2089 * 2090 * Returns the integer result of the comparison 2091 */ 2092 2093int 2094xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 2095 register int tmp; 2096 2097 if (str1 == str2) return(0); 2098 if (str1 == NULL) return(-1); 2099 if (str2 == NULL) return(1); 2100 do { 2101 tmp = casemap[*str1++] - casemap[*str2]; 2102 if (tmp != 0) return(tmp); 2103 } while (*str2++ != 0); 2104 return 0; 2105} 2106 2107/** 2108 * xmlStrncasecmp: 2109 * @str1: the first xmlChar * 2110 * @str2: the second xmlChar * 2111 * @len: the max comparison length 2112 * 2113 * a strncasecmp for xmlChar's 2114 * 2115 * 
Returns the integer result of the comparison 2116 */ 2117 2118int 2119xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 2120 register int tmp; 2121 2122 if (len <= 0) return(0); 2123 if (str1 == str2) return(0); 2124 if (str1 == NULL) return(-1); 2125 if (str2 == NULL) return(1); 2126 do { 2127 tmp = casemap[*str1++] - casemap[*str2]; 2128 if (tmp != 0 || --len == 0) return(tmp); 2129 } while (*str2++ != 0); 2130 return 0; 2131} 2132 2133/** 2134 * xmlStrchr: 2135 * @str: the xmlChar * array 2136 * @val: the xmlChar to search 2137 * 2138 * a strchr for xmlChar's 2139 * 2140 * Returns the xmlChar * for the first occurrence or NULL. 2141 */ 2142 2143const xmlChar * 2144xmlStrchr(const xmlChar *str, xmlChar val) { 2145 if (str == NULL) return(NULL); 2146 while (*str != 0) { /* non input consuming */ 2147 if (*str == val) return((xmlChar *) str); 2148 str++; 2149 } 2150 return(NULL); 2151} 2152 2153/** 2154 * xmlStrstr: 2155 * @str: the xmlChar * array (haystack) 2156 * @val: the xmlChar to search (needle) 2157 * 2158 * a strstr for xmlChar's 2159 * 2160 * Returns the xmlChar * for the first occurrence or NULL. 2161 */ 2162 2163const xmlChar * 2164xmlStrstr(const xmlChar *str, const xmlChar *val) { 2165 int n; 2166 2167 if (str == NULL) return(NULL); 2168 if (val == NULL) return(NULL); 2169 n = xmlStrlen(val); 2170 2171 if (n == 0) return(str); 2172 while (*str != 0) { /* non input consuming */ 2173 if (*str == *val) { 2174 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 2175 } 2176 str++; 2177 } 2178 return(NULL); 2179} 2180 2181/** 2182 * xmlStrcasestr: 2183 * @str: the xmlChar * array (haystack) 2184 * @val: the xmlChar to search (needle) 2185 * 2186 * a case-ignoring strstr for xmlChar's 2187 * 2188 * Returns the xmlChar * for the first occurrence or NULL. 
2189 */ 2190 2191const xmlChar * 2192xmlStrcasestr(const xmlChar *str, xmlChar *val) { 2193 int n; 2194 2195 if (str == NULL) return(NULL); 2196 if (val == NULL) return(NULL); 2197 n = xmlStrlen(val); 2198 2199 if (n == 0) return(str); 2200 while (*str != 0) { /* non input consuming */ 2201 if (casemap[*str] == casemap[*val]) 2202 if (!xmlStrncasecmp(str, val, n)) return(str); 2203 str++; 2204 } 2205 return(NULL); 2206} 2207 2208/** 2209 * xmlStrsub: 2210 * @str: the xmlChar * array (haystack) 2211 * @start: the index of the first char (zero based) 2212 * @len: the length of the substring 2213 * 2214 * Extract a substring of a given string 2215 * 2216 * Returns the xmlChar * for the first occurrence or NULL. 2217 */ 2218 2219xmlChar * 2220xmlStrsub(const xmlChar *str, int start, int len) { 2221 int i; 2222 2223 if (str == NULL) return(NULL); 2224 if (start < 0) return(NULL); 2225 if (len < 0) return(NULL); 2226 2227 for (i = 0;i < start;i++) { 2228 if (*str == 0) return(NULL); 2229 str++; 2230 } 2231 if (*str == 0) return(NULL); 2232 return(xmlStrndup(str, len)); 2233} 2234 2235/** 2236 * xmlStrlen: 2237 * @str: the xmlChar * array 2238 * 2239 * length of a xmlChar's string 2240 * 2241 * Returns the number of xmlChar contained in the ARRAY. 2242 */ 2243 2244int 2245xmlStrlen(const xmlChar *str) { 2246 int len = 0; 2247 2248 if (str == NULL) return(0); 2249 while (*str != 0) { /* non input consuming */ 2250 str++; 2251 len++; 2252 } 2253 return(len); 2254} 2255 2256/** 2257 * xmlStrncat: 2258 * @cur: the original xmlChar * array 2259 * @add: the xmlChar * array added 2260 * @len: the length of @add 2261 * 2262 * a strncat for array of xmlChar's, it will extend @cur with the len 2263 * first bytes of @add. 
2264 * 2265 * Returns a new xmlChar *, the original @cur is reallocated if needed 2266 * and should not be freed 2267 */ 2268 2269xmlChar * 2270xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 2271 int size; 2272 xmlChar *ret; 2273 2274 if ((add == NULL) || (len == 0)) 2275 return(cur); 2276 if (cur == NULL) 2277 return(xmlStrndup(add, len)); 2278 2279 size = xmlStrlen(cur); 2280 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 2281 if (ret == NULL) { 2282 xmlErrMemory(NULL, NULL); 2283 return(cur); 2284 } 2285 memcpy(&ret[size], add, len * sizeof(xmlChar)); 2286 ret[size + len] = 0; 2287 return(ret); 2288} 2289 2290/** 2291 * xmlStrcat: 2292 * @cur: the original xmlChar * array 2293 * @add: the xmlChar * array added 2294 * 2295 * a strcat for array of xmlChar's. Since they are supposed to be 2296 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 2297 * a termination mark of '0'. 2298 * 2299 * Returns a new xmlChar * containing the concatenated string. 2300 */ 2301xmlChar * 2302xmlStrcat(xmlChar *cur, const xmlChar *add) { 2303 const xmlChar *p = add; 2304 2305 if (add == NULL) return(cur); 2306 if (cur == NULL) 2307 return(xmlStrdup(add)); 2308 2309 while (*p != 0) p++; /* non input consuming */ 2310 return(xmlStrncat(cur, add, p - add)); 2311} 2312 2313/************************************************************************ 2314 * * 2315 * Commodity functions, cleanup needed ? * 2316 * * 2317 ************************************************************************/ 2318 2319/** 2320 * areBlanks: 2321 * @ctxt: an XML parser context 2322 * @str: a xmlChar * 2323 * @len: the size of @str 2324 * 2325 * Is this a sequence of blank chars that one can ignore ? 2326 * 2327 * Returns 1 if ignorable 0 otherwise. 
2328 */ 2329 2330static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 2331 int i, ret; 2332 xmlNodePtr lastChild; 2333 2334 /* 2335 * Don't spend time trying to differentiate them, the same callback is 2336 * used ! 2337 */ 2338 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2339 return(0); 2340 2341 /* 2342 * Check for xml:space value. 2343 */ 2344 if (*(ctxt->space) == 1) 2345 return(0); 2346 2347 /* 2348 * Check that the string is made of blanks 2349 */ 2350 for (i = 0;i < len;i++) 2351 if (!(IS_BLANK(str[i]))) return(0); 2352 2353 /* 2354 * Look if the element is mixed content in the DTD if available 2355 */ 2356 if (ctxt->node == NULL) return(0); 2357 if (ctxt->myDoc != NULL) { 2358 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2359 if (ret == 0) return(1); 2360 if (ret == 1) return(0); 2361 } 2362 2363 /* 2364 * Otherwise, heuristic :-\ 2365 */ 2366 if (RAW != '<') return(0); 2367 if ((ctxt->node->children == NULL) && 2368 (RAW == '<') && (NXT(1) == '/')) return(0); 2369 2370 lastChild = xmlGetLastChild(ctxt->node); 2371 if (lastChild == NULL) { 2372 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2373 (ctxt->node->content != NULL)) return(0); 2374 } else if (xmlNodeIsText(lastChild)) 2375 return(0); 2376 else if ((ctxt->node->children != NULL) && 2377 (xmlNodeIsText(ctxt->node->children))) 2378 return(0); 2379 return(1); 2380} 2381 2382/************************************************************************ 2383 * * 2384 * Extra stuff for namespace support * 2385 * Relates to http://www.w3.org/TR/WD-xml-names * 2386 * * 2387 ************************************************************************/ 2388 2389/** 2390 * xmlSplitQName: 2391 * @ctxt: an XML parser context 2392 * @name: an XML parser context 2393 * @prefix: a xmlChar ** 2394 * 2395 * parse an UTF8 encoded XML qualified name string 2396 * 2397 * [NS 5] QName ::= (Prefix ':')? 
LocalPart 2398 * 2399 * [NS 6] Prefix ::= NCName 2400 * 2401 * [NS 7] LocalPart ::= NCName 2402 * 2403 * Returns the local part, and prefix is updated 2404 * to get the Prefix if any. 2405 */ 2406 2407xmlChar * 2408xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2409 xmlChar buf[XML_MAX_NAMELEN + 5]; 2410 xmlChar *buffer = NULL; 2411 int len = 0; 2412 int max = XML_MAX_NAMELEN; 2413 xmlChar *ret = NULL; 2414 const xmlChar *cur = name; 2415 int c; 2416 2417 *prefix = NULL; 2418 2419 if (cur == NULL) return(NULL); 2420 2421#ifndef XML_XML_NAMESPACE 2422 /* xml: prefix is not really a namespace */ 2423 if ((cur[0] == 'x') && (cur[1] == 'm') && 2424 (cur[2] == 'l') && (cur[3] == ':')) 2425 return(xmlStrdup(name)); 2426#endif 2427 2428 /* nasty but well=formed */ 2429 if (cur[0] == ':') 2430 return(xmlStrdup(name)); 2431 2432 c = *cur++; 2433 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2434 buf[len++] = c; 2435 c = *cur++; 2436 } 2437 if (len >= max) { 2438 /* 2439 * Okay someone managed to make a huge name, so he's ready to pay 2440 * for the processing speed. 
2441 */ 2442 max = len * 2; 2443 2444 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2445 if (buffer == NULL) { 2446 xmlErrMemory(ctxt, NULL); 2447 return(NULL); 2448 } 2449 memcpy(buffer, buf, len); 2450 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2451 if (len + 10 > max) { 2452 max *= 2; 2453 buffer = (xmlChar *) xmlRealloc(buffer, 2454 max * sizeof(xmlChar)); 2455 if (buffer == NULL) { 2456 xmlErrMemory(ctxt, NULL); 2457 return(NULL); 2458 } 2459 } 2460 buffer[len++] = c; 2461 c = *cur++; 2462 } 2463 buffer[len] = 0; 2464 } 2465 2466 /* nasty but well=formed 2467 if ((c == ':') && (*cur == 0)) { 2468 return(xmlStrdup(name)); 2469 } */ 2470 2471 if (buffer == NULL) 2472 ret = xmlStrndup(buf, len); 2473 else { 2474 ret = buffer; 2475 buffer = NULL; 2476 max = XML_MAX_NAMELEN; 2477 } 2478 2479 2480 if (c == ':') { 2481 c = *cur; 2482 *prefix = ret; 2483 if (c == 0) { 2484 return(xmlStrndup(BAD_CAST "", 0)); 2485 } 2486 len = 0; 2487 2488 /* 2489 * Check that the first character is proper to start 2490 * a new name 2491 */ 2492 if (!(((c >= 0x61) && (c <= 0x7A)) || 2493 ((c >= 0x41) && (c <= 0x5A)) || 2494 (c == '_') || (c == ':'))) { 2495 int l; 2496 int first = CUR_SCHAR(cur, l); 2497 2498 if (!IS_LETTER(first) && (first != '_')) { 2499 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 2500 "Name %s is not XML Namespace compliant\n", 2501 name); 2502 } 2503 } 2504 cur++; 2505 2506 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 2507 buf[len++] = c; 2508 c = *cur++; 2509 } 2510 if (len >= max) { 2511 /* 2512 * Okay someone managed to make a huge name, so he's ready to pay 2513 * for the processing speed. 
2514 */ 2515 max = len * 2; 2516 2517 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2518 if (buffer == NULL) { 2519 xmlErrMemory(ctxt, NULL); 2520 return(NULL); 2521 } 2522 memcpy(buffer, buf, len); 2523 while (c != 0) { /* tested bigname2.xml */ 2524 if (len + 10 > max) { 2525 max *= 2; 2526 buffer = (xmlChar *) xmlRealloc(buffer, 2527 max * sizeof(xmlChar)); 2528 if (buffer == NULL) { 2529 xmlErrMemory(ctxt, NULL); 2530 return(NULL); 2531 } 2532 } 2533 buffer[len++] = c; 2534 c = *cur++; 2535 } 2536 buffer[len] = 0; 2537 } 2538 2539 if (buffer == NULL) 2540 ret = xmlStrndup(buf, len); 2541 else { 2542 ret = buffer; 2543 } 2544 } 2545 2546 return(ret); 2547} 2548 2549/************************************************************************ 2550 * * 2551 * The parser itself * 2552 * Relates to http://www.w3.org/TR/REC-xml * 2553 * * 2554 ************************************************************************/ 2555 2556static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 2557static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 2558 int *len, int *alloc, int normalize); 2559 2560/** 2561 * xmlParseName: 2562 * @ctxt: an XML parser context 2563 * 2564 * parse an XML name. 2565 * 2566 * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | 2567 * CombiningChar | Extender 2568 * 2569 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2570 * 2571 * [6] Names ::= Name (S Name)* 2572 * 2573 * Returns the Name parsed or NULL 2574 */ 2575 2576const xmlChar * 2577xmlParseName(xmlParserCtxtPtr ctxt) { 2578 const xmlChar *in; 2579 const xmlChar *ret; 2580 int count = 0; 2581 2582 GROW; 2583 2584 /* 2585 * Accelerator for simple ASCII names 2586 */ 2587 in = ctxt->input->cur; 2588 if (((*in >= 0x61) && (*in <= 0x7A)) || 2589 ((*in >= 0x41) && (*in <= 0x5A)) || 2590 (*in == '_') || (*in == ':')) { 2591 in++; 2592 while (((*in >= 0x61) && (*in <= 0x7A)) || 2593 ((*in >= 0x41) && (*in <= 0x5A)) || 2594 ((*in >= 0x30) && (*in <= 0x39)) || 2595 (*in == '_') || (*in == '-') || 2596 (*in == ':') || (*in == '.')) 2597 in++; 2598 if ((*in > 0) && (*in < 0x80)) { 2599 count = in - ctxt->input->cur; 2600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 2601 ctxt->input->cur = in; 2602 ctxt->nbChars += count; 2603 ctxt->input->col += count; 2604 if (ret == NULL) 2605 xmlErrMemory(ctxt, NULL); 2606 return(ret); 2607 } 2608 } 2609 return(xmlParseNameComplex(ctxt)); 2610} 2611 2612/** 2613 * xmlParseNameAndCompare: 2614 * @ctxt: an XML parser context 2615 * 2616 * parse an XML name and compares for match 2617 * (specialized for endtag parsing) 2618 * 2619 * Returns NULL for an illegal name, (xmlChar*) 1 for success 2620 * and the name for mismatch 2621 */ 2622 2623static const xmlChar * 2624xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 2625 const xmlChar *cmp = other; 2626 const xmlChar *in; 2627 const xmlChar *ret; 2628 2629 GROW; 2630 2631 in = ctxt->input->cur; 2632 while (*in != 0 && *in == *cmp) { 2633 ++in; 2634 ++cmp; 2635 } 2636 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) { 2637 /* success */ 2638 ctxt->input->cur = in; 2639 return (const xmlChar*) 1; 2640 } 2641 /* failure (or end of input buffer), check with full function */ 2642 ret = xmlParseName (ctxt); 2643 
/* strings coming from the dictionnary direct compare possible */ 2644 if (ret == other) { 2645 return (const xmlChar*) 1; 2646 } 2647 return ret; 2648} 2649 2650static const xmlChar * 2651xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 2652 int len = 0, l; 2653 int c; 2654 int count = 0; 2655 2656 /* 2657 * Handler for more complex cases 2658 */ 2659 GROW; 2660 c = CUR_CHAR(l); 2661 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 2662 (!IS_LETTER(c) && (c != '_') && 2663 (c != ':'))) { 2664 return(NULL); 2665 } 2666 2667 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 2668 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2669 (c == '.') || (c == '-') || 2670 (c == '_') || (c == ':') || 2671 (IS_COMBINING(c)) || 2672 (IS_EXTENDER(c)))) { 2673 if (count++ > 100) { 2674 count = 0; 2675 GROW; 2676 } 2677 len += l; 2678 NEXTL(l); 2679 c = CUR_CHAR(l); 2680 } 2681 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 2682} 2683 2684/** 2685 * xmlParseStringName: 2686 * @ctxt: an XML parser context 2687 * @str: a pointer to the string pointer (IN/OUT) 2688 * 2689 * parse an XML name. 2690 * 2691 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2692 * CombiningChar | Extender 2693 * 2694 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2695 * 2696 * [6] Names ::= Name (S Name)* 2697 * 2698 * Returns the Name parsed or NULL. The @str pointer 2699 * is updated to the current location in the string. 
2700 */ 2701 2702static xmlChar * 2703xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 2704 xmlChar buf[XML_MAX_NAMELEN + 5]; 2705 const xmlChar *cur = *str; 2706 int len = 0, l; 2707 int c; 2708 2709 c = CUR_SCHAR(cur, l); 2710 if (!IS_LETTER(c) && (c != '_') && 2711 (c != ':')) { 2712 return(NULL); 2713 } 2714 2715 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 2716 (c == '.') || (c == '-') || 2717 (c == '_') || (c == ':') || 2718 (IS_COMBINING(c)) || 2719 (IS_EXTENDER(c))) { 2720 COPY_BUF(l,buf,len,c); 2721 cur += l; 2722 c = CUR_SCHAR(cur, l); 2723 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 2724 /* 2725 * Okay someone managed to make a huge name, so he's ready to pay 2726 * for the processing speed. 2727 */ 2728 xmlChar *buffer; 2729 int max = len * 2; 2730 2731 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2732 if (buffer == NULL) { 2733 xmlErrMemory(ctxt, NULL); 2734 return(NULL); 2735 } 2736 memcpy(buffer, buf, len); 2737 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 2738 (c == '.') || (c == '-') || 2739 (c == '_') || (c == ':') || 2740 (IS_COMBINING(c)) || 2741 (IS_EXTENDER(c))) { 2742 if (len + 10 > max) { 2743 max *= 2; 2744 buffer = (xmlChar *) xmlRealloc(buffer, 2745 max * sizeof(xmlChar)); 2746 if (buffer == NULL) { 2747 xmlErrMemory(ctxt, NULL); 2748 return(NULL); 2749 } 2750 } 2751 COPY_BUF(l,buffer,len,c); 2752 cur += l; 2753 c = CUR_SCHAR(cur, l); 2754 } 2755 buffer[len] = 0; 2756 *str = cur; 2757 return(buffer); 2758 } 2759 } 2760 *str = cur; 2761 return(xmlStrndup(buf, len)); 2762} 2763 2764/** 2765 * xmlParseNmtoken: 2766 * @ctxt: an XML parser context 2767 * 2768 * parse an XML Nmtoken. 
2769 * 2770 * [7] Nmtoken ::= (NameChar)+ 2771 * 2772 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 2773 * 2774 * Returns the Nmtoken parsed or NULL 2775 */ 2776 2777xmlChar * 2778xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 2779 xmlChar buf[XML_MAX_NAMELEN + 5]; 2780 int len = 0, l; 2781 int c; 2782 int count = 0; 2783 2784 GROW; 2785 c = CUR_CHAR(l); 2786 2787 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2788 (c == '.') || (c == '-') || 2789 (c == '_') || (c == ':') || 2790 (IS_COMBINING(c)) || 2791 (IS_EXTENDER(c))) { 2792 if (count++ > 100) { 2793 count = 0; 2794 GROW; 2795 } 2796 COPY_BUF(l,buf,len,c); 2797 NEXTL(l); 2798 c = CUR_CHAR(l); 2799 if (len >= XML_MAX_NAMELEN) { 2800 /* 2801 * Okay someone managed to make a huge token, so he's ready to pay 2802 * for the processing speed. 2803 */ 2804 xmlChar *buffer; 2805 int max = len * 2; 2806 2807 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2808 if (buffer == NULL) { 2809 xmlErrMemory(ctxt, NULL); 2810 return(NULL); 2811 } 2812 memcpy(buffer, buf, len); 2813 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2814 (c == '.') || (c == '-') || 2815 (c == '_') || (c == ':') || 2816 (IS_COMBINING(c)) || 2817 (IS_EXTENDER(c))) { 2818 if (count++ > 100) { 2819 count = 0; 2820 GROW; 2821 } 2822 if (len + 10 > max) { 2823 max *= 2; 2824 buffer = (xmlChar *) xmlRealloc(buffer, 2825 max * sizeof(xmlChar)); 2826 if (buffer == NULL) { 2827 xmlErrMemory(ctxt, NULL); 2828 return(NULL); 2829 } 2830 } 2831 COPY_BUF(l,buffer,len,c); 2832 NEXTL(l); 2833 c = CUR_CHAR(l); 2834 } 2835 buffer[len] = 0; 2836 return(buffer); 2837 } 2838 } 2839 if (len == 0) 2840 return(NULL); 2841 return(xmlStrndup(buf, len)); 2842} 2843 2844/** 2845 * xmlParseEntityValue: 2846 * @ctxt: an XML parser context 2847 * @orig: if non-NULL store a copy of the original entity value 2848 * 2849 * parse a value for ENTITY declarations 2850 * 2851 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 2852 * 
"'" ([^%&'] | PEReference | Reference)* "'" 2853 * 2854 * Returns the EntityValue parsed with reference substituted or NULL 2855 */ 2856 2857xmlChar * 2858xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 2859 xmlChar *buf = NULL; 2860 int len = 0; 2861 int size = XML_PARSER_BUFFER_SIZE; 2862 int c, l; 2863 xmlChar stop; 2864 xmlChar *ret = NULL; 2865 const xmlChar *cur = NULL; 2866 xmlParserInputPtr input; 2867 2868 if (RAW == '"') stop = '"'; 2869 else if (RAW == '\'') stop = '\''; 2870 else { 2871 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 2872 return(NULL); 2873 } 2874 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 2875 if (buf == NULL) { 2876 xmlErrMemory(ctxt, NULL); 2877 return(NULL); 2878 } 2879 2880 /* 2881 * The content of the entity definition is copied in a buffer. 2882 */ 2883 2884 ctxt->instate = XML_PARSER_ENTITY_VALUE; 2885 input = ctxt->input; 2886 GROW; 2887 NEXT; 2888 c = CUR_CHAR(l); 2889 /* 2890 * NOTE: 4.4.5 Included in Literal 2891 * When a parameter entity reference appears in a literal entity 2892 * value, ... a single or double quote character in the replacement 2893 * text is always treated as a normal data character and will not 2894 * terminate the literal. 2895 * In practice it means we stop the loop only when back at parsing 2896 * the initial entity and the quote is found 2897 */ 2898 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 2899 (ctxt->input != input))) { 2900 if (len + 5 >= size) { 2901 size *= 2; 2902 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2903 if (buf == NULL) { 2904 xmlErrMemory(ctxt, NULL); 2905 return(NULL); 2906 } 2907 } 2908 COPY_BUF(l,buf,len,c); 2909 NEXTL(l); 2910 /* 2911 * Pop-up of finished entities. 
2912 */ 2913 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 2914 xmlPopInput(ctxt); 2915 2916 GROW; 2917 c = CUR_CHAR(l); 2918 if (c == 0) { 2919 GROW; 2920 c = CUR_CHAR(l); 2921 } 2922 } 2923 buf[len] = 0; 2924 2925 /* 2926 * Raise problem w.r.t. '&' and '%' being used in non-entities 2927 * reference constructs. Note Charref will be handled in 2928 * xmlStringDecodeEntities() 2929 */ 2930 cur = buf; 2931 while (*cur != 0) { /* non input consuming */ 2932 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 2933 xmlChar *name; 2934 xmlChar tmp = *cur; 2935 2936 cur++; 2937 name = xmlParseStringName(ctxt, &cur); 2938 if ((name == NULL) || (*cur != ';')) { 2939 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 2940 "EntityValue: '%c' forbidden except for entities references\n", 2941 tmp); 2942 } 2943 if ((tmp == '%') && (ctxt->inSubset == 1) && 2944 (ctxt->inputNr == 1)) { 2945 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 2946 } 2947 if (name != NULL) 2948 xmlFree(name); 2949 } 2950 cur++; 2951 } 2952 2953 /* 2954 * Then PEReference entities are substituted. 2955 */ 2956 if (c != stop) { 2957 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 2958 xmlFree(buf); 2959 } else { 2960 NEXT; 2961 /* 2962 * NOTE: 4.4.7 Bypassed 2963 * When a general entity reference appears in the EntityValue in 2964 * an entity declaration, it is bypassed and left as is. 2965 * so XML_SUBSTITUTE_REF is not set here. 
2966 */ 2967 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2968 0, 0, 0); 2969 if (orig != NULL) 2970 *orig = buf; 2971 else 2972 xmlFree(buf); 2973 } 2974 2975 return(ret); 2976} 2977 2978/** 2979 * xmlParseAttValueComplex: 2980 * @ctxt: an XML parser context 2981 * @len: the resulting attribute len 2982 * @normalize: wether to apply the inner normalization 2983 * 2984 * parse a value for an attribute, this is the fallback function 2985 * of xmlParseAttValue() when the attribute parsing requires handling 2986 * of non-ASCII characters, or normalization compaction. 2987 * 2988 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2989 */ 2990static xmlChar * 2991xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 2992 xmlChar limit = 0; 2993 xmlChar *buf = NULL; 2994 int len = 0; 2995 int buf_size = 0; 2996 int c, l, in_space = 0; 2997 xmlChar *current = NULL; 2998 xmlEntityPtr ent; 2999 3000 if (NXT(0) == '"') { 3001 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3002 limit = '"'; 3003 NEXT; 3004 } else if (NXT(0) == '\'') { 3005 limit = '\''; 3006 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3007 NEXT; 3008 } else { 3009 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3010 return(NULL); 3011 } 3012 3013 /* 3014 * allocate a translation buffer. 3015 */ 3016 buf_size = XML_PARSER_BUFFER_SIZE; 3017 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 3018 if (buf == NULL) goto mem_error; 3019 3020 /* 3021 * OK loop until we reach one of the ending char or a size limit. 
3022 */ 3023 c = CUR_CHAR(l); 3024 while ((NXT(0) != limit) && /* checked */ 3025 (c != '<')) { 3026 if (c == 0) break; 3027 if (c == '&') { 3028 in_space = 0; 3029 if (NXT(1) == '#') { 3030 int val = xmlParseCharRef(ctxt); 3031 3032 if (val == '&') { 3033 if (ctxt->replaceEntities) { 3034 if (len > buf_size - 10) { 3035 growBuffer(buf); 3036 } 3037 buf[len++] = '&'; 3038 } else { 3039 /* 3040 * The reparsing will be done in xmlStringGetNodeList() 3041 * called by the attribute() function in SAX.c 3042 */ 3043 if (len > buf_size - 10) { 3044 growBuffer(buf); 3045 } 3046 buf[len++] = '&'; 3047 buf[len++] = '#'; 3048 buf[len++] = '3'; 3049 buf[len++] = '8'; 3050 buf[len++] = ';'; 3051 } 3052 } else { 3053 if (len > buf_size - 10) { 3054 growBuffer(buf); 3055 } 3056 len += xmlCopyChar(0, &buf[len], val); 3057 } 3058 } else { 3059 ent = xmlParseEntityRef(ctxt); 3060 if ((ent != NULL) && 3061 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3062 if (len > buf_size - 10) { 3063 growBuffer(buf); 3064 } 3065 if ((ctxt->replaceEntities == 0) && 3066 (ent->content[0] == '&')) { 3067 buf[len++] = '&'; 3068 buf[len++] = '#'; 3069 buf[len++] = '3'; 3070 buf[len++] = '8'; 3071 buf[len++] = ';'; 3072 } else { 3073 buf[len++] = ent->content[0]; 3074 } 3075 } else if ((ent != NULL) && 3076 (ctxt->replaceEntities != 0)) { 3077 xmlChar *rep; 3078 3079 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3080 rep = xmlStringDecodeEntities(ctxt, ent->content, 3081 XML_SUBSTITUTE_REF, 3082 0, 0, 0); 3083 if (rep != NULL) { 3084 current = rep; 3085 while (*current != 0) { /* non input consuming */ 3086 buf[len++] = *current++; 3087 if (len > buf_size - 10) { 3088 growBuffer(buf); 3089 } 3090 } 3091 xmlFree(rep); 3092 } 3093 } else { 3094 if (len > buf_size - 10) { 3095 growBuffer(buf); 3096 } 3097 if (ent->content != NULL) 3098 buf[len++] = ent->content[0]; 3099 } 3100 } else if (ent != NULL) { 3101 int i = xmlStrlen(ent->name); 3102 const xmlChar *cur = ent->name; 3103 3104 /* 3105 * 
This may look absurd but is needed to detect 3106 * entities problems 3107 */ 3108 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3109 (ent->content != NULL)) { 3110 xmlChar *rep; 3111 rep = xmlStringDecodeEntities(ctxt, ent->content, 3112 XML_SUBSTITUTE_REF, 0, 0, 0); 3113 if (rep != NULL) 3114 xmlFree(rep); 3115 } 3116 3117 /* 3118 * Just output the reference 3119 */ 3120 buf[len++] = '&'; 3121 if (len > buf_size - i - 10) { 3122 growBuffer(buf); 3123 } 3124 for (;i > 0;i--) 3125 buf[len++] = *cur++; 3126 buf[len++] = ';'; 3127 } 3128 } 3129 } else { 3130 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3131 if ((len != 0) || (!normalize)) { 3132 if ((!normalize) || (!in_space)) { 3133 COPY_BUF(l,buf,len,0x20); 3134 if (len > buf_size - 10) { 3135 growBuffer(buf); 3136 } 3137 } 3138 in_space = 1; 3139 } 3140 } else { 3141 in_space = 0; 3142 COPY_BUF(l,buf,len,c); 3143 if (len > buf_size - 10) { 3144 growBuffer(buf); 3145 } 3146 } 3147 NEXTL(l); 3148 } 3149 GROW; 3150 c = CUR_CHAR(l); 3151 } 3152 if ((in_space) && (normalize)) { 3153 while (buf[len - 1] == 0x20) len--; 3154 } 3155 buf[len] = 0; 3156 if (RAW == '<') { 3157 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3158 } else if (RAW != limit) { 3159 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3160 "AttValue: ' expected\n"); 3161 } else 3162 NEXT; 3163 if (attlen != NULL) *attlen = len; 3164 return(buf); 3165 3166mem_error: 3167 xmlErrMemory(ctxt, NULL); 3168 return(NULL); 3169} 3170 3171/** 3172 * xmlParseAttValue: 3173 * @ctxt: an XML parser context 3174 * 3175 * parse a value for an attribute 3176 * Note: the parser won't do substitution of entities here, this 3177 * will be handled later in xmlStringGetNodeList 3178 * 3179 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 3180 * "'" ([^<&'] | Reference)* "'" 3181 * 3182 * 3.3.3 Attribute-Value Normalization: 3183 * Before the value of an attribute is passed to the application or 3184 * checked for validity, the XML 
processor must normalize it as follows: 3185 * - a character reference is processed by appending the referenced 3186 * character to the attribute value 3187 * - an entity reference is processed by recursively processing the 3188 * replacement text of the entity 3189 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 3190 * appending #x20 to the normalized value, except that only a single 3191 * #x20 is appended for a "#xD#xA" sequence that is part of an external 3192 * parsed entity or the literal entity value of an internal parsed entity 3193 * - other characters are processed by appending them to the normalized value 3194 * If the declared value is not CDATA, then the XML processor must further 3195 * process the normalized attribute value by discarding any leading and 3196 * trailing space (#x20) characters, and by replacing sequences of space 3197 * (#x20) characters by a single space (#x20) character. 3198 * All attributes for which no declaration has been read should be treated 3199 * by a non-validating parser as if declared CDATA. 3200 * 3201 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
3202 */ 3203 3204 3205xmlChar * 3206xmlParseAttValue(xmlParserCtxtPtr ctxt) { 3207 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 3208} 3209 3210/** 3211 * xmlParseSystemLiteral: 3212 * @ctxt: an XML parser context 3213 * 3214 * parse an XML Literal 3215 * 3216 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 3217 * 3218 * Returns the SystemLiteral parsed or NULL 3219 */ 3220 3221xmlChar * 3222xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 3223 xmlChar *buf = NULL; 3224 int len = 0; 3225 int size = XML_PARSER_BUFFER_SIZE; 3226 int cur, l; 3227 xmlChar stop; 3228 int state = ctxt->instate; 3229 int count = 0; 3230 3231 SHRINK; 3232 if (RAW == '"') { 3233 NEXT; 3234 stop = '"'; 3235 } else if (RAW == '\'') { 3236 NEXT; 3237 stop = '\''; 3238 } else { 3239 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3240 return(NULL); 3241 } 3242 3243 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3244 if (buf == NULL) { 3245 xmlErrMemory(ctxt, NULL); 3246 return(NULL); 3247 } 3248 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 3249 cur = CUR_CHAR(l); 3250 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 3251 if (len + 5 >= size) { 3252 size *= 2; 3253 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3254 if (buf == NULL) { 3255 xmlErrMemory(ctxt, NULL); 3256 ctxt->instate = (xmlParserInputState) state; 3257 return(NULL); 3258 } 3259 } 3260 count++; 3261 if (count > 50) { 3262 GROW; 3263 count = 0; 3264 } 3265 COPY_BUF(l,buf,len,cur); 3266 NEXTL(l); 3267 cur = CUR_CHAR(l); 3268 if (cur == 0) { 3269 GROW; 3270 SHRINK; 3271 cur = CUR_CHAR(l); 3272 } 3273 } 3274 buf[len] = 0; 3275 ctxt->instate = (xmlParserInputState) state; 3276 if (!IS_CHAR(cur)) { 3277 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3278 } else { 3279 NEXT; 3280 } 3281 return(buf); 3282} 3283 3284/** 3285 * xmlParsePubidLiteral: 3286 * @ctxt: an XML parser context 3287 * 3288 * parse an XML public literal 3289 * 3290 * [12] PubidLiteral ::= '"' PubidChar* '"' 
| "'" (PubidChar - "'")* "'" 3291 * 3292 * Returns the PubidLiteral parsed or NULL. 3293 */ 3294 3295xmlChar * 3296xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 3297 xmlChar *buf = NULL; 3298 int len = 0; 3299 int size = XML_PARSER_BUFFER_SIZE; 3300 xmlChar cur; 3301 xmlChar stop; 3302 int count = 0; 3303 xmlParserInputState oldstate = ctxt->instate; 3304 3305 SHRINK; 3306 if (RAW == '"') { 3307 NEXT; 3308 stop = '"'; 3309 } else if (RAW == '\'') { 3310 NEXT; 3311 stop = '\''; 3312 } else { 3313 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3314 return(NULL); 3315 } 3316 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3317 if (buf == NULL) { 3318 xmlErrMemory(ctxt, NULL); 3319 return(NULL); 3320 } 3321 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 3322 cur = CUR; 3323 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ 3324 if (len + 1 >= size) { 3325 size *= 2; 3326 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3327 if (buf == NULL) { 3328 xmlErrMemory(ctxt, NULL); 3329 return(NULL); 3330 } 3331 } 3332 buf[len++] = cur; 3333 count++; 3334 if (count > 50) { 3335 GROW; 3336 count = 0; 3337 } 3338 NEXT; 3339 cur = CUR; 3340 if (cur == 0) { 3341 GROW; 3342 SHRINK; 3343 cur = CUR; 3344 } 3345 } 3346 buf[len] = 0; 3347 if (cur != stop) { 3348 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3349 } else { 3350 NEXT; 3351 } 3352 ctxt->instate = oldstate; 3353 return(buf); 3354} 3355 3356void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 3357/** 3358 * xmlParseCharData: 3359 * @ctxt: an XML parser context 3360 * @cdata: int indicating whether we are within a CDATA section 3361 * 3362 * parse a CharData section. 3363 * if we are within a CDATA section ']]>' marks an end of section. 
*
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a fast path scans runs of plain ASCII text directly
 * in the input buffer and hands them to the SAX callbacks without
 * copying.  Anything the fast path does not handle is left to
 * xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position snapshot, refreshed after each delivery to SAX and
     * written back before switching to the generic routine */
    int line = ctxt->input->line;
    int col = ctxt->input->col;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more:
            /* skip printable ASCII and tabs, stop on '<', '&' and ']' */
            while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
                    (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
                in++;
            /* line feeds belong to the run, only the line count moves */
            if (*in == 0xA) {
                ctxt->input->line++;
                in++;
                while (*in == 0xA) {
                    ctxt->input->line++;
                    in++;
                }
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is an error here; the pending run is not
                 * delivered and cur is left on the first ']' */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                goto get_more;
            }
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    /* cur is advanced before areBlanks() is called */
                    ctxt->input->cur = in;

                    /* NOTE(review): ignorableWhitespace is called without
                     * the NULL test applied to characters, and this path
                     * checks neither ctxt->sax nor ctxt->disableSAX,
                     * unlike xmlParseCharDataComplex() -- confirm this
                     * is intended */
                    if (areBlanks(ctxt, tmp, nbchar)) {
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                               tmp, nbchar);
                    } else if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /* CR LF: restart the next run on the LF, which drops
                     * the CR; the continue jumps to the loop condition
                     * with in already past the LF */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++;
                    continue; /* while */
                }
                /* lone CR: step back, not handled by the fast path */
                in--;
            }
            /* markup or a reference starts here: the caller takes over */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            /* reload in from ctxt->input->cur after SHRINK/GROW */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* put back the position recorded at the last delivery */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * xmlParseCharDataComplex:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section. This is the fallback function
 * of xmlParseCharData() when the parsing requires handling
 * of non-ASCII characters.
 *
 * Characters are accumulated in a local buffer which is flushed to the
 * SAX callbacks each time it holds XML_PARSER_BIG_BUFFER_SIZE bytes or
 * more, and once again at the end.
 */
void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
    int nbchar = 0;
    int cur, l;
    int count = 0;

    SHRINK;
    GROW;
    cur = CUR_CHAR(l);
    while ((cur != '<') && /* checked */
           (cur != '&') &&
           (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
        /* "]]>" ends a CDATA section; elsewhere it is reported as an
         * error and the characters are still copied */
        if ((cur == ']') && (NXT(1) == ']') &&
            (NXT(2) == '>')) {
            if (cdata) break;
            else {
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
            }
        }
        COPY_BUF(l,buf,nbchar,cur);
        if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
            /*
             * OK the segment is to be consumed as chars.
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                if (areBlanks(ctxt, buf, nbchar)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       buf, nbchar);
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData, buf, nbchar);
                }
            }
            nbchar = 0;
        }
        count++;
        if (count > 50) {
            GROW;
            count = 0;
        }
        NEXTL(l);
        cur = CUR_CHAR(l);
    }
    /* flush what is left in the buffer */
    if (nbchar != 0) {
        /*
         * OK the segment is to be consumed as chars.
         */
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
            if (areBlanks(ctxt, buf, nbchar)) {
                if (ctxt->sax->ignorableWhitespace != NULL)
                    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
            } else {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData, buf, nbchar);
            }
        }
    }
}

/**
 * xmlParseExternalID:
 * @ctxt:  an XML parser context
 * @publicID:  a xmlChar** receiving PubidLiteral
 * @strict: indicate whether we should restrict parsing to only
 *          production [75], see NOTE below
 *
 * Parse an External ID or a Public ID
 *
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
 *       'PUBLIC' S PubidLiteral S SystemLiteral
 *
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
 *
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
 *
 * Returns the function returns SystemLiteral and in the second
 *                case publicID receives PubidLiteral, if strict is off
 *                it is possible to return NULL and have publicID set.
3553 */ 3554 3555xmlChar * 3556xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 3557 xmlChar *URI = NULL; 3558 3559 SHRINK; 3560 3561 *publicID = NULL; 3562 if ((RAW == 'S') && (NXT(1) == 'Y') && 3563 (NXT(2) == 'S') && (NXT(3) == 'T') && 3564 (NXT(4) == 'E') && (NXT(5) == 'M')) { 3565 SKIP(6); 3566 if (!IS_BLANK(CUR)) { 3567 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3568 "Space required after 'SYSTEM'\n"); 3569 } 3570 SKIP_BLANKS; 3571 URI = xmlParseSystemLiteral(ctxt); 3572 if (URI == NULL) { 3573 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3574 } 3575 } else if ((RAW == 'P') && (NXT(1) == 'U') && 3576 (NXT(2) == 'B') && (NXT(3) == 'L') && 3577 (NXT(4) == 'I') && (NXT(5) == 'C')) { 3578 SKIP(6); 3579 if (!IS_BLANK(CUR)) { 3580 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3581 "Space required after 'PUBLIC'\n"); 3582 } 3583 SKIP_BLANKS; 3584 *publicID = xmlParsePubidLiteral(ctxt); 3585 if (*publicID == NULL) { 3586 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 3587 } 3588 if (strict) { 3589 /* 3590 * We don't handle [83] so "S SystemLiteral" is required. 3591 */ 3592 if (!IS_BLANK(CUR)) { 3593 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3594 "Space required after the Public Identifier\n"); 3595 } 3596 } else { 3597 /* 3598 * We handle [83] so we return immediately, if 3599 * "S SystemLiteral" is not detected. From a purely parsing 3600 * point of view that's a nice mess. 3601 */ 3602 const xmlChar *ptr; 3603 GROW; 3604 3605 ptr = CUR_PTR; 3606 if (!IS_BLANK(*ptr)) return(NULL); 3607 3608 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 3609 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 3610 } 3611 SKIP_BLANKS; 3612 URI = xmlParseSystemLiteral(ctxt); 3613 if (URI == NULL) { 3614 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3615 } 3616 } 3617 return(URI); 3618} 3619 3620/** 3621 * xmlParseComment: 3622 * @ctxt: an XML parser context 3623 * 3624 * Skip an XML (SGML) comment <!-- .... 
--> 3625 * The spec says that "For compatibility, the string "--" (double-hyphen) 3626 * must not occur within comments. " 3627 * 3628 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 3629 */ 3630void 3631xmlParseComment(xmlParserCtxtPtr ctxt) { 3632 xmlChar *buf = NULL; 3633 int len; 3634 int size = XML_PARSER_BUFFER_SIZE; 3635 int q, ql; 3636 int r, rl; 3637 int cur, l; 3638 xmlParserInputState state; 3639 xmlParserInputPtr input = ctxt->input; 3640 int count = 0; 3641 3642 /* 3643 * Check that there is a comment right here. 3644 */ 3645 if ((RAW != '<') || (NXT(1) != '!') || 3646 (NXT(2) != '-') || (NXT(3) != '-')) return; 3647 3648 state = ctxt->instate; 3649 ctxt->instate = XML_PARSER_COMMENT; 3650 SHRINK; 3651 SKIP(4); 3652 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3653 if (buf == NULL) { 3654 xmlErrMemory(ctxt, NULL); 3655 ctxt->instate = state; 3656 return; 3657 } 3658 q = CUR_CHAR(ql); 3659 NEXTL(ql); 3660 r = CUR_CHAR(rl); 3661 NEXTL(rl); 3662 cur = CUR_CHAR(l); 3663 len = 0; 3664 while (IS_CHAR(cur) && /* checked */ 3665 ((cur != '>') || 3666 (r != '-') || (q != '-'))) { 3667 if ((r == '-') && (q == '-')) { 3668 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 3669 } 3670 if (len + 5 >= size) { 3671 size *= 2; 3672 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3673 if (buf == NULL) { 3674 xmlErrMemory(ctxt, NULL); 3675 ctxt->instate = state; 3676 return; 3677 } 3678 } 3679 COPY_BUF(ql,buf,len,q); 3680 q = r; 3681 ql = rl; 3682 r = cur; 3683 rl = l; 3684 3685 count++; 3686 if (count > 50) { 3687 GROW; 3688 count = 0; 3689 } 3690 NEXTL(l); 3691 cur = CUR_CHAR(l); 3692 if (cur == 0) { 3693 SHRINK; 3694 GROW; 3695 cur = CUR_CHAR(l); 3696 } 3697 } 3698 buf[len] = 0; 3699 if (!IS_CHAR(cur)) { 3700 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 3701 "Comment not terminated \n<!--%.50s\n", buf); 3702 xmlFree(buf); 3703 } else { 3704 if (input != ctxt->input) { 3705 xmlFatalErrMsg(ctxt, 
XML_ERR_ENTITY_BOUNDARY, 3706 "Comment doesn't start and stop in the same entity\n"); 3707 } 3708 NEXT; 3709 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 3710 (!ctxt->disableSAX)) 3711 ctxt->sax->comment(ctxt->userData, buf); 3712 xmlFree(buf); 3713 } 3714 ctxt->instate = state; 3715} 3716 3717/** 3718 * xmlParsePITarget: 3719 * @ctxt: an XML parser context 3720 * 3721 * parse the name of a PI 3722 * 3723 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 3724 * 3725 * Returns the PITarget name or NULL 3726 */ 3727 3728const xmlChar * 3729xmlParsePITarget(xmlParserCtxtPtr ctxt) { 3730 const xmlChar *name; 3731 3732 name = xmlParseName(ctxt); 3733 if ((name != NULL) && 3734 ((name[0] == 'x') || (name[0] == 'X')) && 3735 ((name[1] == 'm') || (name[1] == 'M')) && 3736 ((name[2] == 'l') || (name[2] == 'L'))) { 3737 int i; 3738 if ((name[0] == 'x') && (name[1] == 'm') && 3739 (name[2] == 'l') && (name[3] == 0)) { 3740 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 3741 "XML declaration allowed only at the start of the document\n"); 3742 return(name); 3743 } else if (name[3] == 0) { 3744 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 3745 return(name); 3746 } 3747 for (i = 0;;i++) { 3748 if (xmlW3CPIs[i] == NULL) break; 3749 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 3750 return(name); 3751 } 3752 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { 3753 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 3754 ctxt->sax->warning(ctxt->userData, 3755 "xmlParsePITarget: invalid name prefix 'xml'\n"); 3756 } 3757 } 3758 return(name); 3759} 3760 3761#ifdef LIBXML_CATALOG_ENABLED 3762/** 3763 * xmlParseCatalogPI: 3764 * @ctxt: an XML parser context 3765 * @catalog: the PI value string 3766 * 3767 * parse an XML Catalog Processing Instruction. 
3768 * 3769 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 3770 * 3771 * Occurs only if allowed by the user and if happening in the Misc 3772 * part of the document before any doctype informations 3773 * This will add the given catalog to the parsing context in order 3774 * to be used if there is a resolution need further down in the document 3775 */ 3776 3777static void 3778xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 3779 xmlChar *URL = NULL; 3780 const xmlChar *tmp, *base; 3781 xmlChar marker; 3782 3783 tmp = catalog; 3784 while (IS_BLANK(*tmp)) tmp++; 3785 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 3786 goto error; 3787 tmp += 7; 3788 while (IS_BLANK(*tmp)) tmp++; 3789 if (*tmp != '=') { 3790 return; 3791 } 3792 tmp++; 3793 while (IS_BLANK(*tmp)) tmp++; 3794 marker = *tmp; 3795 if ((marker != '\'') && (marker != '"')) 3796 goto error; 3797 tmp++; 3798 base = tmp; 3799 while ((*tmp != 0) && (*tmp != marker)) tmp++; 3800 if (*tmp == 0) 3801 goto error; 3802 URL = xmlStrndup(base, tmp - base); 3803 tmp++; 3804 while (IS_BLANK(*tmp)) tmp++; 3805 if (*tmp != 0) 3806 goto error; 3807 3808 if (URL != NULL) { 3809 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 3810 xmlFree(URL); 3811 } 3812 return; 3813 3814error: 3815 ctxt->errNo = XML_WAR_CATALOG_PI; 3816 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 3817 ctxt->sax->warning(ctxt->userData, 3818 "Catalog PI syntax error: %s\n", catalog); 3819 if (URL != NULL) 3820 xmlFree(URL); 3821} 3822#endif 3823 3824/** 3825 * xmlParsePI: 3826 * @ctxt: an XML parser context 3827 * 3828 * parse an XML Processing Instruction. 3829 * 3830 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 3831 * 3832 * The processing is transfered to SAX once parsed. 
3833 */ 3834 3835void 3836xmlParsePI(xmlParserCtxtPtr ctxt) { 3837 xmlChar *buf = NULL; 3838 int len = 0; 3839 int size = XML_PARSER_BUFFER_SIZE; 3840 int cur, l; 3841 const xmlChar *target; 3842 xmlParserInputState state; 3843 int count = 0; 3844 3845 if ((RAW == '<') && (NXT(1) == '?')) { 3846 xmlParserInputPtr input = ctxt->input; 3847 state = ctxt->instate; 3848 ctxt->instate = XML_PARSER_PI; 3849 /* 3850 * this is a Processing Instruction. 3851 */ 3852 SKIP(2); 3853 SHRINK; 3854 3855 /* 3856 * Parse the target name and check for special support like 3857 * namespace. 3858 */ 3859 target = xmlParsePITarget(ctxt); 3860 if (target != NULL) { 3861 if ((RAW == '?') && (NXT(1) == '>')) { 3862 if (input != ctxt->input) { 3863 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 3864 "PI declaration doesn't start and stop in the same entity\n"); 3865 } 3866 SKIP(2); 3867 3868 /* 3869 * SAX: PI detected. 3870 */ 3871 if ((ctxt->sax) && (!ctxt->disableSAX) && 3872 (ctxt->sax->processingInstruction != NULL)) 3873 ctxt->sax->processingInstruction(ctxt->userData, 3874 target, NULL); 3875 ctxt->instate = state; 3876 return; 3877 } 3878 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3879 if (buf == NULL) { 3880 xmlErrMemory(ctxt, NULL); 3881 ctxt->instate = state; 3882 return; 3883 } 3884 cur = CUR; 3885 if (!IS_BLANK(cur)) { 3886 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 3887 "ParsePI: PI %s space expected\n", target); 3888 } 3889 SKIP_BLANKS; 3890 cur = CUR_CHAR(l); 3891 while (IS_CHAR(cur) && /* checked */ 3892 ((cur != '?') || (NXT(1) != '>'))) { 3893 if (len + 5 >= size) { 3894 size *= 2; 3895 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3896 if (buf == NULL) { 3897 xmlErrMemory(ctxt, NULL); 3898 ctxt->instate = state; 3899 return; 3900 } 3901 } 3902 count++; 3903 if (count > 50) { 3904 GROW; 3905 count = 0; 3906 } 3907 COPY_BUF(l,buf,len,cur); 3908 NEXTL(l); 3909 cur = CUR_CHAR(l); 3910 if (cur == 0) { 3911 SHRINK; 3912 GROW; 3913 cur = 
CUR_CHAR(l); 3914 } 3915 } 3916 buf[len] = 0; 3917 if (cur != '?') { 3918 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 3919 "ParsePI: PI %s never end ...\n", target); 3920 } else { 3921 if (input != ctxt->input) { 3922 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3923 "PI declaration doesn't start and stop in the same entity\n"); 3924 } 3925 SKIP(2); 3926 3927#ifdef LIBXML_CATALOG_ENABLED 3928 if (((state == XML_PARSER_MISC) || 3929 (state == XML_PARSER_START)) && 3930 (xmlStrEqual(target, XML_CATALOG_PI))) { 3931 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 3932 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 3933 (allow == XML_CATA_ALLOW_ALL)) 3934 xmlParseCatalogPI(ctxt, buf); 3935 } 3936#endif 3937 3938 3939 /* 3940 * SAX: PI detected. 3941 */ 3942 if ((ctxt->sax) && (!ctxt->disableSAX) && 3943 (ctxt->sax->processingInstruction != NULL)) 3944 ctxt->sax->processingInstruction(ctxt->userData, 3945 target, buf); 3946 } 3947 xmlFree(buf); 3948 } else { 3949 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 3950 } 3951 ctxt->instate = state; 3952 } 3953} 3954 3955/** 3956 * xmlParseNotationDecl: 3957 * @ctxt: an XML parser context 3958 * 3959 * parse a notation declaration 3960 * 3961 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 3962 * 3963 * Hence there is actually 3 choices: 3964 * 'PUBLIC' S PubidLiteral 3965 * 'PUBLIC' S PubidLiteral S SystemLiteral 3966 * and 'SYSTEM' S SystemLiteral 3967 * 3968 * See the NOTE on xmlParseExternalID(). 
3969 */ 3970 3971void 3972xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 3973 const xmlChar *name; 3974 xmlChar *Pubid; 3975 xmlChar *Systemid; 3976 3977 if ((RAW == '<') && (NXT(1) == '!') && 3978 (NXT(2) == 'N') && (NXT(3) == 'O') && 3979 (NXT(4) == 'T') && (NXT(5) == 'A') && 3980 (NXT(6) == 'T') && (NXT(7) == 'I') && 3981 (NXT(8) == 'O') && (NXT(9) == 'N')) { 3982 xmlParserInputPtr input = ctxt->input; 3983 SHRINK; 3984 SKIP(10); 3985 if (!IS_BLANK(CUR)) { 3986 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3987 "Space required after '<!NOTATION'\n"); 3988 return; 3989 } 3990 SKIP_BLANKS; 3991 3992 name = xmlParseName(ctxt); 3993 if (name == NULL) { 3994 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 3995 return; 3996 } 3997 if (!IS_BLANK(CUR)) { 3998 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3999 "Space required after the NOTATION name'\n"); 4000 return; 4001 } 4002 SKIP_BLANKS; 4003 4004 /* 4005 * Parse the IDs. 4006 */ 4007 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 4008 SKIP_BLANKS; 4009 4010 if (RAW == '>') { 4011 if (input != ctxt->input) { 4012 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4013 "Notation declaration doesn't start and stop in the same entity\n"); 4014 } 4015 NEXT; 4016 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4017 (ctxt->sax->notationDecl != NULL)) 4018 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 4019 } else { 4020 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4021 } 4022 if (Systemid != NULL) xmlFree(Systemid); 4023 if (Pubid != NULL) xmlFree(Pubid); 4024 } 4025} 4026 4027/** 4028 * xmlParseEntityDecl: 4029 * @ctxt: an XML parser context 4030 * 4031 * parse <!ENTITY declarations 4032 * 4033 * [70] EntityDecl ::= GEDecl | PEDecl 4034 * 4035 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 4036 * 4037 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 4038 * 4039 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
4040 * 4041 * [74] PEDef ::= EntityValue | ExternalID 4042 * 4043 * [76] NDataDecl ::= S 'NDATA' S Name 4044 * 4045 * [ VC: Notation Declared ] 4046 * The Name must match the declared name of a notation. 4047 */ 4048 4049void 4050xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 4051 const xmlChar *name = NULL; 4052 xmlChar *value = NULL; 4053 xmlChar *URI = NULL, *literal = NULL; 4054 const xmlChar *ndata = NULL; 4055 int isParameter = 0; 4056 xmlChar *orig = NULL; 4057 int skipped; 4058 4059 GROW; 4060 if ((RAW == '<') && (NXT(1) == '!') && 4061 (NXT(2) == 'E') && (NXT(3) == 'N') && 4062 (NXT(4) == 'T') && (NXT(5) == 'I') && 4063 (NXT(6) == 'T') && (NXT(7) == 'Y')) { 4064 xmlParserInputPtr input = ctxt->input; 4065 SHRINK; 4066 SKIP(8); 4067 skipped = SKIP_BLANKS; 4068 if (skipped == 0) { 4069 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4070 "Space required after '<!ENTITY'\n"); 4071 } 4072 4073 if (RAW == '%') { 4074 NEXT; 4075 skipped = SKIP_BLANKS; 4076 if (skipped == 0) { 4077 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4078 "Space required after '%'\n"); 4079 } 4080 isParameter = 1; 4081 } 4082 4083 name = xmlParseName(ctxt); 4084 if (name == NULL) { 4085 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4086 "xmlParseEntityDecl: no name\n"); 4087 return; 4088 } 4089 skipped = SKIP_BLANKS; 4090 if (skipped == 0) { 4091 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4092 "Space required after the entity name\n"); 4093 } 4094 4095 ctxt->instate = XML_PARSER_ENTITY_DECL; 4096 /* 4097 * handle the various case of definitions... 
4098 */ 4099 if (isParameter) { 4100 if ((RAW == '"') || (RAW == '\'')) { 4101 value = xmlParseEntityValue(ctxt, &orig); 4102 if (value) { 4103 if ((ctxt->sax != NULL) && 4104 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4105 ctxt->sax->entityDecl(ctxt->userData, name, 4106 XML_INTERNAL_PARAMETER_ENTITY, 4107 NULL, NULL, value); 4108 } 4109 } else { 4110 URI = xmlParseExternalID(ctxt, &literal, 1); 4111 if ((URI == NULL) && (literal == NULL)) { 4112 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4113 } 4114 if (URI) { 4115 xmlURIPtr uri; 4116 4117 uri = xmlParseURI((const char *) URI); 4118 if (uri == NULL) { 4119 ctxt->errNo = XML_ERR_INVALID_URI; 4120 if ((ctxt->sax != NULL) && 4121 (!ctxt->disableSAX) && 4122 (ctxt->sax->error != NULL)) 4123 ctxt->sax->error(ctxt->userData, 4124 "Invalid URI: %s\n", URI); 4125 /* 4126 * This really ought to be a well formedness error 4127 * but the XML Core WG decided otherwise c.f. issue 4128 * E26 of the XML erratas. 4129 */ 4130 } else { 4131 if (uri->fragment != NULL) { 4132 /* 4133 * Okay this is foolish to block those but not 4134 * invalid URIs. 4135 */ 4136 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4137 } else { 4138 if ((ctxt->sax != NULL) && 4139 (!ctxt->disableSAX) && 4140 (ctxt->sax->entityDecl != NULL)) 4141 ctxt->sax->entityDecl(ctxt->userData, name, 4142 XML_EXTERNAL_PARAMETER_ENTITY, 4143 literal, URI, NULL); 4144 } 4145 xmlFreeURI(uri); 4146 } 4147 } 4148 } 4149 } else { 4150 if ((RAW == '"') || (RAW == '\'')) { 4151 value = xmlParseEntityValue(ctxt, &orig); 4152 if ((ctxt->sax != NULL) && 4153 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4154 ctxt->sax->entityDecl(ctxt->userData, name, 4155 XML_INTERNAL_GENERAL_ENTITY, 4156 NULL, NULL, value); 4157 /* 4158 * For expat compatibility in SAX mode. 
4159 */ 4160 if ((ctxt->myDoc == NULL) || 4161 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 4162 if (ctxt->myDoc == NULL) { 4163 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4164 } 4165 if (ctxt->myDoc->intSubset == NULL) 4166 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4167 BAD_CAST "fake", NULL, NULL); 4168 4169 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 4170 NULL, NULL, value); 4171 } 4172 } else { 4173 URI = xmlParseExternalID(ctxt, &literal, 1); 4174 if ((URI == NULL) && (literal == NULL)) { 4175 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4176 } 4177 if (URI) { 4178 xmlURIPtr uri; 4179 4180 uri = xmlParseURI((const char *)URI); 4181 if (uri == NULL) { 4182 ctxt->errNo = XML_ERR_INVALID_URI; 4183 if ((ctxt->sax != NULL) && 4184 (!ctxt->disableSAX) && 4185 (ctxt->sax->error != NULL)) 4186 ctxt->sax->error(ctxt->userData, 4187 "Invalid URI: %s\n", URI); 4188 /* 4189 * This really ought to be a well formedness error 4190 * but the XML Core WG decided otherwise c.f. issue 4191 * E26 of the XML erratas. 4192 */ 4193 } else { 4194 if (uri->fragment != NULL) { 4195 /* 4196 * Okay this is foolish to block those but not 4197 * invalid URIs. 
4198 */ 4199 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4200 } 4201 xmlFreeURI(uri); 4202 } 4203 } 4204 if ((RAW != '>') && (!IS_BLANK(CUR))) { 4205 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4206 "Space required before 'NDATA'\n"); 4207 } 4208 SKIP_BLANKS; 4209 if ((RAW == 'N') && (NXT(1) == 'D') && 4210 (NXT(2) == 'A') && (NXT(3) == 'T') && 4211 (NXT(4) == 'A')) { 4212 SKIP(5); 4213 if (!IS_BLANK(CUR)) { 4214 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4215 "Space required after 'NDATA'\n"); 4216 } 4217 SKIP_BLANKS; 4218 ndata = xmlParseName(ctxt); 4219 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4220 (ctxt->sax->unparsedEntityDecl != NULL)) 4221 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 4222 literal, URI, ndata); 4223 } else { 4224 if ((ctxt->sax != NULL) && 4225 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4226 ctxt->sax->entityDecl(ctxt->userData, name, 4227 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4228 literal, URI, NULL); 4229 /* 4230 * For expat compatibility in SAX mode. 
4231 * assuming the entity repalcement was asked for 4232 */ 4233 if ((ctxt->replaceEntities != 0) && 4234 ((ctxt->myDoc == NULL) || 4235 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 4236 if (ctxt->myDoc == NULL) { 4237 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4238 } 4239 4240 if (ctxt->myDoc->intSubset == NULL) 4241 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4242 BAD_CAST "fake", NULL, NULL); 4243 xmlSAX2EntityDecl(ctxt, name, 4244 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4245 literal, URI, NULL); 4246 } 4247 } 4248 } 4249 } 4250 SKIP_BLANKS; 4251 if (RAW != '>') { 4252 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 4253 "xmlParseEntityDecl: entity %s not terminated\n", name); 4254 } else { 4255 if (input != ctxt->input) { 4256 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4257 "Entity declaration doesn't start and stop in the same entity\n"); 4258 } 4259 NEXT; 4260 } 4261 if (orig != NULL) { 4262 /* 4263 * Ugly mechanism to save the raw entity value. 4264 */ 4265 xmlEntityPtr cur = NULL; 4266 4267 if (isParameter) { 4268 if ((ctxt->sax != NULL) && 4269 (ctxt->sax->getParameterEntity != NULL)) 4270 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 4271 } else { 4272 if ((ctxt->sax != NULL) && 4273 (ctxt->sax->getEntity != NULL)) 4274 cur = ctxt->sax->getEntity(ctxt->userData, name); 4275 if ((cur == NULL) && (ctxt->userData==ctxt)) { 4276 cur = xmlSAX2GetEntity(ctxt, name); 4277 } 4278 } 4279 if (cur != NULL) { 4280 if (cur->orig != NULL) 4281 xmlFree(orig); 4282 else 4283 cur->orig = orig; 4284 } else 4285 xmlFree(orig); 4286 } 4287 if (value != NULL) xmlFree(value); 4288 if (URI != NULL) xmlFree(URI); 4289 if (literal != NULL) xmlFree(literal); 4290 } 4291} 4292 4293/** 4294 * xmlParseDefaultDecl: 4295 * @ctxt: an XML parser context 4296 * @value: Receive a possible fixed default value for the attribute 4297 * 4298 * Parse an attribute default declaration 4299 * 4300 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? 
AttValue) 4301 * 4302 * [ VC: Required Attribute ] 4303 * if the default declaration is the keyword #REQUIRED, then the 4304 * attribute must be specified for all elements of the type in the 4305 * attribute-list declaration. 4306 * 4307 * [ VC: Attribute Default Legal ] 4308 * The declared default value must meet the lexical constraints of 4309 * the declared attribute type c.f. xmlValidateAttributeDecl() 4310 * 4311 * [ VC: Fixed Attribute Default ] 4312 * if an attribute has a default value declared with the #FIXED 4313 * keyword, instances of that attribute must match the default value. 4314 * 4315 * [ WFC: No < in Attribute Values ] 4316 * handled in xmlParseAttValue() 4317 * 4318 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 4319 * or XML_ATTRIBUTE_FIXED. 4320 */ 4321 4322int 4323xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 4324 int val; 4325 xmlChar *ret; 4326 4327 *value = NULL; 4328 if ((RAW == '#') && (NXT(1) == 'R') && 4329 (NXT(2) == 'E') && (NXT(3) == 'Q') && 4330 (NXT(4) == 'U') && (NXT(5) == 'I') && 4331 (NXT(6) == 'R') && (NXT(7) == 'E') && 4332 (NXT(8) == 'D')) { 4333 SKIP(9); 4334 return(XML_ATTRIBUTE_REQUIRED); 4335 } 4336 if ((RAW == '#') && (NXT(1) == 'I') && 4337 (NXT(2) == 'M') && (NXT(3) == 'P') && 4338 (NXT(4) == 'L') && (NXT(5) == 'I') && 4339 (NXT(6) == 'E') && (NXT(7) == 'D')) { 4340 SKIP(8); 4341 return(XML_ATTRIBUTE_IMPLIED); 4342 } 4343 val = XML_ATTRIBUTE_NONE; 4344 if ((RAW == '#') && (NXT(1) == 'F') && 4345 (NXT(2) == 'I') && (NXT(3) == 'X') && 4346 (NXT(4) == 'E') && (NXT(5) == 'D')) { 4347 SKIP(6); 4348 val = XML_ATTRIBUTE_FIXED; 4349 if (!IS_BLANK(CUR)) { 4350 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4351 "Space required after '#FIXED'\n"); 4352 } 4353 SKIP_BLANKS; 4354 } 4355 ret = xmlParseAttValue(ctxt); 4356 ctxt->instate = XML_PARSER_DTD; 4357 if (ret == NULL) { 4358 xmlFatalErrMsg(ctxt, ctxt->errNo, 4359 "Attribute default value declaration error\n"); 4360 } else 4361 
*value = ret; 4362 return(val); 4363} 4364 4365/** 4366 * xmlParseNotationType: 4367 * @ctxt: an XML parser context 4368 * 4369 * parse an Notation attribute type. 4370 * 4371 * Note: the leading 'NOTATION' S part has already being parsed... 4372 * 4373 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 4374 * 4375 * [ VC: Notation Attributes ] 4376 * Values of this type must match one of the notation names included 4377 * in the declaration; all notation names in the declaration must be declared. 4378 * 4379 * Returns: the notation attribute tree built while parsing 4380 */ 4381 4382xmlEnumerationPtr 4383xmlParseNotationType(xmlParserCtxtPtr ctxt) { 4384 const xmlChar *name; 4385 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4386 4387 if (RAW != '(') { 4388 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4389 return(NULL); 4390 } 4391 SHRINK; 4392 do { 4393 NEXT; 4394 SKIP_BLANKS; 4395 name = xmlParseName(ctxt); 4396 if (name == NULL) { 4397 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4398 "Name expected in NOTATION declaration\n"); 4399 return(ret); 4400 } 4401 cur = xmlCreateEnumeration(name); 4402 if (cur == NULL) return(ret); 4403 if (last == NULL) ret = last = cur; 4404 else { 4405 last->next = cur; 4406 last = cur; 4407 } 4408 SKIP_BLANKS; 4409 } while (RAW == '|'); 4410 if (RAW != ')') { 4411 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4412 if ((last != NULL) && (last != ret)) 4413 xmlFreeEnumeration(last); 4414 return(ret); 4415 } 4416 NEXT; 4417 return(ret); 4418} 4419 4420/** 4421 * xmlParseEnumerationType: 4422 * @ctxt: an XML parser context 4423 * 4424 * parse an Enumeration attribute type. 4425 * 4426 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * If a token is missing, an allocation fails or the closing ')' is
 * absent, the tokens gathered up to that point are returned.
 *
 * Returns: the enumeration attribute tree built while parsing
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlChar *token;
    xmlEnumerationPtr head = NULL;
    xmlEnumerationPtr *link = &head;	/* slot receiving the next node */
    xmlEnumerationPtr node;

    if (RAW != '(') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
        return(NULL);
    }
    SHRINK;
    for (;;) {
        /* step over the '(' on the first round, over a '|' later on */
        NEXT;
        SKIP_BLANKS;
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
            return(head);
        }
        node = xmlCreateEnumeration(token);
        /* the parsed token is released right after the node is created */
        xmlFree(token);
        if (node == NULL) {
            return(head);
        }
        *link = node;
        link = &node->next;
        SKIP_BLANKS;
        if (RAW != '|') {
            break;
        }
    }
    if (RAW == ')') {
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
    }
    return(head);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')' 4481 * 4482 * 4483 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 4484 */ 4485 4486int 4487xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4488 if ((RAW == 'N') && (NXT(1) == 'O') && 4489 (NXT(2) == 'T') && (NXT(3) == 'A') && 4490 (NXT(4) == 'T') && (NXT(5) == 'I') && 4491 (NXT(6) == 'O') && (NXT(7) == 'N')) { 4492 SKIP(8); 4493 if (!IS_BLANK(CUR)) { 4494 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4495 "Space required after 'NOTATION'\n"); 4496 return(0); 4497 } 4498 SKIP_BLANKS; 4499 *tree = xmlParseNotationType(ctxt); 4500 if (*tree == NULL) return(0); 4501 return(XML_ATTRIBUTE_NOTATION); 4502 } 4503 *tree = xmlParseEnumerationType(ctxt); 4504 if (*tree == NULL) return(0); 4505 return(XML_ATTRIBUTE_ENUMERATION); 4506} 4507 4508/** 4509 * xmlParseAttributeType: 4510 * @ctxt: an XML parser context 4511 * @tree: the enumeration tree built while parsing 4512 * 4513 * parse the Attribute list def for an element 4514 * 4515 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 4516 * 4517 * [55] StringType ::= 'CDATA' 4518 * 4519 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 4520 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 4521 * 4522 * Validity constraints for attribute values syntax are checked in 4523 * xmlValidateAttributeValue() 4524 * 4525 * [ VC: ID ] 4526 * Values of type ID must match the Name production. A name must not 4527 * appear more than once in an XML document as a value of this type; 4528 * i.e., ID values must uniquely identify the elements which bear them. 4529 * 4530 * [ VC: One ID per Element Type ] 4531 * No element type may have more than one ID attribute specified. 4532 * 4533 * [ VC: ID Attribute Default ] 4534 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 
4535 * 4536 * [ VC: IDREF ] 4537 * Values of type IDREF must match the Name production, and values 4538 * of type IDREFS must match Names; each IDREF Name must match the value 4539 * of an ID attribute on some element in the XML document; i.e. IDREF 4540 * values must match the value of some ID attribute. 4541 * 4542 * [ VC: Entity Name ] 4543 * Values of type ENTITY must match the Name production, values 4544 * of type ENTITIES must match Names; each Entity Name must match the 4545 * name of an unparsed entity declared in the DTD. 4546 * 4547 * [ VC: Name Token ] 4548 * Values of type NMTOKEN must match the Nmtoken production; values 4549 * of type NMTOKENS must match Nmtokens. 4550 * 4551 * Returns the attribute type 4552 */ 4553int 4554xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4555 SHRINK; 4556 if ((RAW == 'C') && (NXT(1) == 'D') && 4557 (NXT(2) == 'A') && (NXT(3) == 'T') && 4558 (NXT(4) == 'A')) { 4559 SKIP(5); 4560 return(XML_ATTRIBUTE_CDATA); 4561 } else if ((RAW == 'I') && (NXT(1) == 'D') && 4562 (NXT(2) == 'R') && (NXT(3) == 'E') && 4563 (NXT(4) == 'F') && (NXT(5) == 'S')) { 4564 SKIP(6); 4565 return(XML_ATTRIBUTE_IDREFS); 4566 } else if ((RAW == 'I') && (NXT(1) == 'D') && 4567 (NXT(2) == 'R') && (NXT(3) == 'E') && 4568 (NXT(4) == 'F')) { 4569 SKIP(5); 4570 return(XML_ATTRIBUTE_IDREF); 4571 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 4572 SKIP(2); 4573 return(XML_ATTRIBUTE_ID); 4574 } else if ((RAW == 'E') && (NXT(1) == 'N') && 4575 (NXT(2) == 'T') && (NXT(3) == 'I') && 4576 (NXT(4) == 'T') && (NXT(5) == 'Y')) { 4577 SKIP(6); 4578 return(XML_ATTRIBUTE_ENTITY); 4579 } else if ((RAW == 'E') && (NXT(1) == 'N') && 4580 (NXT(2) == 'T') && (NXT(3) == 'I') && 4581 (NXT(4) == 'T') && (NXT(5) == 'I') && 4582 (NXT(6) == 'E') && (NXT(7) == 'S')) { 4583 SKIP(8); 4584 return(XML_ATTRIBUTE_ENTITIES); 4585 } else if ((RAW == 'N') && (NXT(1) == 'M') && 4586 (NXT(2) == 'T') && (NXT(3) == 'O') && 4587 (NXT(4) == 'K') && (NXT(5) == 'E') && 4588 
(NXT(6) == 'N') && (NXT(7) == 'S')) { 4589 SKIP(8); 4590 return(XML_ATTRIBUTE_NMTOKENS); 4591 } else if ((RAW == 'N') && (NXT(1) == 'M') && 4592 (NXT(2) == 'T') && (NXT(3) == 'O') && 4593 (NXT(4) == 'K') && (NXT(5) == 'E') && 4594 (NXT(6) == 'N')) { 4595 SKIP(7); 4596 return(XML_ATTRIBUTE_NMTOKEN); 4597 } 4598 return(xmlParseEnumeratedType(ctxt, tree)); 4599} 4600 4601/** 4602 * xmlParseAttributeListDecl: 4603 * @ctxt: an XML parser context 4604 * 4605 * : parse the Attribute list def for an element 4606 * 4607 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 4608 * 4609 * [53] AttDef ::= S Name S AttType S DefaultDecl 4610 * 4611 */ 4612void 4613xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 4614 const xmlChar *elemName; 4615 const xmlChar *attrName; 4616 xmlEnumerationPtr tree; 4617 4618 if ((RAW == '<') && (NXT(1) == '!') && 4619 (NXT(2) == 'A') && (NXT(3) == 'T') && 4620 (NXT(4) == 'T') && (NXT(5) == 'L') && 4621 (NXT(6) == 'I') && (NXT(7) == 'S') && 4622 (NXT(8) == 'T')) { 4623 xmlParserInputPtr input = ctxt->input; 4624 4625 SKIP(9); 4626 if (!IS_BLANK(CUR)) { 4627 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4628 "Space required after '<!ATTLIST'\n"); 4629 } 4630 SKIP_BLANKS; 4631 elemName = xmlParseName(ctxt); 4632 if (elemName == NULL) { 4633 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4634 "ATTLIST: no name for Element\n"); 4635 return; 4636 } 4637 SKIP_BLANKS; 4638 GROW; 4639 while (RAW != '>') { 4640 const xmlChar *check = CUR_PTR; 4641 int type; 4642 int def; 4643 xmlChar *defaultValue = NULL; 4644 4645 GROW; 4646 tree = NULL; 4647 attrName = xmlParseName(ctxt); 4648 if (attrName == NULL) { 4649 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4650 "ATTLIST: no name for Attribute\n"); 4651 break; 4652 } 4653 GROW; 4654 if (!IS_BLANK(CUR)) { 4655 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4656 "Space required after the attribute name\n"); 4657 if (defaultValue != NULL) 4658 xmlFree(defaultValue); 4659 break; 4660 } 4661 SKIP_BLANKS; 4662 4663 
type = xmlParseAttributeType(ctxt, &tree); 4664 if (type <= 0) { 4665 if (defaultValue != NULL) 4666 xmlFree(defaultValue); 4667 break; 4668 } 4669 4670 GROW; 4671 if (!IS_BLANK(CUR)) { 4672 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4673 "Space required after the attribute type\n"); 4674 if (defaultValue != NULL) 4675 xmlFree(defaultValue); 4676 if (tree != NULL) 4677 xmlFreeEnumeration(tree); 4678 break; 4679 } 4680 SKIP_BLANKS; 4681 4682 def = xmlParseDefaultDecl(ctxt, &defaultValue); 4683 if (def <= 0) { 4684 if (defaultValue != NULL) 4685 xmlFree(defaultValue); 4686 if (tree != NULL) 4687 xmlFreeEnumeration(tree); 4688 break; 4689 } 4690 4691 GROW; 4692 if (RAW != '>') { 4693 if (!IS_BLANK(CUR)) { 4694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4695 "Space required after the attribute default value\n"); 4696 if (defaultValue != NULL) 4697 xmlFree(defaultValue); 4698 if (tree != NULL) 4699 xmlFreeEnumeration(tree); 4700 break; 4701 } 4702 SKIP_BLANKS; 4703 } 4704 if (check == CUR_PTR) { 4705 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 4706 "in xmlParseAttributeListDecl\n"); 4707 if (defaultValue != NULL) 4708 xmlFree(defaultValue); 4709 if (tree != NULL) 4710 xmlFreeEnumeration(tree); 4711 break; 4712 } 4713 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4714 (ctxt->sax->attributeDecl != NULL)) 4715 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4716 type, def, defaultValue, tree); 4717 else if (tree != NULL) 4718 xmlFreeEnumeration(tree); 4719 4720 if ((ctxt->sax2) && (defaultValue != NULL) && 4721 (def != XML_ATTRIBUTE_IMPLIED) && 4722 (def != XML_ATTRIBUTE_REQUIRED)) { 4723 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 4724 } 4725 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) { 4726 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 4727 } 4728 if (defaultValue != NULL) 4729 xmlFree(defaultValue); 4730 GROW; 4731 } 4732 if (RAW == '>') { 4733 if (input != ctxt->input) { 4734 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4735 
"Attribute list declaration doesn't start and stop in the same entity\n"); 4736 } 4737 NEXT; 4738 } 4739 } 4740} 4741 4742/** 4743 * xmlParseElementMixedContentDecl: 4744 * @ctxt: an XML parser context 4745 * @inputchk: the input used for the current entity, needed for boundary checks 4746 * 4747 * parse the declaration for a Mixed Element content 4748 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4749 * 4750 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 4751 * '(' S? '#PCDATA' S? ')' 4752 * 4753 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 4754 * 4755 * [ VC: No Duplicate Types ] 4756 * The same name must not appear more than once in a single 4757 * mixed-content declaration. 4758 * 4759 * returns: the list of the xmlElementContentPtr describing the element choices 4760 */ 4761xmlElementContentPtr 4762xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 4763 xmlElementContentPtr ret = NULL, cur = NULL, n; 4764 const xmlChar *elem = NULL; 4765 4766 GROW; 4767 if ((RAW == '#') && (NXT(1) == 'P') && 4768 (NXT(2) == 'C') && (NXT(3) == 'D') && 4769 (NXT(4) == 'A') && (NXT(5) == 'T') && 4770 (NXT(6) == 'A')) { 4771 SKIP(7); 4772 SKIP_BLANKS; 4773 SHRINK; 4774 if (RAW == ')') { 4775 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 4776 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4777 if (ctxt->vctxt.error != NULL) 4778 ctxt->vctxt.error(ctxt->vctxt.userData, 4779"Element content declaration doesn't start and stop in the same entity\n"); 4780 ctxt->valid = 0; 4781 } 4782 NEXT; 4783 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4784 if (RAW == '*') { 4785 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4786 NEXT; 4787 } 4788 return(ret); 4789 } 4790 if ((RAW == '(') || (RAW == '|')) { 4791 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4792 if (ret == NULL) return(NULL); 4793 } 4794 while (RAW == '|') { 4795 NEXT; 4796 if (elem == NULL) { 4797 ret = 
xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4798 if (ret == NULL) return(NULL); 4799 ret->c1 = cur; 4800 if (cur != NULL) 4801 cur->parent = ret; 4802 cur = ret; 4803 } else { 4804 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4805 if (n == NULL) return(NULL); 4806 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4807 if (n->c1 != NULL) 4808 n->c1->parent = n; 4809 cur->c2 = n; 4810 if (n != NULL) 4811 n->parent = cur; 4812 cur = n; 4813 } 4814 SKIP_BLANKS; 4815 elem = xmlParseName(ctxt); 4816 if (elem == NULL) { 4817 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4818 "xmlParseElementMixedContentDecl : Name expected\n"); 4819 xmlFreeElementContent(cur); 4820 return(NULL); 4821 } 4822 SKIP_BLANKS; 4823 GROW; 4824 } 4825 if ((RAW == ')') && (NXT(1) == '*')) { 4826 if (elem != NULL) { 4827 cur->c2 = xmlNewElementContent(elem, 4828 XML_ELEMENT_CONTENT_ELEMENT); 4829 if (cur->c2 != NULL) 4830 cur->c2->parent = cur; 4831 } 4832 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4833 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 4834 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4835 if (ctxt->vctxt.error != NULL) 4836 ctxt->vctxt.error(ctxt->vctxt.userData, 4837"Element content declaration doesn't start and stop in the same entity\n"); 4838 ctxt->valid = 0; 4839 } 4840 SKIP(2); 4841 } else { 4842 xmlFreeElementContent(ret); 4843 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 4844 return(NULL); 4845 } 4846 4847 } else { 4848 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 4849 } 4850 return(ret); 4851} 4852 4853/** 4854 * xmlParseElementChildrenContentDecl: 4855 * @ctxt: an XML parser context 4856 * @inputchk: the input used for the current entity, needed for boundary checks 4857 * 4858 * parse the declaration for a Mixed Element content 4859 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4860 * 4861 * 4862 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4863 * 4864 * [48] cp ::= (Name | choice | seq) ('?' 
| '*' | '+')? 4865 * 4866 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4867 * 4868 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 4869 * 4870 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4871 * TODO Parameter-entity replacement text must be properly nested 4872 * with parenthesized groups. That is to say, if either of the 4873 * opening or closing parentheses in a choice, seq, or Mixed 4874 * construct is contained in the replacement text for a parameter 4875 * entity, both must be contained in the same replacement text. For 4876 * interoperability, if a parameter-entity reference appears in a 4877 * choice, seq, or Mixed construct, its replacement text should not 4878 * be empty, and neither the first nor last non-blank character of 4879 * the replacement text should be a connector (| or ,). 4880 * 4881 * Returns the tree of xmlElementContentPtr describing the element 4882 * hierarchy. 4883 */ 4884xmlElementContentPtr 4885xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { 4886 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4887 const xmlChar *elem; 4888 xmlChar type = 0; 4889 4890 SKIP_BLANKS; 4891 GROW; 4892 if (RAW == '(') { 4893 int inputid = ctxt->input->id; 4894 4895 /* Recurse on first child */ 4896 NEXT; 4897 SKIP_BLANKS; 4898 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid); 4899 SKIP_BLANKS; 4900 GROW; 4901 } else { 4902 elem = xmlParseName(ctxt); 4903 if (elem == NULL) { 4904 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 4905 return(NULL); 4906 } 4907 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4908 if (cur == NULL) { 4909 xmlErrMemory(ctxt, NULL); 4910 return(NULL); 4911 } 4912 GROW; 4913 if (RAW == '?') { 4914 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4915 NEXT; 4916 } else if (RAW == '*') { 4917 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4918 NEXT; 4919 } else if (RAW == '+') { 4920 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4921 NEXT; 4922 } else 
{ 4923 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4924 } 4925 GROW; 4926 } 4927 SKIP_BLANKS; 4928 SHRINK; 4929 while (RAW != ')') { 4930 /* 4931 * Each loop we parse one separator and one element. 4932 */ 4933 if (RAW == ',') { 4934 if (type == 0) type = CUR; 4935 4936 /* 4937 * Detect "Name | Name , Name" error 4938 */ 4939 else if (type != CUR) { 4940 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 4941 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4942 type); 4943 if ((last != NULL) && (last != ret)) 4944 xmlFreeElementContent(last); 4945 if (ret != NULL) 4946 xmlFreeElementContent(ret); 4947 return(NULL); 4948 } 4949 NEXT; 4950 4951 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4952 if (op == NULL) { 4953 if ((last != NULL) && (last != ret)) 4954 xmlFreeElementContent(last); 4955 xmlFreeElementContent(ret); 4956 return(NULL); 4957 } 4958 if (last == NULL) { 4959 op->c1 = ret; 4960 if (ret != NULL) 4961 ret->parent = op; 4962 ret = cur = op; 4963 } else { 4964 cur->c2 = op; 4965 if (op != NULL) 4966 op->parent = cur; 4967 op->c1 = last; 4968 if (last != NULL) 4969 last->parent = op; 4970 cur =op; 4971 last = NULL; 4972 } 4973 } else if (RAW == '|') { 4974 if (type == 0) type = CUR; 4975 4976 /* 4977 * Detect "Name , Name | Name" error 4978 */ 4979 else if (type != CUR) { 4980 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 4981 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4982 type); 4983 if ((last != NULL) && (last != ret)) 4984 xmlFreeElementContent(last); 4985 if (ret != NULL) 4986 xmlFreeElementContent(ret); 4987 return(NULL); 4988 } 4989 NEXT; 4990 4991 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4992 if (op == NULL) { 4993 if ((last != NULL) && (last != ret)) 4994 xmlFreeElementContent(last); 4995 if (ret != NULL) 4996 xmlFreeElementContent(ret); 4997 return(NULL); 4998 } 4999 if (last == NULL) { 5000 op->c1 = ret; 5001 if (ret != NULL) 5002 ret->parent = op; 5003 ret = cur = op; 5004 } else { 5005 cur->c2 = 
op; 5006 if (op != NULL) 5007 op->parent = cur; 5008 op->c1 = last; 5009 if (last != NULL) 5010 last->parent = op; 5011 cur =op; 5012 last = NULL; 5013 } 5014 } else { 5015 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 5016 if (ret != NULL) 5017 xmlFreeElementContent(ret); 5018 return(NULL); 5019 } 5020 GROW; 5021 SKIP_BLANKS; 5022 GROW; 5023 if (RAW == '(') { 5024 int inputid = ctxt->input->id; 5025 /* Recurse on second child */ 5026 NEXT; 5027 SKIP_BLANKS; 5028 last = xmlParseElementChildrenContentDecl(ctxt, inputid); 5029 SKIP_BLANKS; 5030 } else { 5031 elem = xmlParseName(ctxt); 5032 if (elem == NULL) { 5033 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5034 if (ret != NULL) 5035 xmlFreeElementContent(ret); 5036 return(NULL); 5037 } 5038 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 5039 if (RAW == '?') { 5040 last->ocur = XML_ELEMENT_CONTENT_OPT; 5041 NEXT; 5042 } else if (RAW == '*') { 5043 last->ocur = XML_ELEMENT_CONTENT_MULT; 5044 NEXT; 5045 } else if (RAW == '+') { 5046 last->ocur = XML_ELEMENT_CONTENT_PLUS; 5047 NEXT; 5048 } else { 5049 last->ocur = XML_ELEMENT_CONTENT_ONCE; 5050 } 5051 } 5052 SKIP_BLANKS; 5053 GROW; 5054 } 5055 if ((cur != NULL) && (last != NULL)) { 5056 cur->c2 = last; 5057 if (last != NULL) 5058 last->parent = cur; 5059 } 5060 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5061 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 5062 if (ctxt->vctxt.error != NULL) 5063 ctxt->vctxt.error(ctxt->vctxt.userData, 5064"Element content declaration doesn't start and stop in the same entity\n"); 5065 ctxt->valid = 0; 5066 } 5067 NEXT; 5068 if (RAW == '?') { 5069 if (ret != NULL) 5070 ret->ocur = XML_ELEMENT_CONTENT_OPT; 5071 NEXT; 5072 } else if (RAW == '*') { 5073 if (ret != NULL) { 5074 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5075 cur = ret; 5076 /* 5077 * Some normalization: 5078 * (a | b* | c?)* == (a | b | c)* 5079 */ 5080 while (cur->type == XML_ELEMENT_CONTENT_OR) { 5081 if ((cur->c1 != NULL) && 
5082 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5083 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 5084 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5085 if ((cur->c2 != NULL) && 5086 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5087 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 5088 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 5089 cur = cur->c2; 5090 } 5091 } 5092 NEXT; 5093 } else if (RAW == '+') { 5094 if (ret != NULL) { 5095 int found = 0; 5096 5097 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 5098 /* 5099 * Some normalization: 5100 * (a | b*)+ == (a | b)* 5101 * (a | b?)+ == (a | b)* 5102 */ 5103 while (cur->type == XML_ELEMENT_CONTENT_OR) { 5104 if ((cur->c1 != NULL) && 5105 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5106 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 5107 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5108 found = 1; 5109 } 5110 if ((cur->c2 != NULL) && 5111 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5112 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 5113 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 5114 found = 1; 5115 } 5116 cur = cur->c2; 5117 } 5118 if (found) 5119 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5120 } 5121 NEXT; 5122 } 5123 return(ret); 5124} 5125 5126/** 5127 * xmlParseElementContentDecl: 5128 * @ctxt: an XML parser context 5129 * @name: the name of the element being defined. 
5130 * @result: the Element Content pointer will be stored here if any 5131 * 5132 * parse the declaration for an Element content either Mixed or Children, 5133 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 5134 * 5135 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 5136 * 5137 * returns: the type of element content XML_ELEMENT_TYPE_xxx 5138 */ 5139 5140int 5141xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 5142 xmlElementContentPtr *result) { 5143 5144 xmlElementContentPtr tree = NULL; 5145 int inputid = ctxt->input->id; 5146 int res; 5147 5148 *result = NULL; 5149 5150 if (RAW != '(') { 5151 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 5152 "xmlParseElementContentDecl : %s '(' expected\n", name); 5153 return(-1); 5154 } 5155 NEXT; 5156 GROW; 5157 SKIP_BLANKS; 5158 if ((RAW == '#') && (NXT(1) == 'P') && 5159 (NXT(2) == 'C') && (NXT(3) == 'D') && 5160 (NXT(4) == 'A') && (NXT(5) == 'T') && 5161 (NXT(6) == 'A')) { 5162 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 5163 res = XML_ELEMENT_TYPE_MIXED; 5164 } else { 5165 tree = xmlParseElementChildrenContentDecl(ctxt, inputid); 5166 res = XML_ELEMENT_TYPE_ELEMENT; 5167 } 5168 SKIP_BLANKS; 5169 *result = tree; 5170 return(res); 5171} 5172 5173/** 5174 * xmlParseElementDecl: 5175 * @ctxt: an XML parser context 5176 * 5177 * parse an Element declaration. 5178 * 5179 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 5180 * 5181 * [ VC: Unique Element Type Declaration ] 5182 * No element type may be declared more than once 5183 * 5184 * Returns the type of the element, or -1 in case of error 5185 */ 5186int 5187xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 5188 const xmlChar *name; 5189 int ret = -1; 5190 xmlElementContentPtr content = NULL; 5191 5192 GROW; 5193 if ((RAW == '<') && (NXT(1) == '!') && 5194 (NXT(2) == 'E') && (NXT(3) == 'L') && 5195 (NXT(4) == 'E') && (NXT(5) == 'M') && 5196 (NXT(6) == 'E') && (NXT(7) == 'N') && 5197 (NXT(8) == 'T')) { 5198 xmlParserInputPtr input = ctxt->input; 5199 5200 SKIP(9); 5201 if (!IS_BLANK(CUR)) { 5202 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5203 "Space required after 'ELEMENT'\n"); 5204 } 5205 SKIP_BLANKS; 5206 name = xmlParseName(ctxt); 5207 if (name == NULL) { 5208 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5209 "xmlParseElementDecl: no name for Element\n"); 5210 return(-1); 5211 } 5212 while ((RAW == 0) && (ctxt->inputNr > 1)) 5213 xmlPopInput(ctxt); 5214 if (!IS_BLANK(CUR)) { 5215 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5216 "Space required after the element name\n"); 5217 } 5218 SKIP_BLANKS; 5219 if ((RAW == 'E') && (NXT(1) == 'M') && 5220 (NXT(2) == 'P') && (NXT(3) == 'T') && 5221 (NXT(4) == 'Y')) { 5222 SKIP(5); 5223 /* 5224 * Element must always be empty. 5225 */ 5226 ret = XML_ELEMENT_TYPE_EMPTY; 5227 } else if ((RAW == 'A') && (NXT(1) == 'N') && 5228 (NXT(2) == 'Y')) { 5229 SKIP(3); 5230 /* 5231 * Element is a generic container. 5232 */ 5233 ret = XML_ELEMENT_TYPE_ANY; 5234 } else if (RAW == '(') { 5235 ret = xmlParseElementContentDecl(ctxt, name, &content); 5236 } else { 5237 /* 5238 * [ WFC: PEs in Internal Subset ] error handling. 
5239 */ 5240 if ((RAW == '%') && (ctxt->external == 0) && 5241 (ctxt->inputNr == 1)) { 5242 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET; 5243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5244 ctxt->sax->error(ctxt->userData, 5245 "PEReference: forbidden within markup decl in internal subset\n"); 5246 } else { 5247 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 5248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5249 ctxt->sax->error(ctxt->userData, 5250 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 5251 } 5252 ctxt->wellFormed = 0; 5253 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5254 return(-1); 5255 } 5256 5257 SKIP_BLANKS; 5258 /* 5259 * Pop-up of finished entities. 5260 */ 5261 while ((RAW == 0) && (ctxt->inputNr > 1)) 5262 xmlPopInput(ctxt); 5263 SKIP_BLANKS; 5264 5265 if (RAW != '>') { 5266 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 5267 } else { 5268 if (input != ctxt->input) { 5269 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5270 "Element declaration doesn't start and stop in the same entity\n"); 5271 } 5272 5273 NEXT; 5274 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5275 (ctxt->sax->elementDecl != NULL)) 5276 ctxt->sax->elementDecl(ctxt->userData, name, ret, 5277 content); 5278 } 5279 if (content != NULL) { 5280 xmlFreeElementContent(content); 5281 } 5282 } 5283 return(ret); 5284} 5285 5286/** 5287 * xmlParseConditionalSections 5288 * @ctxt: an XML parser context 5289 * 5290 * [61] conditionalSect ::= includeSect | ignoreSect 5291 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 5292 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 5293 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 5294 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 5295 */ 5296 5297static void 5298xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 5299 SKIP(3); 5300 SKIP_BLANKS; 5301 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && 5302 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && 5303 (NXT(6) == 'E')) { 5304 SKIP(7); 5305 SKIP_BLANKS; 5306 if (RAW != '[') { 5307 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5308 } else { 5309 NEXT; 5310 } 5311 if (xmlParserDebugEntities) { 5312 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5313 xmlGenericError(xmlGenericErrorContext, 5314 "%s(%d): ", ctxt->input->filename, 5315 ctxt->input->line); 5316 xmlGenericError(xmlGenericErrorContext, 5317 "Entering INCLUDE Conditional Section\n"); 5318 } 5319 5320 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 5321 (NXT(2) != '>'))) { 5322 const xmlChar *check = CUR_PTR; 5323 unsigned int cons = ctxt->input->consumed; 5324 5325 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5326 xmlParseConditionalSections(ctxt); 5327 } else if (IS_BLANK(CUR)) { 5328 NEXT; 5329 } else if (RAW == '%') { 5330 xmlParsePEReference(ctxt); 5331 } else 5332 xmlParseMarkupDecl(ctxt); 5333 5334 /* 5335 * Pop-up of finished entities. 
5336 */ 5337 while ((RAW == 0) && (ctxt->inputNr > 1)) 5338 xmlPopInput(ctxt); 5339 5340 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5341 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5342 break; 5343 } 5344 } 5345 if (xmlParserDebugEntities) { 5346 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5347 xmlGenericError(xmlGenericErrorContext, 5348 "%s(%d): ", ctxt->input->filename, 5349 ctxt->input->line); 5350 xmlGenericError(xmlGenericErrorContext, 5351 "Leaving INCLUDE Conditional Section\n"); 5352 } 5353 5354 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && 5355 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { 5356 int state; 5357 xmlParserInputState instate; 5358 int depth = 0; 5359 5360 SKIP(6); 5361 SKIP_BLANKS; 5362 if (RAW != '[') { 5363 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5364 } else { 5365 NEXT; 5366 } 5367 if (xmlParserDebugEntities) { 5368 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5369 xmlGenericError(xmlGenericErrorContext, 5370 "%s(%d): ", ctxt->input->filename, 5371 ctxt->input->line); 5372 xmlGenericError(xmlGenericErrorContext, 5373 "Entering IGNORE Conditional Section\n"); 5374 } 5375 5376 /* 5377 * Parse up to the end of the conditional section 5378 * But disable SAX event generating DTD building in the meantime 5379 */ 5380 state = ctxt->disableSAX; 5381 instate = ctxt->instate; 5382 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5383 ctxt->instate = XML_PARSER_IGNORE; 5384 5385 while ((depth >= 0) && (RAW != 0)) { 5386 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5387 depth++; 5388 SKIP(3); 5389 continue; 5390 } 5391 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 5392 if (--depth >= 0) SKIP(3); 5393 continue; 5394 } 5395 NEXT; 5396 continue; 5397 } 5398 5399 ctxt->disableSAX = state; 5400 ctxt->instate = instate; 5401 5402 if (xmlParserDebugEntities) { 5403 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5404 
xmlGenericError(xmlGenericErrorContext, 5405 "%s(%d): ", ctxt->input->filename, 5406 ctxt->input->line); 5407 xmlGenericError(xmlGenericErrorContext, 5408 "Leaving IGNORE Conditional Section\n"); 5409 } 5410 5411 } else { 5412 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 5413 } 5414 5415 if (RAW == 0) 5416 SHRINK; 5417 5418 if (RAW == 0) { 5419 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 5420 } else { 5421 SKIP(3); 5422 } 5423} 5424 5425/** 5426 * xmlParseMarkupDecl: 5427 * @ctxt: an XML parser context 5428 * 5429 * parse Markup declarations 5430 * 5431 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 5432 * NotationDecl | PI | Comment 5433 * 5434 * [ VC: Proper Declaration/PE Nesting ] 5435 * Parameter-entity replacement text must be properly nested with 5436 * markup declarations. That is to say, if either the first character 5437 * or the last character of a markup declaration (markupdecl above) is 5438 * contained in the replacement text for a parameter-entity reference, 5439 * both must be contained in the same replacement text. 5440 * 5441 * [ WFC: PEs in Internal Subset ] 5442 * In the internal DTD subset, parameter-entity references can occur 5443 * only where markup declarations can occur, not within markup declarations. 5444 * (This does not apply to references that occur in external parameter 5445 * entities or to the external subset.) 5446 */ 5447void 5448xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 5449 GROW; 5450 xmlParseElementDecl(ctxt); 5451 xmlParseAttributeListDecl(ctxt); 5452 xmlParseEntityDecl(ctxt); 5453 xmlParseNotationDecl(ctxt); 5454 xmlParsePI(ctxt); 5455 xmlParseComment(ctxt); 5456 /* 5457 * This is only for internal subset. 
On external entities, 5458 * the replacement is done before parsing stage 5459 */ 5460 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 5461 xmlParsePEReference(ctxt); 5462 5463 /* 5464 * Conditional sections are allowed from entities included 5465 * by PE References in the internal subset. 5466 */ 5467 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 5468 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5469 xmlParseConditionalSections(ctxt); 5470 } 5471 } 5472 5473 ctxt->instate = XML_PARSER_DTD; 5474} 5475 5476/** 5477 * xmlParseTextDecl: 5478 * @ctxt: an XML parser context 5479 * 5480 * parse an XML declaration header for external entities 5481 * 5482 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 5483 * 5484 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 5485 */ 5486 5487void 5488xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 5489 xmlChar *version; 5490 5491 /* 5492 * We know that '<?xml' is here. 5493 */ 5494 if ((RAW == '<') && (NXT(1) == '?') && 5495 (NXT(2) == 'x') && (NXT(3) == 'm') && 5496 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5497 SKIP(5); 5498 } else { 5499 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 5500 return; 5501 } 5502 5503 if (!IS_BLANK(CUR)) { 5504 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5505 "Space needed after '<?xml'\n"); 5506 } 5507 SKIP_BLANKS; 5508 5509 /* 5510 * We may have the VersionInfo here. 
5511 */ 5512 version = xmlParseVersionInfo(ctxt); 5513 if (version == NULL) 5514 version = xmlCharStrdup(XML_DEFAULT_VERSION); 5515 else { 5516 if (!IS_BLANK(CUR)) { 5517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5518 "Space needed here\n"); 5519 } 5520 } 5521 ctxt->input->version = version; 5522 5523 /* 5524 * We must have the encoding declaration 5525 */ 5526 xmlParseEncodingDecl(ctxt); 5527 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5528 /* 5529 * The XML REC instructs us to stop parsing right here 5530 */ 5531 return; 5532 } 5533 5534 SKIP_BLANKS; 5535 if ((RAW == '?') && (NXT(1) == '>')) { 5536 SKIP(2); 5537 } else if (RAW == '>') { 5538 /* Deprecated old WD ... */ 5539 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 5540 NEXT; 5541 } else { 5542 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 5543 MOVETO_ENDTAG(CUR_PTR); 5544 NEXT; 5545 } 5546} 5547 5548/** 5549 * xmlParseExternalSubset: 5550 * @ctxt: an XML parser context 5551 * @ExternalID: the external identifier 5552 * @SystemID: the system identifier (or URL) 5553 * 5554 * parse Markup declarations from an external subset 5555 * 5556 * [30] extSubset ::= textDecl? 
extSubsetDecl 5557 * 5558 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 5559 */ 5560void 5561xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 5562 const xmlChar *SystemID) { 5563 xmlDetectSAX2(ctxt); 5564 GROW; 5565 if ((RAW == '<') && (NXT(1) == '?') && 5566 (NXT(2) == 'x') && (NXT(3) == 'm') && 5567 (NXT(4) == 'l')) { 5568 xmlParseTextDecl(ctxt); 5569 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5570 /* 5571 * The XML REC instructs us to stop parsing right here 5572 */ 5573 ctxt->instate = XML_PARSER_EOF; 5574 return; 5575 } 5576 } 5577 if (ctxt->myDoc == NULL) { 5578 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 5579 } 5580 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 5581 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 5582 5583 ctxt->instate = XML_PARSER_DTD; 5584 ctxt->external = 1; 5585 while (((RAW == '<') && (NXT(1) == '?')) || 5586 ((RAW == '<') && (NXT(1) == '!')) || 5587 (RAW == '%') || IS_BLANK(CUR)) { 5588 const xmlChar *check = CUR_PTR; 5589 unsigned int cons = ctxt->input->consumed; 5590 5591 GROW; 5592 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5593 xmlParseConditionalSections(ctxt); 5594 } else if (IS_BLANK(CUR)) { 5595 NEXT; 5596 } else if (RAW == '%') { 5597 xmlParsePEReference(ctxt); 5598 } else 5599 xmlParseMarkupDecl(ctxt); 5600 5601 /* 5602 * Pop-up of finished entities. 
5603 */ 5604 while ((RAW == 0) && (ctxt->inputNr > 1)) 5605 xmlPopInput(ctxt); 5606 5607 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5608 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5609 break; 5610 } 5611 } 5612 5613 if (RAW != 0) { 5614 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5615 } 5616 5617} 5618 5619/** 5620 * xmlParseReference: 5621 * @ctxt: an XML parser context 5622 * 5623 * parse and handle entity references in content, depending on the SAX 5624 * interface, this may end-up in a call to character() if this is a 5625 * CharRef, a predefined entity, if there is no reference() callback. 5626 * or if the parser was asked to switch to that mode. 5627 * 5628 * [67] Reference ::= EntityRef | CharRef 5629 */ 5630void 5631xmlParseReference(xmlParserCtxtPtr ctxt) { 5632 xmlEntityPtr ent; 5633 xmlChar *val; 5634 if (RAW != '&') return; 5635 5636 if (NXT(1) == '#') { 5637 int i = 0; 5638 xmlChar out[10]; 5639 int hex = NXT(2); 5640 int value = xmlParseCharRef(ctxt); 5641 5642 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 5643 /* 5644 * So we are using non-UTF-8 buffers 5645 * Check that the char fit on 8bits, if not 5646 * generate a CharRef. 
5647 */ 5648 if (value <= 0xFF) { 5649 out[0] = value; 5650 out[1] = 0; 5651 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5652 (!ctxt->disableSAX)) 5653 ctxt->sax->characters(ctxt->userData, out, 1); 5654 } else { 5655 if ((hex == 'x') || (hex == 'X')) 5656 snprintf((char *)out, sizeof(out), "#x%X", value); 5657 else 5658 snprintf((char *)out, sizeof(out), "#%d", value); 5659 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5660 (!ctxt->disableSAX)) 5661 ctxt->sax->reference(ctxt->userData, out); 5662 } 5663 } else { 5664 /* 5665 * Just encode the value in UTF-8 5666 */ 5667 COPY_BUF(0 ,out, i, value); 5668 out[i] = 0; 5669 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5670 (!ctxt->disableSAX)) 5671 ctxt->sax->characters(ctxt->userData, out, i); 5672 } 5673 } else { 5674 ent = xmlParseEntityRef(ctxt); 5675 if (ent == NULL) return; 5676 if (!ctxt->wellFormed) 5677 return; 5678 if ((ent->name != NULL) && 5679 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 5680 xmlNodePtr list = NULL; 5681 int ret; 5682 5683 5684 /* 5685 * The first reference to the entity trigger a parsing phase 5686 * where the ent->children is filled with the result from 5687 * the parsing. 5688 */ 5689 if (ent->children == NULL) { 5690 xmlChar *value; 5691 value = ent->content; 5692 5693 /* 5694 * Check that this entity is well formed 5695 */ 5696 if ((value != NULL) && 5697 (value[1] == 0) && (value[0] == '<') && 5698 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 5699 /* 5700 * DONE: get definite answer on this !!! 5701 * Lots of entity decls are used to declare a single 5702 * char 5703 * <!ENTITY lt "<"> 5704 * Which seems to be valid since 5705 * 2.4: The ampersand character (&) and the left angle 5706 * bracket (<) may appear in their literal form only 5707 * when used ... They are also legal within the literal 5708 * entity value of an internal entity declaration;i 5709 * see "4.3.2 Well-Formed Parsed Entities". 
5710 * IMHO 2.4 and 4.3.2 are directly in contradiction. 5711 * Looking at the OASIS test suite and James Clark 5712 * tests, this is broken. However the XML REC uses 5713 * it. Is the XML REC not well-formed ???? 5714 * This is a hack to avoid this problem 5715 * 5716 * ANSWER: since lt gt amp .. are already defined, 5717 * this is a redefinition and hence the fact that the 5718 * content is not well balanced is not a Wf error, this 5719 * is lousy but acceptable. 5720 */ 5721 list = xmlNewDocText(ctxt->myDoc, value); 5722 if (list != NULL) { 5723 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 5724 (ent->children == NULL)) { 5725 ent->children = list; 5726 ent->last = list; 5727 ent->owner = 1; 5728 list->parent = (xmlNodePtr) ent; 5729 } else { 5730 xmlFreeNodeList(list); 5731 } 5732 } else if (list != NULL) { 5733 xmlFreeNodeList(list); 5734 } 5735 } else { 5736 /* 5737 * 4.3.2: An internal general parsed entity is well-formed 5738 * if its replacement text matches the production labeled 5739 * content. 5740 */ 5741 5742 void *user_data; 5743 /* 5744 * This is a bit hackish but this seems the best 5745 * way to make sure both SAX and DOM entity support 5746 * behaves okay. 
5747 */ 5748 if (ctxt->userData == ctxt) 5749 user_data = NULL; 5750 else 5751 user_data = ctxt->userData; 5752 5753 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 5754 ctxt->depth++; 5755 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 5756 value, user_data, &list); 5757 ctxt->depth--; 5758 } else if (ent->etype == 5759 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 5760 ctxt->depth++; 5761 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 5762 ctxt->sax, user_data, ctxt->depth, 5763 ent->URI, ent->ExternalID, &list); 5764 ctxt->depth--; 5765 } else { 5766 ret = -1; 5767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5768 ctxt->sax->error(ctxt->userData, 5769 "Internal: invalid entity type\n"); 5770 } 5771 if (ret == XML_ERR_ENTITY_LOOP) { 5772 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 5773 return; 5774 } else if ((ret == 0) && (list != NULL)) { 5775 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 5776 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 5777 (ent->children == NULL)) { 5778 ent->children = list; 5779 if (ctxt->replaceEntities) { 5780 /* 5781 * Prune it directly in the generated document 5782 * except for single text nodes. 
5783 */ 5784 if ((list->type == XML_TEXT_NODE) && 5785 (list->next == NULL)) { 5786 list->parent = (xmlNodePtr) ent; 5787 list = NULL; 5788 ent->owner = 1; 5789 } else { 5790 ent->owner = 0; 5791 while (list != NULL) { 5792 list->parent = (xmlNodePtr) ctxt->node; 5793 list->doc = ctxt->myDoc; 5794 if (list->next == NULL) 5795 ent->last = list; 5796 list = list->next; 5797 } 5798 list = ent->children; 5799 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5800 xmlAddEntityReference(ent, list, NULL); 5801 } 5802 } else { 5803 ent->owner = 1; 5804 while (list != NULL) { 5805 list->parent = (xmlNodePtr) ent; 5806 if (list->next == NULL) 5807 ent->last = list; 5808 list = list->next; 5809 } 5810 } 5811 } else { 5812 xmlFreeNodeList(list); 5813 list = NULL; 5814 } 5815 } else if (ret > 0) { 5816 xmlFatalErr(ctxt, ret, NULL); 5817 } else if (list != NULL) { 5818 xmlFreeNodeList(list); 5819 list = NULL; 5820 } 5821 } 5822 } 5823 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5824 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 5825 /* 5826 * Create a node. 
5827 */ 5828 ctxt->sax->reference(ctxt->userData, ent->name); 5829 return; 5830 } else if (ctxt->replaceEntities) { 5831 if ((ctxt->node != NULL) && (ent->children != NULL)) { 5832 /* 5833 * Seems we are generating the DOM content, do 5834 * a simple tree copy for all references except the first 5835 * In the first occurrence list contains the replacement 5836 */ 5837 if ((list == NULL) && (ent->owner == 0)) { 5838 xmlNodePtr nw = NULL, cur, firstChild = NULL; 5839 cur = ent->children; 5840 while (cur != NULL) { 5841 nw = xmlCopyNode(cur, 1); 5842 if (nw != NULL) { 5843 nw->_private = cur->_private; 5844 if (firstChild == NULL){ 5845 firstChild = nw; 5846 } 5847 xmlAddChild(ctxt->node, nw); 5848 } 5849 if (cur == ent->last) 5850 break; 5851 cur = cur->next; 5852 } 5853 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5854 xmlAddEntityReference(ent, firstChild, nw); 5855 } else if (list == NULL) { 5856 xmlNodePtr nw = NULL, cur, next, last, 5857 firstChild = NULL; 5858 /* 5859 * Copy the entity child list and make it the new 5860 * entity child list. The goal is to make sure any 5861 * ID or REF referenced will be the one from the 5862 * document content and not the entity copy. 
5863 */ 5864 cur = ent->children; 5865 ent->children = NULL; 5866 last = ent->last; 5867 ent->last = NULL; 5868 while (cur != NULL) { 5869 next = cur->next; 5870 cur->next = NULL; 5871 cur->parent = NULL; 5872 nw = xmlCopyNode(cur, 1); 5873 if (nw != NULL) { 5874 nw->_private = cur->_private; 5875 if (firstChild == NULL){ 5876 firstChild = cur; 5877 } 5878 xmlAddChild((xmlNodePtr) ent, nw); 5879 xmlAddChild(ctxt->node, cur); 5880 } 5881 if (cur == last) 5882 break; 5883 cur = next; 5884 } 5885 ent->owner = 1; 5886 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5887 xmlAddEntityReference(ent, firstChild, nw); 5888 } else { 5889 /* 5890 * the name change is to avoid coalescing of the 5891 * node with a possible previous text one which 5892 * would make ent->children a dangling pointer 5893 */ 5894 if (ent->children->type == XML_TEXT_NODE) 5895 ent->children->name = xmlStrdup(BAD_CAST "nbktext"); 5896 if ((ent->last != ent->children) && 5897 (ent->last->type == XML_TEXT_NODE)) 5898 ent->last->name = xmlStrdup(BAD_CAST "nbktext"); 5899 xmlAddChildList(ctxt->node, ent->children); 5900 } 5901 5902 /* 5903 * This is to avoid a nasty side effect, see 5904 * characters() in SAX.c 5905 */ 5906 ctxt->nodemem = 0; 5907 ctxt->nodelen = 0; 5908 return; 5909 } else { 5910 /* 5911 * Probably running in SAX mode 5912 */ 5913 xmlParserInputPtr input; 5914 5915 input = xmlNewEntityInputStream(ctxt, ent); 5916 xmlPushInput(ctxt, input); 5917 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 5918 (RAW == '<') && (NXT(1) == '?') && 5919 (NXT(2) == 'x') && (NXT(3) == 'm') && 5920 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5921 xmlParseTextDecl(ctxt); 5922 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5923 /* 5924 * The XML REC instructs us to stop parsing right here 5925 */ 5926 ctxt->instate = XML_PARSER_EOF; 5927 return; 5928 } 5929 if (input->standalone == 1) { 5930 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE, 5931 NULL); 5932 } 5933 } 5934 return; 5935 } 5936 } 
5937 } else { 5938 val = ent->content; 5939 if (val == NULL) return; 5940 /* 5941 * inline the entity. 5942 */ 5943 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5944 (!ctxt->disableSAX)) 5945 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5946 } 5947 } 5948} 5949 5950/** 5951 * xmlParseEntityRef: 5952 * @ctxt: an XML parser context 5953 * 5954 * parse ENTITY references declarations 5955 * 5956 * [68] EntityRef ::= '&' Name ';' 5957 * 5958 * [ WFC: Entity Declared ] 5959 * In a document without any DTD, a document with only an internal DTD 5960 * subset which contains no parameter entity references, or a document 5961 * with "standalone='yes'", the Name given in the entity reference 5962 * must match that in an entity declaration, except that well-formed 5963 * documents need not declare any of the following entities: amp, lt, 5964 * gt, apos, quot. The declaration of a parameter entity must precede 5965 * any reference to it. Similarly, the declaration of a general entity 5966 * must precede any reference to it which appears in a default value in an 5967 * attribute-list declaration. Note that if entities are declared in the 5968 * external subset or in external parameter entities, a non-validating 5969 * processor is not obligated to read and process their declarations; 5970 * for such documents, the rule that an entity must be declared is a 5971 * well-formedness constraint only if standalone='yes'. 5972 * 5973 * [ WFC: Parsed Entity ] 5974 * An entity reference must not contain the name of an unparsed entity 5975 * 5976 * Returns the xmlEntityPtr if found, or NULL otherwise. 
5977 */ 5978xmlEntityPtr 5979xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5980 const xmlChar *name; 5981 xmlEntityPtr ent = NULL; 5982 5983 GROW; 5984 5985 if (RAW == '&') { 5986 NEXT; 5987 name = xmlParseName(ctxt); 5988 if (name == NULL) { 5989 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5990 "xmlParseEntityRef: no name\n"); 5991 } else { 5992 if (RAW == ';') { 5993 NEXT; 5994 /* 5995 * Ask first SAX for entity resolution, otherwise try the 5996 * predefined set. 5997 */ 5998 if (ctxt->sax != NULL) { 5999 if (ctxt->sax->getEntity != NULL) 6000 ent = ctxt->sax->getEntity(ctxt->userData, name); 6001 if ((ctxt->wellFormed == 1 ) && (ent == NULL)) 6002 ent = xmlGetPredefinedEntity(name); 6003 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 6004 (ctxt->userData==ctxt)) { 6005 ent = xmlSAX2GetEntity(ctxt, name); 6006 } 6007 } 6008 /* 6009 * [ WFC: Entity Declared ] 6010 * In a document without any DTD, a document with only an 6011 * internal DTD subset which contains no parameter entity 6012 * references, or a document with "standalone='yes'", the 6013 * Name given in the entity reference must match that in an 6014 * entity declaration, except that well-formed documents 6015 * need not declare any of the following entities: amp, lt, 6016 * gt, apos, quot. 6017 * The declaration of a parameter entity must precede any 6018 * reference to it. 6019 * Similarly, the declaration of a general entity must 6020 * precede any reference to it which appears in a default 6021 * value in an attribute-list declaration. Note that if 6022 * entities are declared in the external subset or in 6023 * external parameter entities, a non-validating processor 6024 * is not obligated to read and process their declarations; 6025 * for such documents, the rule that an entity must be 6026 * declared is a well-formedness constraint only if 6027 * standalone='yes'. 
6028 */ 6029 if (ent == NULL) { 6030 if ((ctxt->standalone == 1) || 6031 ((ctxt->hasExternalSubset == 0) && 6032 (ctxt->hasPErefs == 0))) { 6033 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6034 "Entity '%s' not defined\n", name); 6035 ctxt->valid = 0; 6036 } else { 6037 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 6038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6039 ctxt->sax->error(ctxt->userData, 6040 "Entity '%s' not defined\n", name); 6041 ctxt->valid = 0; 6042 } 6043 } 6044 6045 /* 6046 * [ WFC: Parsed Entity ] 6047 * An entity reference must not contain the name of an 6048 * unparsed entity 6049 */ 6050 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6051 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 6052 "Entity reference to unparsed entity %s\n", name); 6053 } 6054 6055 /* 6056 * [ WFC: No External Entity References ] 6057 * Attribute values cannot contain direct or indirect 6058 * entity references to external entities. 6059 */ 6060 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6061 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6062 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 6063 "Attribute references external entity '%s'\n", name); 6064 } 6065 /* 6066 * [ WFC: No < in Attribute Values ] 6067 * The replacement text of any entity referred to directly or 6068 * indirectly in an attribute value (other than "<") must 6069 * not contain a <. 6070 */ 6071 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6072 (ent != NULL) && 6073 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6074 (ent->content != NULL) && 6075 (xmlStrchr(ent->content, '<'))) { 6076 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 6077 "'<' in entity '%s' is not allowed in attributes values\n", name); 6078 } 6079 6080 /* 6081 * Internal check, no parameter entities here ... 
6082 */ 6083 else { 6084 switch (ent->etype) { 6085 case XML_INTERNAL_PARAMETER_ENTITY: 6086 case XML_EXTERNAL_PARAMETER_ENTITY: 6087 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 6088 "Attempt to reference the parameter entity '%s'\n", 6089 name); 6090 break; 6091 default: 6092 break; 6093 } 6094 } 6095 6096 /* 6097 * [ WFC: No Recursion ] 6098 * A parsed entity must not contain a recursive reference 6099 * to itself, either directly or indirectly. 6100 * Done somewhere else 6101 */ 6102 6103 } else { 6104 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6105 } 6106 } 6107 } 6108 return(ent); 6109} 6110 6111/** 6112 * xmlParseStringEntityRef: 6113 * @ctxt: an XML parser context 6114 * @str: a pointer to an index in the string 6115 * 6116 * parse ENTITY references declarations, but this version parses it from 6117 * a string value. 6118 * 6119 * [68] EntityRef ::= '&' Name ';' 6120 * 6121 * [ WFC: Entity Declared ] 6122 * In a document without any DTD, a document with only an internal DTD 6123 * subset which contains no parameter entity references, or a document 6124 * with "standalone='yes'", the Name given in the entity reference 6125 * must match that in an entity declaration, except that well-formed 6126 * documents need not declare any of the following entities: amp, lt, 6127 * gt, apos, quot. The declaration of a parameter entity must precede 6128 * any reference to it. Similarly, the declaration of a general entity 6129 * must precede any reference to it which appears in a default value in an 6130 * attribute-list declaration. Note that if entities are declared in the 6131 * external subset or in external parameter entities, a non-validating 6132 * processor is not obligated to read and process their declarations; 6133 * for such documents, the rule that an entity must be declared is a 6134 * well-formedness constraint only if standalone='yes'. 
6135 * 6136 * [ WFC: Parsed Entity ] 6137 * An entity reference must not contain the name of an unparsed entity 6138 * 6139 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 6140 * is updated to the current location in the string. 6141 */ 6142xmlEntityPtr 6143xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 6144 xmlChar *name; 6145 const xmlChar *ptr; 6146 xmlChar cur; 6147 xmlEntityPtr ent = NULL; 6148 6149 if ((str == NULL) || (*str == NULL)) 6150 return(NULL); 6151 ptr = *str; 6152 cur = *ptr; 6153 if (cur == '&') { 6154 ptr++; 6155 cur = *ptr; 6156 name = xmlParseStringName(ctxt, &ptr); 6157 if (name == NULL) { 6158 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6159 "xmlParseStringEntityRef: no name\n"); 6160 } else { 6161 if (*ptr == ';') { 6162 ptr++; 6163 /* 6164 * Ask first SAX for entity resolution, otherwise try the 6165 * predefined set. 6166 */ 6167 if (ctxt->sax != NULL) { 6168 if (ctxt->sax->getEntity != NULL) 6169 ent = ctxt->sax->getEntity(ctxt->userData, name); 6170 if (ent == NULL) 6171 ent = xmlGetPredefinedEntity(name); 6172 if ((ent == NULL) && (ctxt->userData==ctxt)) { 6173 ent = xmlSAX2GetEntity(ctxt, name); 6174 } 6175 } 6176 /* 6177 * [ WFC: Entity Declared ] 6178 * In a document without any DTD, a document with only an 6179 * internal DTD subset which contains no parameter entity 6180 * references, or a document with "standalone='yes'", the 6181 * Name given in the entity reference must match that in an 6182 * entity declaration, except that well-formed documents 6183 * need not declare any of the following entities: amp, lt, 6184 * gt, apos, quot. 6185 * The declaration of a parameter entity must precede any 6186 * reference to it. 6187 * Similarly, the declaration of a general entity must 6188 * precede any reference to it which appears in a default 6189 * value in an attribute-list declaration. 
Note that if 6190 * entities are declared in the external subset or in 6191 * external parameter entities, a non-validating processor 6192 * is not obligated to read and process their declarations; 6193 * for such documents, the rule that an entity must be 6194 * declared is a well-formedness constraint only if 6195 * standalone='yes'. 6196 */ 6197 if (ent == NULL) { 6198 if ((ctxt->standalone == 1) || 6199 ((ctxt->hasExternalSubset == 0) && 6200 (ctxt->hasPErefs == 0))) { 6201 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6202 "Entity '%s' not defined\n", name); 6203 } else { 6204 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 6205 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6206 ctxt->sax->warning(ctxt->userData, 6207 "Entity '%s' not defined\n", name); 6208 } 6209 } 6210 6211 /* 6212 * [ WFC: Parsed Entity ] 6213 * An entity reference must not contain the name of an 6214 * unparsed entity 6215 */ 6216 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6217 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 6218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6219 ctxt->sax->error(ctxt->userData, 6220 "Entity reference to unparsed entity %s\n", name); 6221 ctxt->wellFormed = 0; 6222 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6223 } 6224 6225 /* 6226 * [ WFC: No External Entity References ] 6227 * Attribute values cannot contain direct or indirect 6228 * entity references to external entities. 
6229 */ 6230 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6231 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6232 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 6233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6234 ctxt->sax->error(ctxt->userData, 6235 "Attribute references external entity '%s'\n", name); 6236 ctxt->wellFormed = 0; 6237 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6238 } 6239 /* 6240 * [ WFC: No < in Attribute Values ] 6241 * The replacement text of any entity referred to directly or 6242 * indirectly in an attribute value (other than "<") must 6243 * not contain a <. 6244 */ 6245 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6246 (ent != NULL) && 6247 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6248 (ent->content != NULL) && 6249 (xmlStrchr(ent->content, '<'))) { 6250 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 6251 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6252 ctxt->sax->error(ctxt->userData, 6253 "'<' in entity '%s' is not allowed in attributes values\n", name); 6254 ctxt->wellFormed = 0; 6255 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6256 } 6257 6258 /* 6259 * Internal check, no parameter entities here ... 6260 */ 6261 else { 6262 switch (ent->etype) { 6263 case XML_INTERNAL_PARAMETER_ENTITY: 6264 case XML_EXTERNAL_PARAMETER_ENTITY: 6265 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 6266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6267 ctxt->sax->error(ctxt->userData, 6268 "Attempt to reference the parameter entity '%s'\n", name); 6269 ctxt->wellFormed = 0; 6270 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6271 break; 6272 default: 6273 break; 6274 } 6275 } 6276 6277 /* 6278 * [ WFC: No Recursion ] 6279 * A parsed entity must not contain a recursive reference 6280 * to itself, either directly or indirectly. 
6281 * Done somewhere else 6282 */ 6283 6284 } else { 6285 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6286 } 6287 xmlFree(name); 6288 } 6289 } 6290 *str = ptr; 6291 return(ent); 6292} 6293 6294/** 6295 * xmlParsePEReference: 6296 * @ctxt: an XML parser context 6297 * 6298 * parse PEReference declarations 6299 * The entity content is handled directly by pushing it's content as 6300 * a new input stream. 6301 * 6302 * [69] PEReference ::= '%' Name ';' 6303 * 6304 * [ WFC: No Recursion ] 6305 * A parsed entity must not contain a recursive 6306 * reference to itself, either directly or indirectly. 6307 * 6308 * [ WFC: Entity Declared ] 6309 * In a document without any DTD, a document with only an internal DTD 6310 * subset which contains no parameter entity references, or a document 6311 * with "standalone='yes'", ... ... The declaration of a parameter 6312 * entity must precede any reference to it... 6313 * 6314 * [ VC: Entity Declared ] 6315 * In a document with an external subset or external parameter entities 6316 * with "standalone='no'", ... ... The declaration of a parameter entity 6317 * must precede any reference to it... 6318 * 6319 * [ WFC: In DTD ] 6320 * Parameter-entity references may only appear in the DTD. 6321 * NOTE: misleading but this is handled. 
6322 */ 6323void 6324xmlParsePEReference(xmlParserCtxtPtr ctxt) { 6325 const xmlChar *name; 6326 xmlEntityPtr entity = NULL; 6327 xmlParserInputPtr input; 6328 6329 if (RAW == '%') { 6330 NEXT; 6331 name = xmlParseName(ctxt); 6332 if (name == NULL) { 6333 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6334 "xmlParsePEReference: no name\n"); 6335 } else { 6336 if (RAW == ';') { 6337 NEXT; 6338 if ((ctxt->sax != NULL) && 6339 (ctxt->sax->getParameterEntity != NULL)) 6340 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6341 name); 6342 if (entity == NULL) { 6343 /* 6344 * [ WFC: Entity Declared ] 6345 * In a document without any DTD, a document with only an 6346 * internal DTD subset which contains no parameter entity 6347 * references, or a document with "standalone='yes'", ... 6348 * ... The declaration of a parameter entity must precede 6349 * any reference to it... 6350 */ 6351 if ((ctxt->standalone == 1) || 6352 ((ctxt->hasExternalSubset == 0) && 6353 (ctxt->hasPErefs == 0))) { 6354 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6355 "PEReference: %%%s; not found\n", name); 6356 } else { 6357 /* 6358 * [ VC: Entity Declared ] 6359 * In a document with an external subset or external 6360 * parameter entities with "standalone='no'", ... 6361 * ... The declaration of a parameter entity must precede 6362 * any reference to it... 
6363 */ 6364 if ((!ctxt->disableSAX) && 6365 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6366 ctxt->sax->warning(ctxt->userData, 6367 "PEReference: %%%s; not found\n", name); 6368 ctxt->valid = 0; 6369 } 6370 } else { 6371 /* 6372 * Internal checking in case the entity quest barfed 6373 */ 6374 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6375 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6376 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6377 ctxt->sax->warning(ctxt->userData, 6378 "Internal: %%%s; is not a parameter entity\n", name); 6379 } else if (ctxt->input->free != deallocblankswrapper) { 6380 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 6381 xmlPushInput(ctxt, input); 6382 } else { 6383 /* 6384 * TODO !!! 6385 * handle the extra spaces added before and after 6386 * c.f. http://www.w3.org/TR/REC-xml#as-PE 6387 */ 6388 input = xmlNewEntityInputStream(ctxt, entity); 6389 xmlPushInput(ctxt, input); 6390 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 6391 (RAW == '<') && (NXT(1) == '?') && 6392 (NXT(2) == 'x') && (NXT(3) == 'm') && 6393 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 6394 xmlParseTextDecl(ctxt); 6395 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6396 /* 6397 * The XML REC instructs us to stop parsing 6398 * right here 6399 */ 6400 ctxt->instate = XML_PARSER_EOF; 6401 return; 6402 } 6403 } 6404 } 6405 } 6406 ctxt->hasPErefs = 1; 6407 } else { 6408 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6409 } 6410 } 6411 } 6412} 6413 6414/** 6415 * xmlParseStringPEReference: 6416 * @ctxt: an XML parser context 6417 * @str: a pointer to an index in the string 6418 * 6419 * parse PEReference declarations 6420 * 6421 * [69] PEReference ::= '%' Name ';' 6422 * 6423 * [ WFC: No Recursion ] 6424 * A parsed entity must not contain a recursive 6425 * reference to itself, either directly or indirectly. 
6426 * 6427 * [ WFC: Entity Declared ] 6428 * In a document without any DTD, a document with only an internal DTD 6429 * subset which contains no parameter entity references, or a document 6430 * with "standalone='yes'", ... ... The declaration of a parameter 6431 * entity must precede any reference to it... 6432 * 6433 * [ VC: Entity Declared ] 6434 * In a document with an external subset or external parameter entities 6435 * with "standalone='no'", ... ... The declaration of a parameter entity 6436 * must precede any reference to it... 6437 * 6438 * [ WFC: In DTD ] 6439 * Parameter-entity references may only appear in the DTD. 6440 * NOTE: misleading but this is handled. 6441 * 6442 * Returns the string of the entity content. 6443 * str is updated to the current value of the index 6444 */ 6445xmlEntityPtr 6446xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 6447 const xmlChar *ptr; 6448 xmlChar cur; 6449 xmlChar *name; 6450 xmlEntityPtr entity = NULL; 6451 6452 if ((str == NULL) || (*str == NULL)) return(NULL); 6453 ptr = *str; 6454 cur = *ptr; 6455 if (cur == '%') { 6456 ptr++; 6457 cur = *ptr; 6458 name = xmlParseStringName(ctxt, &ptr); 6459 if (name == NULL) { 6460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6461 "xmlParseStringPEReference: no name\n"); 6462 } else { 6463 cur = *ptr; 6464 if (cur == ';') { 6465 ptr++; 6466 cur = *ptr; 6467 if ((ctxt->sax != NULL) && 6468 (ctxt->sax->getParameterEntity != NULL)) 6469 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6470 name); 6471 if (entity == NULL) { 6472 /* 6473 * [ WFC: Entity Declared ] 6474 * In a document without any DTD, a document with only an 6475 * internal DTD subset which contains no parameter entity 6476 * references, or a document with "standalone='yes'", ... 6477 * ... The declaration of a parameter entity must precede 6478 * any reference to it... 
6479 */ 6480 if ((ctxt->standalone == 1) || 6481 ((ctxt->hasExternalSubset == 0) && 6482 (ctxt->hasPErefs == 0))) { 6483 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6484 "PEReference: %%%s; not found\n", name); 6485 } else { 6486 /* 6487 * [ VC: Entity Declared ] 6488 * In a document with an external subset or external 6489 * parameter entities with "standalone='no'", ... 6490 * ... The declaration of a parameter entity must 6491 * precede any reference to it... 6492 */ 6493 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6494 ctxt->sax->warning(ctxt->userData, 6495 "PEReference: %%%s; not found\n", name); 6496 ctxt->valid = 0; 6497 } 6498 } else { 6499 /* 6500 * Internal checking in case the entity quest barfed 6501 */ 6502 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6503 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6504 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6505 ctxt->sax->warning(ctxt->userData, 6506 "Internal: %%%s; is not a parameter entity\n", name); 6507 } 6508 } 6509 ctxt->hasPErefs = 1; 6510 } else { 6511 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6512 } 6513 xmlFree(name); 6514 } 6515 } 6516 *str = ptr; 6517 return(entity); 6518} 6519 6520/** 6521 * xmlParseDocTypeDecl: 6522 * @ctxt: an XML parser context 6523 * 6524 * parse a DOCTYPE declaration 6525 * 6526 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 6527 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6528 * 6529 * [ VC: Root Element Type ] 6530 * The Name in the document type declaration must match the element 6531 * type of the root element. 6532 */ 6533 6534void 6535xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 6536 const xmlChar *name = NULL; 6537 xmlChar *ExternalID = NULL; 6538 xmlChar *URI = NULL; 6539 6540 /* 6541 * We know that '<!DOCTYPE' has been detected. 6542 */ 6543 SKIP(9); 6544 6545 SKIP_BLANKS; 6546 6547 /* 6548 * Parse the DOCTYPE name. 
6549 */ 6550 name = xmlParseName(ctxt); 6551 if (name == NULL) { 6552 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6553 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 6554 } 6555 ctxt->intSubName = name; 6556 6557 SKIP_BLANKS; 6558 6559 /* 6560 * Check for SystemID and ExternalID 6561 */ 6562 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 6563 6564 if ((URI != NULL) || (ExternalID != NULL)) { 6565 ctxt->hasExternalSubset = 1; 6566 } 6567 ctxt->extSubURI = URI; 6568 ctxt->extSubSystem = ExternalID; 6569 6570 SKIP_BLANKS; 6571 6572 /* 6573 * Create and update the internal subset. 6574 */ 6575 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 6576 (!ctxt->disableSAX)) 6577 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 6578 6579 /* 6580 * Is there any internal subset declarations ? 6581 * they are handled separately in xmlParseInternalSubset() 6582 */ 6583 if (RAW == '[') 6584 return; 6585 6586 /* 6587 * We should be at the end of the DOCTYPE declaration. 6588 */ 6589 if (RAW != '>') { 6590 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 6591 } 6592 NEXT; 6593} 6594 6595/** 6596 * xmlParseInternalSubset: 6597 * @ctxt: an XML parser context 6598 * 6599 * parse the internal subset declaration 6600 * 6601 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6602 */ 6603 6604static void 6605xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 6606 /* 6607 * Is there any DTD definition ? 6608 */ 6609 if (RAW == '[') { 6610 ctxt->instate = XML_PARSER_DTD; 6611 NEXT; 6612 /* 6613 * Parse the succession of Markup declarations and 6614 * PEReferences. 6615 * Subsequence (markupdecl | PEReference | S)* 6616 */ 6617 while (RAW != ']') { 6618 const xmlChar *check = CUR_PTR; 6619 unsigned int cons = ctxt->input->consumed; 6620 6621 SKIP_BLANKS; 6622 xmlParseMarkupDecl(ctxt); 6623 xmlParsePEReference(ctxt); 6624 6625 /* 6626 * Pop-up of finished entities. 
6627 */ 6628 while ((RAW == 0) && (ctxt->inputNr > 1)) 6629 xmlPopInput(ctxt); 6630 6631 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6632 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6633 "xmlParseInternalSubset: error detected in Markup declaration\n"); 6634 break; 6635 } 6636 } 6637 if (RAW == ']') { 6638 NEXT; 6639 SKIP_BLANKS; 6640 } 6641 } 6642 6643 /* 6644 * We should be at the end of the DOCTYPE declaration. 6645 */ 6646 if (RAW != '>') { 6647 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 6648 } 6649 NEXT; 6650} 6651 6652/** 6653 * xmlParseAttribute: 6654 * @ctxt: an XML parser context 6655 * @value: a xmlChar ** used to store the value of the attribute 6656 * 6657 * parse an attribute 6658 * 6659 * [41] Attribute ::= Name Eq AttValue 6660 * 6661 * [ WFC: No External Entity References ] 6662 * Attribute values cannot contain direct or indirect entity references 6663 * to external entities. 6664 * 6665 * [ WFC: No < in Attribute Values ] 6666 * The replacement text of any entity referred to directly or indirectly in 6667 * an attribute value (other than "<") must not contain a <. 6668 * 6669 * [ VC: Attribute Value Type ] 6670 * The attribute must have been declared; the value must be of the type 6671 * declared for it. 6672 * 6673 * [25] Eq ::= S? '=' S? 6674 * 6675 * With namespace: 6676 * 6677 * [NS 11] Attribute ::= QName Eq AttValue 6678 * 6679 * Also the case QName == xmlns:??? is handled independently as a namespace 6680 * definition. 6681 * 6682 * Returns the attribute name, and the value in *value. 
6683 */ 6684 6685const xmlChar * 6686xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 6687 const xmlChar *name; 6688 xmlChar *val; 6689 6690 *value = NULL; 6691 GROW; 6692 name = xmlParseName(ctxt); 6693 if (name == NULL) { 6694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6695 "error parsing attribute name\n"); 6696 return(NULL); 6697 } 6698 6699 /* 6700 * read the value 6701 */ 6702 SKIP_BLANKS; 6703 if (RAW == '=') { 6704 NEXT; 6705 SKIP_BLANKS; 6706 val = xmlParseAttValue(ctxt); 6707 ctxt->instate = XML_PARSER_CONTENT; 6708 } else { 6709 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 6710 "Specification mandate value for attribute %s\n", name); 6711 return(NULL); 6712 } 6713 6714 /* 6715 * Check that xml:lang conforms to the specification 6716 * No more registered as an error, just generate a warning now 6717 * since this was deprecated in XML second edition 6718 */ 6719 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 6720 if (!xmlCheckLanguageID(val)) { 6721 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6722 ctxt->sax->warning(ctxt->userData, 6723 "Malformed value for xml:lang : %s\n", val); 6724 } 6725 } 6726 6727 /* 6728 * Check that xml:space conforms to the specification 6729 */ 6730 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 6731 if (xmlStrEqual(val, BAD_CAST "default")) 6732 *(ctxt->space) = 0; 6733 else if (xmlStrEqual(val, BAD_CAST "preserve")) 6734 *(ctxt->space) = 1; 6735 else { 6736 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 6737"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 6738 val); 6739 } 6740 } 6741 6742 *value = val; 6743 return(name); 6744} 6745 6746/** 6747 * xmlParseStartTag: 6748 * @ctxt: an XML parser context 6749 * 6750 * parse a start of tag either for rule element or 6751 * EmptyElement. In both case we don't parse the tag closing chars. 6752 * 6753 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 6754 * 6755 * [ WFC: Unique Att Spec ] 6756 * No attribute name may appear more than once in the same start-tag or 6757 * empty-element tag. 6758 * 6759 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 6760 * 6761 * [ WFC: Unique Att Spec ] 6762 * No attribute name may appear more than once in the same start-tag or 6763 * empty-element tag. 6764 * 6765 * With namespace: 6766 * 6767 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 6768 * 6769 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 6770 * 6771 * Returns the element name parsed 6772 */ 6773 6774const xmlChar * 6775xmlParseStartTag(xmlParserCtxtPtr ctxt) { 6776 const xmlChar *name; 6777 const xmlChar *attname; 6778 xmlChar *attvalue; 6779 const xmlChar **atts = ctxt->atts; 6780 int nbatts = 0; 6781 int maxatts = ctxt->maxatts; 6782 int i; 6783 6784 if (RAW != '<') return(NULL); 6785 NEXT1; 6786 6787 name = xmlParseName(ctxt); 6788 if (name == NULL) { 6789 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6790 "xmlParseStartTag: invalid element name\n"); 6791 return(NULL); 6792 } 6793 6794 /* 6795 * Now parse the attributes, it ends up with the ending 6796 * 6797 * (S Attribute)* S? 6798 */ 6799 SKIP_BLANKS; 6800 GROW; 6801 6802 while ((RAW != '>') && 6803 ((RAW != '/') || (NXT(1) != '>')) && 6804 (IS_CHAR((unsigned int) RAW))) { 6805 const xmlChar *q = CUR_PTR; 6806 unsigned int cons = ctxt->input->consumed; 6807 6808 attname = xmlParseAttribute(ctxt, &attvalue); 6809 if ((attname != NULL) && (attvalue != NULL)) { 6810 /* 6811 * [ WFC: Unique Att Spec ] 6812 * No attribute name may appear more than once in the same 6813 * start-tag or empty-element tag. 
6814 */ 6815 for (i = 0; i < nbatts;i += 2) { 6816 if (xmlStrEqual(atts[i], attname)) { 6817 xmlErrAttributeDup(ctxt, NULL, attname); 6818 xmlFree(attvalue); 6819 goto failed; 6820 } 6821 } 6822 /* 6823 * Add the pair to atts 6824 */ 6825 if (atts == NULL) { 6826 maxatts = 22; /* allow for 10 attrs by default */ 6827 atts = (const xmlChar **) 6828 xmlMalloc(maxatts * sizeof(xmlChar *)); 6829 if (atts == NULL) { 6830 xmlErrMemory(ctxt, NULL); 6831 if (attvalue != NULL) 6832 xmlFree(attvalue); 6833 goto failed; 6834 } 6835 ctxt->atts = atts; 6836 ctxt->maxatts = maxatts; 6837 } else if (nbatts + 4 > maxatts) { 6838 const xmlChar **n; 6839 6840 maxatts *= 2; 6841 n = (const xmlChar **) xmlRealloc((void *) atts, 6842 maxatts * sizeof(const xmlChar *)); 6843 if (n == NULL) { 6844 xmlErrMemory(ctxt, NULL); 6845 if (attvalue != NULL) 6846 xmlFree(attvalue); 6847 goto failed; 6848 } 6849 atts = n; 6850 ctxt->atts = atts; 6851 ctxt->maxatts = maxatts; 6852 } 6853 atts[nbatts++] = attname; 6854 atts[nbatts++] = attvalue; 6855 atts[nbatts] = NULL; 6856 atts[nbatts + 1] = NULL; 6857 } else { 6858 if (attvalue != NULL) 6859 xmlFree(attvalue); 6860 } 6861 6862failed: 6863 6864 GROW 6865 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6866 break; 6867 if (!IS_BLANK(RAW)) { 6868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6869 "attributes construct error\n"); 6870 } 6871 SKIP_BLANKS; 6872 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 6873 (attname == NULL) && (attvalue == NULL)) { 6874 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 6875 "xmlParseStartTag: problem parsing attributes\n"); 6876 break; 6877 } 6878 SHRINK; 6879 GROW; 6880 } 6881 6882 /* 6883 * SAX: Start of Element ! 
6884 */ 6885 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6886 (!ctxt->disableSAX)) { 6887 if (nbatts > 0) 6888 ctxt->sax->startElement(ctxt->userData, name, atts); 6889 else 6890 ctxt->sax->startElement(ctxt->userData, name, NULL); 6891 } 6892 6893 if (atts != NULL) { 6894 /* Free only the content strings */ 6895 for (i = 1;i < nbatts;i+=2) 6896 if (atts[i] != NULL) 6897 xmlFree((xmlChar *) atts[i]); 6898 } 6899 return(name); 6900} 6901 6902/** 6903 * xmlParseEndTag1: 6904 * @ctxt: an XML parser context 6905 * @line: line of the start tag 6906 * @nsNr: number of namespaces on the start tag 6907 * 6908 * parse an end of tag 6909 * 6910 * [42] ETag ::= '</' Name S? '>' 6911 * 6912 * With namespace 6913 * 6914 * [NS 9] ETag ::= '</' QName S? '>' 6915 */ 6916 6917static void 6918xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 6919 const xmlChar *name; 6920 6921 GROW; 6922 if ((RAW != '<') || (NXT(1) != '/')) { 6923 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6924 "xmlParseEndTag: '</' not found\n"); 6925 return; 6926 } 6927 SKIP(2); 6928 6929 name = xmlParseNameAndCompare(ctxt,ctxt->name); 6930 6931 /* 6932 * We should definitely be at the ending "S? '>'" part 6933 */ 6934 GROW; 6935 SKIP_BLANKS; 6936 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) { 6937 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6938 } else 6939 NEXT1; 6940 6941 /* 6942 * [ WFC: Element Type Match ] 6943 * The Name in an element's end-tag must match the element type in the 6944 * start-tag. 
6945 * 6946 */ 6947 if (name != (xmlChar*)1) { 6948 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 6949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 6950 if (name != NULL) { 6951 ctxt->sax->error(ctxt->userData, 6952 "Opening and ending tag mismatch: %s line %d and %s\n", 6953 ctxt->name, line, name); 6954 } else { 6955 ctxt->sax->error(ctxt->userData, 6956 "Ending tag error for: %s line %d\n", ctxt->name, line); 6957 } 6958 6959 } 6960 ctxt->wellFormed = 0; 6961 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6962 } 6963 6964 /* 6965 * SAX: End of Tag 6966 */ 6967 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6968 (!ctxt->disableSAX)) 6969 ctxt->sax->endElement(ctxt->userData, ctxt->name); 6970 6971 namePop(ctxt); 6972 spacePop(ctxt); 6973 return; 6974} 6975 6976/** 6977 * xmlParseEndTag: 6978 * @ctxt: an XML parser context 6979 * 6980 * parse an end of tag 6981 * 6982 * [42] ETag ::= '</' Name S? '>' 6983 * 6984 * With namespace 6985 * 6986 * [NS 9] ETag ::= '</' QName S? 
'>' 6987 */ 6988 6989void 6990xmlParseEndTag(xmlParserCtxtPtr ctxt) { 6991 xmlParseEndTag1(ctxt, 0); 6992} 6993 6994/************************************************************************ 6995 * * 6996 * SAX 2 specific operations * 6997 * * 6998 ************************************************************************/ 6999 7000static const xmlChar * 7001xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 7002 int len = 0, l; 7003 int c; 7004 int count = 0; 7005 7006 /* 7007 * Handler for more complex cases 7008 */ 7009 GROW; 7010 c = CUR_CHAR(l); 7011 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 7012 (!IS_LETTER(c) && (c != '_'))) { 7013 return(NULL); 7014 } 7015 7016 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 7017 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 7018 (c == '.') || (c == '-') || (c == '_') || 7019 (IS_COMBINING(c)) || 7020 (IS_EXTENDER(c)))) { 7021 if (count++ > 100) { 7022 count = 0; 7023 GROW; 7024 } 7025 len += l; 7026 NEXTL(l); 7027 c = CUR_CHAR(l); 7028 } 7029 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 7030} 7031 7032/* 7033 * xmlGetNamespace: 7034 * @ctxt: an XML parser context 7035 * @prefix: the prefix to lookup 7036 * 7037 * Lookup the namespace name for the @prefix (which ca be NULL) 7038 * The prefix must come from the @ctxt->dict dictionnary 7039 * 7040 * Returns the namespace name or NULL if not bound 7041 */ 7042static const xmlChar * 7043xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 7044 int i; 7045 7046 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 7047 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 7048 if (ctxt->nsTab[i] == prefix) { 7049 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 7050 return(NULL); 7051 return(ctxt->nsTab[i + 1]); 7052 } 7053 return(NULL); 7054} 7055 7056/** 7057 * xmlParseNCName: 7058 * @ctxt: an XML parser context 7059 * 7060 * parse an XML name. 7061 * 7062 * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 7063 * CombiningChar | Extender 7064 * 7065 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 7066 * 7067 * Returns the Name parsed or NULL 7068 */ 7069 7070static const xmlChar * 7071xmlParseNCName(xmlParserCtxtPtr ctxt) { 7072 const xmlChar *in; 7073 const xmlChar *ret; 7074 int count = 0; 7075 7076 /* 7077 * Accelerator for simple ASCII names 7078 */ 7079 in = ctxt->input->cur; 7080 if (((*in >= 0x61) && (*in <= 0x7A)) || 7081 ((*in >= 0x41) && (*in <= 0x5A)) || 7082 (*in == '_')) { 7083 in++; 7084 while (((*in >= 0x61) && (*in <= 0x7A)) || 7085 ((*in >= 0x41) && (*in <= 0x5A)) || 7086 ((*in >= 0x30) && (*in <= 0x39)) || 7087 (*in == '_') || (*in == '-') || 7088 (*in == '.')) 7089 in++; 7090 if ((*in > 0) && (*in < 0x80)) { 7091 count = in - ctxt->input->cur; 7092 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 7093 ctxt->input->cur = in; 7094 ctxt->nbChars += count; 7095 ctxt->input->col += count; 7096 if (ret == NULL) { 7097 xmlErrMemory(ctxt, NULL); 7098 } 7099 return(ret); 7100 } 7101 } 7102 return(xmlParseNCNameComplex(ctxt)); 7103} 7104 7105/** 7106 * xmlParseQName: 7107 * @ctxt: an XML parser context 7108 * @prefix: pointer to store the prefix part 7109 * 7110 * parse an XML Namespace QName 7111 * 7112 * [6] QName ::= (Prefix ':')? 
LocalPart 7113 * [7] Prefix ::= NCName 7114 * [8] LocalPart ::= NCName 7115 * 7116 * Returns the Name parsed or NULL 7117 */ 7118 7119static const xmlChar * 7120xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 7121 const xmlChar *l, *p; 7122 7123 GROW; 7124 7125 l = xmlParseNCName(ctxt); 7126 if (l == NULL) { 7127 if (CUR == ':') { 7128 l = xmlParseName(ctxt); 7129 if (l != NULL) { 7130 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7131 "Failed to parse QName '%s'\n", l, NULL, NULL); 7132 *prefix = NULL; 7133 return(l); 7134 } 7135 } 7136 return(NULL); 7137 } 7138 if (CUR == ':') { 7139 NEXT; 7140 p = l; 7141 l = xmlParseNCName(ctxt); 7142 if (l == NULL) { 7143 xmlChar *tmp; 7144 7145 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7146 "Failed to parse QName '%s:'\n", p, NULL, NULL); 7147 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 7148 p = xmlDictLookup(ctxt->dict, tmp, -1); 7149 if (tmp != NULL) xmlFree(tmp); 7150 *prefix = NULL; 7151 return(p); 7152 } 7153 if (CUR == ':') { 7154 xmlChar *tmp; 7155 7156 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7157 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 7158 NEXT; 7159 tmp = (xmlChar *) xmlParseName(ctxt); 7160 if (tmp != NULL) { 7161 tmp = xmlBuildQName(tmp, l, NULL, 0); 7162 l = xmlDictLookup(ctxt->dict, tmp, -1); 7163 if (tmp != NULL) xmlFree(tmp); 7164 *prefix = p; 7165 return(l); 7166 } 7167 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 7168 l = xmlDictLookup(ctxt->dict, tmp, -1); 7169 if (tmp != NULL) xmlFree(tmp); 7170 *prefix = p; 7171 return(l); 7172 } 7173 *prefix = p; 7174 } else 7175 *prefix = NULL; 7176 return(l); 7177} 7178 7179/** 7180 * xmlParseQNameAndCompare: 7181 * @ctxt: an XML parser context 7182 * @name: the localname 7183 * @prefix: the prefix, if any. 
7184 * 7185 * parse an XML name and compares for match 7186 * (specialized for endtag parsing) 7187 * 7188 * Returns NULL for an illegal name, (xmlChar*) 1 for success 7189 * and the name for mismatch 7190 */ 7191 7192static const xmlChar * 7193xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 7194 xmlChar const *prefix) { 7195 const xmlChar *cmp = name; 7196 const xmlChar *in; 7197 const xmlChar *ret; 7198 const xmlChar *prefix2; 7199 7200 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 7201 7202 GROW; 7203 in = ctxt->input->cur; 7204 7205 cmp = prefix; 7206 while (*in != 0 && *in == *cmp) { 7207 ++in; 7208 ++cmp; 7209 } 7210 if ((*cmp == 0) && (*in == ':')) { 7211 in++; 7212 cmp = name; 7213 while (*in != 0 && *in == *cmp) { 7214 ++in; 7215 ++cmp; 7216 } 7217 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) { 7218 /* success */ 7219 ctxt->input->cur = in; 7220 return((const xmlChar*) 1); 7221 } 7222 } 7223 /* 7224 * all strings coms from the dictionary, equality can be done directly 7225 */ 7226 ret = xmlParseQName (ctxt, &prefix2); 7227 if ((ret == name) && (prefix == prefix2)) 7228 return((const xmlChar*) 1); 7229 return ret; 7230} 7231 7232/** 7233 * xmlParseAttValueInternal: 7234 * @ctxt: an XML parser context 7235 * @len: attribute len result 7236 * @alloc: whether the attribute was reallocated as a new string 7237 * @normalize: if 1 then further non-CDATA normalization must be done 7238 * 7239 * parse a value for an attribute. 7240 * NOTE: if no normalization is needed, the routine will return pointers 7241 * directly from the data buffer. 
7242 * 7243 * 3.3.3 Attribute-Value Normalization: 7244 * Before the value of an attribute is passed to the application or 7245 * checked for validity, the XML processor must normalize it as follows: 7246 * - a character reference is processed by appending the referenced 7247 * character to the attribute value 7248 * - an entity reference is processed by recursively processing the 7249 * replacement text of the entity 7250 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 7251 * appending #x20 to the normalized value, except that only a single 7252 * #x20 is appended for a "#xD#xA" sequence that is part of an external 7253 * parsed entity or the literal entity value of an internal parsed entity 7254 * - other characters are processed by appending them to the normalized value 7255 * If the declared value is not CDATA, then the XML processor must further 7256 * process the normalized attribute value by discarding any leading and 7257 * trailing space (#x20) characters, and by replacing sequences of space 7258 * (#x20) characters by a single space (#x20) character. 7259 * All attributes for which no declaration has been read should be treated 7260 * by a non-validating parser as if declared CDATA. 7261 * 7262 * Returns the AttValue parsed or NULL. The value has to be freed by the 7263 * caller if it was copied, this can be detected by val[*len] == 0. 
7264 */ 7265 7266static xmlChar * 7267xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 7268 int normalize) 7269{ 7270 xmlChar limit = 0; 7271 const xmlChar *in = NULL, *start, *end, *last; 7272 xmlChar *ret = NULL; 7273 7274 GROW; 7275 in = (xmlChar *) CUR_PTR; 7276 if (*in != '"' && *in != '\'') { 7277 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 7278 return (NULL); 7279 } 7280 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 7281 7282 /* 7283 * try to handle in this routine the most common case where no 7284 * allocation of a new string is required and where content is 7285 * pure ASCII. 7286 */ 7287 limit = *in++; 7288 end = ctxt->input->end; 7289 start = in; 7290 if (in >= end) { 7291 const xmlChar *oldbase = ctxt->input->base; 7292 GROW; 7293 if (oldbase != ctxt->input->base) { 7294 long delta = ctxt->input->base - oldbase; 7295 start = start + delta; 7296 in = in + delta; 7297 } 7298 end = ctxt->input->end; 7299 } 7300 if (normalize) { 7301 /* 7302 * Skip any leading spaces 7303 */ 7304 while ((in < end) && (*in != limit) && 7305 ((*in == 0x20) || (*in == 0x9) || 7306 (*in == 0xA) || (*in == 0xD))) { 7307 in++; 7308 start = in; 7309 if (in >= end) { 7310 const xmlChar *oldbase = ctxt->input->base; 7311 GROW; 7312 if (oldbase != ctxt->input->base) { 7313 long delta = ctxt->input->base - oldbase; 7314 start = start + delta; 7315 in = in + delta; 7316 } 7317 end = ctxt->input->end; 7318 } 7319 } 7320 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7321 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7322 if ((*in++ == 0x20) && (*in == 0x20)) break; 7323 if (in >= end) { 7324 const xmlChar *oldbase = ctxt->input->base; 7325 GROW; 7326 if (oldbase != ctxt->input->base) { 7327 long delta = ctxt->input->base - oldbase; 7328 start = start + delta; 7329 in = in + delta; 7330 } 7331 end = ctxt->input->end; 7332 } 7333 } 7334 last = in; 7335 /* 7336 * skip the trailing blanks 7337 */ 7338 while ((last[-1] == 0x20) && (last > start)) 
last--; 7339 while ((in < end) && (*in != limit) && 7340 ((*in == 0x20) || (*in == 0x9) || 7341 (*in == 0xA) || (*in == 0xD))) { 7342 in++; 7343 if (in >= end) { 7344 const xmlChar *oldbase = ctxt->input->base; 7345 GROW; 7346 if (oldbase != ctxt->input->base) { 7347 long delta = ctxt->input->base - oldbase; 7348 start = start + delta; 7349 in = in + delta; 7350 last = last + delta; 7351 } 7352 end = ctxt->input->end; 7353 } 7354 } 7355 if (*in != limit) goto need_complex; 7356 } else { 7357 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7358 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7359 in++; 7360 if (in >= end) { 7361 const xmlChar *oldbase = ctxt->input->base; 7362 GROW; 7363 if (oldbase != ctxt->input->base) { 7364 long delta = ctxt->input->base - oldbase; 7365 start = start + delta; 7366 in = in + delta; 7367 } 7368 end = ctxt->input->end; 7369 } 7370 } 7371 last = in; 7372 if (*in != limit) goto need_complex; 7373 } 7374 in++; 7375 if (len != NULL) { 7376 *len = last - start; 7377 ret = (xmlChar *) start; 7378 } else { 7379 if (alloc) *alloc = 1; 7380 ret = xmlStrndup(start, last - start); 7381 } 7382 CUR_PTR = in; 7383 if (alloc) *alloc = 0; 7384 return ret; 7385need_complex: 7386 if (alloc) *alloc = 1; 7387 return xmlParseAttValueComplex(ctxt, len, normalize); 7388} 7389 7390/** 7391 * xmlParseAttribute2: 7392 * @ctxt: an XML parser context 7393 * @pref: the element prefix 7394 * @elem: the element name 7395 * @prefix: a xmlChar ** used to store the value of the attribute prefix 7396 * @value: a xmlChar ** used to store the value of the attribute 7397 * @len: an int * to save the length of the attribute 7398 * @alloc: an int * to indicate if the attribute was allocated 7399 * 7400 * parse an attribute in the new SAX2 framework. 7401 * 7402 * Returns the attribute name, and the value in *value, . 
7403 */ 7404 7405static const xmlChar * 7406xmlParseAttribute2(xmlParserCtxtPtr ctxt, 7407 const xmlChar *pref, const xmlChar *elem, 7408 const xmlChar **prefix, xmlChar **value, 7409 int *len, int *alloc) { 7410 const xmlChar *name; 7411 xmlChar *val; 7412 int normalize = 0; 7413 7414 *value = NULL; 7415 GROW; 7416 name = xmlParseQName(ctxt, prefix); 7417 if (name == NULL) { 7418 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7419 "error parsing attribute name\n"); 7420 return(NULL); 7421 } 7422 7423 /* 7424 * get the type if needed 7425 */ 7426 if (ctxt->attsSpecial != NULL) { 7427 int type; 7428 7429 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 7430 pref, elem, *prefix, name); 7431 if (type != 0) normalize = 1; 7432 } 7433 7434 /* 7435 * read the value 7436 */ 7437 SKIP_BLANKS; 7438 if (RAW == '=') { 7439 NEXT; 7440 SKIP_BLANKS; 7441 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 7442 ctxt->instate = XML_PARSER_CONTENT; 7443 } else { 7444 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7445 "Specification mandate value for attribute %s\n", name); 7446 return(NULL); 7447 } 7448 7449 /* 7450 * Check that xml:lang conforms to the specification 7451 * No more registered as an error, just generate a warning now 7452 * since this was deprecated in XML second edition 7453 */ 7454 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 7455 if (!xmlCheckLanguageID(val)) { 7456 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 7457 ctxt->sax->warning(ctxt->userData, 7458 "Malformed value for xml:lang : %s\n", val); 7459 } 7460 } 7461 7462 /* 7463 * Check that xml:space conforms to the specification 7464 */ 7465 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 7466 if (xmlStrEqual(val, BAD_CAST "default")) 7467 *(ctxt->space) = 0; 7468 else if (xmlStrEqual(val, BAD_CAST "preserve")) 7469 *(ctxt->space) = 1; 7470 else { 7471 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7472"Invalid value \"%s\" for xml:space : 
\"default\" or \"preserve\" expected\n", 7473 val); 7474 } 7475 } 7476 7477 *value = val; 7478 return(name); 7479} 7480 7481/** 7482 * xmlParseStartTag2: 7483 * @ctxt: an XML parser context 7484 * 7485 * parse a start of tag either for rule element or 7486 * EmptyElement. In both case we don't parse the tag closing chars. 7487 * This routine is called when running SAX2 parsing 7488 * 7489 * [40] STag ::= '<' Name (S Attribute)* S? '>' 7490 * 7491 * [ WFC: Unique Att Spec ] 7492 * No attribute name may appear more than once in the same start-tag or 7493 * empty-element tag. 7494 * 7495 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 7496 * 7497 * [ WFC: Unique Att Spec ] 7498 * No attribute name may appear more than once in the same start-tag or 7499 * empty-element tag. 7500 * 7501 * With namespace: 7502 * 7503 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 7504 * 7505 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 7506 * 7507 * Returns the element name parsed 7508 */ 7509 7510static const xmlChar * 7511xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 7512 const xmlChar **URI) { 7513 const xmlChar *localname; 7514 const xmlChar *prefix; 7515 const xmlChar *attname; 7516 const xmlChar *aprefix; 7517 const xmlChar *nsname; 7518 xmlChar *attvalue; 7519 const xmlChar **atts = ctxt->atts; 7520 int maxatts = ctxt->maxatts; 7521 int nratts, nbatts, nbdef; 7522 int i, j, nbNs, attval; 7523 const xmlChar *base; 7524 unsigned long cur; 7525 7526 if (RAW != '<') return(NULL); 7527 NEXT1; 7528 7529 /* 7530 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 7531 * point since the attribute values may be stored as pointers to 7532 * the buffer and calling SHRINK would destroy them ! 7533 * The Shrinking is only possible once the full set of attribute 7534 * callbacks have been done. 
7535 */ 7536reparse: 7537 SHRINK; 7538 base = ctxt->input->base; 7539 cur = ctxt->input->cur - ctxt->input->base; 7540 nbatts = 0; 7541 nratts = 0; 7542 nbdef = 0; 7543 nbNs = 0; 7544 attval = 0; 7545 7546 localname = xmlParseQName(ctxt, &prefix); 7547 if (localname == NULL) { 7548 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7549 "StartTag: invalid element name\n"); 7550 return(NULL); 7551 } 7552 7553 /* 7554 * Now parse the attributes, it ends up with the ending 7555 * 7556 * (S Attribute)* S? 7557 */ 7558 SKIP_BLANKS; 7559 GROW; 7560 if (ctxt->input->base != base) goto base_changed; 7561 7562 while ((RAW != '>') && 7563 ((RAW != '/') || (NXT(1) != '>')) && 7564 (IS_CHAR((unsigned int) RAW))) { 7565 const xmlChar *q = CUR_PTR; 7566 unsigned int cons = ctxt->input->consumed; 7567 int len = -1, alloc = 0; 7568 7569 attname = xmlParseAttribute2(ctxt, prefix, localname, 7570 &aprefix, &attvalue, &len, &alloc); 7571 if ((attname != NULL) && (attvalue != NULL)) { 7572 if (len < 0) len = xmlStrlen(attvalue); 7573 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 7574 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 7575 xmlURIPtr uri; 7576 7577 if (*URL != 0) { 7578 uri = xmlParseURI((const char *) URL); 7579 if (uri == NULL) { 7580 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 7581 ctxt->sax->warning(ctxt->userData, 7582 "xmlns: %s not a valid URI\n", URL); 7583 } else { 7584 if (uri->scheme == NULL) { 7585 if ((ctxt->sax != NULL) && 7586 (ctxt->sax->warning != NULL)) 7587 ctxt->sax->warning(ctxt->userData, 7588 "xmlns: URI %s is not absolute\n", URL); 7589 } 7590 xmlFreeURI(uri); 7591 } 7592 } 7593 /* 7594 * check that it's not a defined namespace 7595 */ 7596 for (j = 1;j <= nbNs;j++) 7597 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 7598 break; 7599 if (j <= nbNs) 7600 xmlErrAttributeDup(ctxt, NULL, attname); 7601 else 7602 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 7603 if (alloc != 0) xmlFree(attvalue); 7604 SKIP_BLANKS; 7605 continue; 
7606 } 7607 if (aprefix == ctxt->str_xmlns) { 7608 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 7609 xmlURIPtr uri; 7610 7611 if (attname == ctxt->str_xml) { 7612 if (URL != ctxt->str_xml_ns) { 7613 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 7614 "xml namespace prefix mapped to wrong URI\n", 7615 NULL, NULL, NULL); 7616 } 7617 /* 7618 * Do not keep a namespace definition node 7619 */ 7620 if (alloc != 0) xmlFree(attvalue); 7621 SKIP_BLANKS; 7622 continue; 7623 } 7624 uri = xmlParseURI((const char *) URL); 7625 if (uri == NULL) { 7626 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 7627 ctxt->sax->warning(ctxt->userData, 7628 "xmlns:%s: '%s' is not a valid URI\n", 7629 attname, URL); 7630 } else { 7631 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 7632 if ((ctxt->sax != NULL) && 7633 (ctxt->sax->warning != NULL)) 7634 ctxt->sax->warning(ctxt->userData, 7635 "xmlns:%s: URI %s is not absolute\n", 7636 attname, URL); 7637 } 7638 xmlFreeURI(uri); 7639 } 7640 7641 /* 7642 * check that it's not a defined namespace 7643 */ 7644 for (j = 1;j <= nbNs;j++) 7645 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 7646 break; 7647 if (j <= nbNs) 7648 xmlErrAttributeDup(ctxt, aprefix, attname); 7649 else 7650 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 7651 if (alloc != 0) xmlFree(attvalue); 7652 SKIP_BLANKS; 7653 continue; 7654 } 7655 7656 /* 7657 * Add the pair to atts 7658 */ 7659 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 7660 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 7661 if (attvalue[len] == 0) 7662 xmlFree(attvalue); 7663 goto failed; 7664 } 7665 maxatts = ctxt->maxatts; 7666 atts = ctxt->atts; 7667 } 7668 ctxt->attallocs[nratts++] = alloc; 7669 atts[nbatts++] = attname; 7670 atts[nbatts++] = aprefix; 7671 atts[nbatts++] = NULL; /* the URI will be fetched later */ 7672 atts[nbatts++] = attvalue; 7673 attvalue += len; 7674 atts[nbatts++] = attvalue; 7675 /* 7676 * tag if some deallocation is needed 7677 */ 7678 if (alloc != 0) attval = 1; 
7679 } else { 7680 if ((attvalue != NULL) && (attvalue[len] == 0)) 7681 xmlFree(attvalue); 7682 } 7683 7684failed: 7685 7686 GROW 7687 if (ctxt->input->base != base) goto base_changed; 7688 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 7689 break; 7690 if (!IS_BLANK(RAW)) { 7691 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7692 "attributes construct error\n"); 7693 } 7694 SKIP_BLANKS; 7695 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 7696 (attname == NULL) && (attvalue == NULL)) { 7697 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7698 "xmlParseStartTag: problem parsing attributes\n"); 7699 break; 7700 } 7701 GROW; 7702 if (ctxt->input->base != base) goto base_changed; 7703 } 7704 7705 /* 7706 * The attributes checkings 7707 */ 7708 for (i = 0; i < nbatts;i += 5) { 7709 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 7710 if ((atts[i + 1] != NULL) && (nsname == NULL)) { 7711 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 7712 "Namespace prefix %s for %s on %s is not defined\n", 7713 atts[i + 1], atts[i], localname); 7714 } 7715 atts[i + 2] = nsname; 7716 /* 7717 * [ WFC: Unique Att Spec ] 7718 * No attribute name may appear more than once in the same 7719 * start-tag or empty-element tag. 7720 * As extended by the Namespace in XML REC. 
7721 */ 7722 for (j = 0; j < i;j += 5) { 7723 if (atts[i] == atts[j]) { 7724 if (atts[i+1] == atts[j+1]) { 7725 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 7726 break; 7727 } 7728 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 7729 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 7730 "Namespaced Attribute %s in '%s' redefined\n", 7731 atts[i], nsname, NULL); 7732 break; 7733 } 7734 } 7735 } 7736 } 7737 7738 /* 7739 * The attributes defaulting 7740 */ 7741 if (ctxt->attsDefault != NULL) { 7742 xmlDefAttrsPtr defaults; 7743 7744 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 7745 if (defaults != NULL) { 7746 for (i = 0;i < defaults->nbAttrs;i++) { 7747 attname = defaults->values[4 * i]; 7748 aprefix = defaults->values[4 * i + 1]; 7749 7750 /* 7751 * special work for namespaces defaulted defs 7752 */ 7753 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 7754 /* 7755 * check that it's not a defined namespace 7756 */ 7757 for (j = 1;j <= nbNs;j++) 7758 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 7759 break; 7760 if (j <= nbNs) continue; 7761 7762 nsname = xmlGetNamespace(ctxt, NULL); 7763 if (nsname != defaults->values[4 * i + 2]) { 7764 if (nsPush(ctxt, NULL, 7765 defaults->values[4 * i + 2]) > 0) 7766 nbNs++; 7767 } 7768 } else if (aprefix == ctxt->str_xmlns) { 7769 /* 7770 * check that it's not a defined namespace 7771 */ 7772 for (j = 1;j <= nbNs;j++) 7773 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 7774 break; 7775 if (j <= nbNs) continue; 7776 7777 nsname = xmlGetNamespace(ctxt, attname); 7778 if (nsname != defaults->values[2]) { 7779 if (nsPush(ctxt, attname, 7780 defaults->values[4 * i + 2]) > 0) 7781 nbNs++; 7782 } 7783 } else { 7784 /* 7785 * check that it's not a defined attribute 7786 */ 7787 for (j = 0;j < nbatts;j+=5) { 7788 if ((attname == atts[j]) && (aprefix == atts[j+1])) 7789 break; 7790 } 7791 if (j < nbatts) continue; 7792 7793 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 7794 if (xmlCtxtGrowAttrs(ctxt, nbatts 
+ 5) < 0) { 7795 goto failed; 7796 } 7797 maxatts = ctxt->maxatts; 7798 atts = ctxt->atts; 7799 } 7800 atts[nbatts++] = attname; 7801 atts[nbatts++] = aprefix; 7802 if (aprefix == NULL) 7803 atts[nbatts++] = NULL; 7804 else 7805 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 7806 atts[nbatts++] = defaults->values[4 * i + 2]; 7807 atts[nbatts++] = defaults->values[4 * i + 3]; 7808 nbdef++; 7809 } 7810 } 7811 } 7812 } 7813 7814 nsname = xmlGetNamespace(ctxt, prefix); 7815 if ((prefix != NULL) && (nsname == NULL)) { 7816 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 7817 "Namespace prefix %s on %s is not defined\n", 7818 prefix, localname, NULL); 7819 } 7820 *pref = prefix; 7821 *URI = nsname; 7822 7823 /* 7824 * SAX: Start of Element ! 7825 */ 7826 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 7827 (!ctxt->disableSAX)) { 7828 if (nbNs > 0) 7829 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 7830 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 7831 nbatts / 5, nbdef, atts); 7832 else 7833 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 7834 nsname, 0, NULL, nbatts / 5, nbdef, atts); 7835 } 7836 7837 /* 7838 * Free up attribute allocated strings if needed 7839 */ 7840 if (attval != 0) { 7841 for (i = 3,j = 0; j < nratts;i += 5,j++) 7842 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 7843 xmlFree((xmlChar *) atts[i]); 7844 } 7845 7846 return(localname); 7847 7848base_changed: 7849 /* 7850 * the attribute strings are valid iif the base didn't changed 7851 */ 7852 if (attval != 0) { 7853 for (i = 3,j = 0; j < nratts;i += 5,j++) 7854 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 7855 xmlFree((xmlChar *) atts[i]); 7856 } 7857 ctxt->input->cur = ctxt->input->base + cur; 7858 if (ctxt->wellFormed == 1) { 7859 goto reparse; 7860 } 7861 return(NULL); 7862} 7863 7864/** 7865 * xmlParseEndTag2: 7866 * @ctxt: an XML parser context 7867 * @line: line of the start tag 7868 * @nsNr: number of namespaces on the start tag 
7869 * 7870 * parse an end of tag 7871 * 7872 * [42] ETag ::= '</' Name S? '>' 7873 * 7874 * With namespace 7875 * 7876 * [NS 9] ETag ::= '</' QName S? '>' 7877 */ 7878 7879static void 7880xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 7881 const xmlChar *URI, int line, int nsNr) { 7882 const xmlChar *name; 7883 7884 GROW; 7885 if ((RAW != '<') || (NXT(1) != '/')) { 7886 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 7887 return; 7888 } 7889 SKIP(2); 7890 7891 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 7892 7893 /* 7894 * We should definitely be at the ending "S? '>'" part 7895 */ 7896 GROW; 7897 SKIP_BLANKS; 7898 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) { 7899 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 7900 } else 7901 NEXT1; 7902 7903 /* 7904 * [ WFC: Element Type Match ] 7905 * The Name in an element's end-tag must match the element type in the 7906 * start-tag. 7907 * 7908 */ 7909 if (name != (xmlChar*)1) { 7910 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 7911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 7912 if (name != NULL) { 7913 ctxt->sax->error(ctxt->userData, 7914 "Opening and ending tag mismatch: %s line %d and %s\n", 7915 ctxt->name, line, name); 7916 } else { 7917 ctxt->sax->error(ctxt->userData, 7918 "Ending tag error for: %s line %d\n", ctxt->name, line); 7919 } 7920 7921 } 7922 ctxt->wellFormed = 0; 7923 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7924 } 7925 7926 /* 7927 * SAX: End of Tag 7928 */ 7929 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 7930 (!ctxt->disableSAX)) 7931 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 7932 7933 spacePop(ctxt); 7934 if (nsNr != 0) 7935 nsPop(ctxt, nsNr); 7936 return; 7937} 7938 7939/** 7940 * xmlParseCDSect: 7941 * @ctxt: an XML parser context 7942 * 7943 * Parse escaped pure raw content. 
7944 * 7945 * [18] CDSect ::= CDStart CData CDEnd 7946 * 7947 * [19] CDStart ::= '<![CDATA[' 7948 * 7949 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 7950 * 7951 * [21] CDEnd ::= ']]>' 7952 */ 7953void 7954xmlParseCDSect(xmlParserCtxtPtr ctxt) { 7955 xmlChar *buf = NULL; 7956 int len = 0; 7957 int size = XML_PARSER_BUFFER_SIZE; 7958 int r, rl; 7959 int s, sl; 7960 int cur, l; 7961 int count = 0; 7962 7963 if ((NXT(0) == '<') && (NXT(1) == '!') && 7964 (NXT(2) == '[') && (NXT(3) == 'C') && 7965 (NXT(4) == 'D') && (NXT(5) == 'A') && 7966 (NXT(6) == 'T') && (NXT(7) == 'A') && 7967 (NXT(8) == '[')) { 7968 SKIP(9); 7969 } else 7970 return; 7971 7972 ctxt->instate = XML_PARSER_CDATA_SECTION; 7973 r = CUR_CHAR(rl); 7974 if (!IS_CHAR(r)) { 7975 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 7976 ctxt->instate = XML_PARSER_CONTENT; 7977 return; 7978 } 7979 NEXTL(rl); 7980 s = CUR_CHAR(sl); 7981 if (!IS_CHAR(s)) { 7982 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 7983 ctxt->instate = XML_PARSER_CONTENT; 7984 return; 7985 } 7986 NEXTL(sl); 7987 cur = CUR_CHAR(l); 7988 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 7989 if (buf == NULL) { 7990 xmlErrMemory(ctxt, NULL); 7991 return; 7992 } 7993 while (IS_CHAR(cur) && 7994 ((r != ']') || (s != ']') || (cur != '>'))) { 7995 if (len + 5 >= size) { 7996 size *= 2; 7997 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 7998 if (buf == NULL) { 7999 xmlErrMemory(ctxt, NULL); 8000 return; 8001 } 8002 } 8003 COPY_BUF(rl,buf,len,r); 8004 r = s; 8005 rl = sl; 8006 s = cur; 8007 sl = l; 8008 count++; 8009 if (count > 50) { 8010 GROW; 8011 count = 0; 8012 } 8013 NEXTL(l); 8014 cur = CUR_CHAR(l); 8015 } 8016 buf[len] = 0; 8017 ctxt->instate = XML_PARSER_CONTENT; 8018 if (cur != '>') { 8019 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 8020 "CData section not finished\n%.50s\n", buf); 8021 xmlFree(buf); 8022 return; 8023 } 8024 NEXTL(l); 8025 8026 /* 8027 * OK the buffer is to be consumed as cdata. 
8028 */ 8029 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8030 if (ctxt->sax->cdataBlock != NULL) 8031 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 8032 else if (ctxt->sax->characters != NULL) 8033 ctxt->sax->characters(ctxt->userData, buf, len); 8034 } 8035 xmlFree(buf); 8036} 8037 8038/** 8039 * xmlParseContent: 8040 * @ctxt: an XML parser context 8041 * 8042 * Parse a content: 8043 * 8044 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 8045 */ 8046 8047void 8048xmlParseContent(xmlParserCtxtPtr ctxt) { 8049 GROW; 8050 while ((RAW != 0) && 8051 ((RAW != '<') || (NXT(1) != '/'))) { 8052 const xmlChar *test = CUR_PTR; 8053 unsigned int cons = ctxt->input->consumed; 8054 const xmlChar *cur = ctxt->input->cur; 8055 8056 /* 8057 * First case : a Processing Instruction. 8058 */ 8059 if ((*cur == '<') && (cur[1] == '?')) { 8060 xmlParsePI(ctxt); 8061 } 8062 8063 /* 8064 * Second case : a CDSection 8065 */ 8066 else if ((*cur == '<') && (NXT(1) == '!') && 8067 (NXT(2) == '[') && (NXT(3) == 'C') && 8068 (NXT(4) == 'D') && (NXT(5) == 'A') && 8069 (NXT(6) == 'T') && (NXT(7) == 'A') && 8070 (NXT(8) == '[')) { 8071 xmlParseCDSect(ctxt); 8072 } 8073 8074 /* 8075 * Third case : a comment 8076 */ 8077 else if ((*cur == '<') && (NXT(1) == '!') && 8078 (NXT(2) == '-') && (NXT(3) == '-')) { 8079 xmlParseComment(ctxt); 8080 ctxt->instate = XML_PARSER_CONTENT; 8081 } 8082 8083 /* 8084 * Fourth case : a sub-element. 8085 */ 8086 else if (*cur == '<') { 8087 xmlParseElement(ctxt); 8088 } 8089 8090 /* 8091 * Fifth case : a reference. If if has not been resolved, 8092 * parsing returns it's Name, create the node 8093 */ 8094 8095 else if (*cur == '&') { 8096 xmlParseReference(ctxt); 8097 } 8098 8099 /* 8100 * Last case, text. Note that References are handled directly. 8101 */ 8102 else { 8103 xmlParseCharData(ctxt, 0); 8104 } 8105 8106 GROW; 8107 /* 8108 * Pop-up of finished entities. 
8109 */ 8110 while ((RAW == 0) && (ctxt->inputNr > 1)) 8111 xmlPopInput(ctxt); 8112 SHRINK; 8113 8114 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 8115 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8116 "detected an error in element content\n"); 8117 ctxt->instate = XML_PARSER_EOF; 8118 break; 8119 } 8120 } 8121} 8122 8123/** 8124 * xmlParseElement: 8125 * @ctxt: an XML parser context 8126 * 8127 * parse an XML element, this is highly recursive 8128 * 8129 * [39] element ::= EmptyElemTag | STag content ETag 8130 * 8131 * [ WFC: Element Type Match ] 8132 * The Name in an element's end-tag must match the element type in the 8133 * start-tag. 8134 * 8135 */ 8136 8137void 8138xmlParseElement(xmlParserCtxtPtr ctxt) { 8139 const xmlChar *name; 8140 const xmlChar *prefix; 8141 const xmlChar *URI; 8142 xmlParserNodeInfo node_info; 8143 int line; 8144 xmlNodePtr ret; 8145 int nsNr = ctxt->nsNr; 8146 8147 /* Capture start position */ 8148 if (ctxt->record_info) { 8149 node_info.begin_pos = ctxt->input->consumed + 8150 (CUR_PTR - ctxt->input->base); 8151 node_info.begin_line = ctxt->input->line; 8152 } 8153 8154 if (ctxt->spaceNr == 0) 8155 spacePush(ctxt, -1); 8156 else 8157 spacePush(ctxt, *ctxt->space); 8158 8159 line = ctxt->input->line; 8160 if (ctxt->sax2) 8161 name = xmlParseStartTag2(ctxt, &prefix, &URI); 8162 else 8163 name = xmlParseStartTag(ctxt); 8164 if (name == NULL) { 8165 spacePop(ctxt); 8166 return; 8167 } 8168 namePush(ctxt, name); 8169 ret = ctxt->node; 8170 8171 /* 8172 * [ VC: Root Element Type ] 8173 * The Name in the document type declaration must match the element 8174 * type of the root element. 8175 */ 8176 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 8177 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 8178 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8179 8180 /* 8181 * Check for an Empty Element. 
8182 */ 8183 if ((RAW == '/') && (NXT(1) == '>')) { 8184 SKIP(2); 8185 if (ctxt->sax2) { 8186 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 8187 (!ctxt->disableSAX)) 8188 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 8189 } else { 8190 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8191 (!ctxt->disableSAX)) 8192 ctxt->sax->endElement(ctxt->userData, name); 8193 } 8194 namePop(ctxt); 8195 spacePop(ctxt); 8196 if (nsNr != ctxt->nsNr) 8197 nsPop(ctxt, ctxt->nsNr - nsNr); 8198 if ( ret != NULL && ctxt->record_info ) { 8199 node_info.end_pos = ctxt->input->consumed + 8200 (CUR_PTR - ctxt->input->base); 8201 node_info.end_line = ctxt->input->line; 8202 node_info.node = ret; 8203 xmlParserAddNodeInfo(ctxt, &node_info); 8204 } 8205 return; 8206 } 8207 if (RAW == '>') { 8208 NEXT1; 8209 } else { 8210 ctxt->errNo = XML_ERR_GT_REQUIRED; 8211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8212 ctxt->sax->error(ctxt->userData, 8213 "Couldn't find end of Start Tag %s line %d\n", 8214 name, line); 8215 ctxt->wellFormed = 0; 8216 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 8217 8218 /* 8219 * end of parsing of this node. 
8220 */ 8221 nodePop(ctxt); 8222 namePop(ctxt); 8223 spacePop(ctxt); 8224 if (nsNr != ctxt->nsNr) 8225 nsPop(ctxt, ctxt->nsNr - nsNr); 8226 8227 /* 8228 * Capture end position and add node 8229 */ 8230 if ( ret != NULL && ctxt->record_info ) { 8231 node_info.end_pos = ctxt->input->consumed + 8232 (CUR_PTR - ctxt->input->base); 8233 node_info.end_line = ctxt->input->line; 8234 node_info.node = ret; 8235 xmlParserAddNodeInfo(ctxt, &node_info); 8236 } 8237 return; 8238 } 8239 8240 /* 8241 * Parse the content of the element: 8242 */ 8243 xmlParseContent(ctxt); 8244 if (!IS_CHAR((unsigned int) RAW)) { 8245 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED; 8246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8247 ctxt->sax->error(ctxt->userData, 8248 "Premature end of data in tag %s line %d\n", name, line); 8249 ctxt->wellFormed = 0; 8250 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 8251 8252 /* 8253 * end of parsing of this node. 8254 */ 8255 nodePop(ctxt); 8256 namePop(ctxt); 8257 spacePop(ctxt); 8258 if (nsNr != ctxt->nsNr) 8259 nsPop(ctxt, ctxt->nsNr - nsNr); 8260 return; 8261 } 8262 8263 /* 8264 * parse the end of tag: '</' should be here. 8265 */ 8266 if (ctxt->sax2) { 8267 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr); 8268 namePop(ctxt); 8269 } else 8270 xmlParseEndTag1(ctxt, line); 8271 8272 /* 8273 * Capture end position and add node 8274 */ 8275 if ( ret != NULL && ctxt->record_info ) { 8276 node_info.end_pos = ctxt->input->consumed + 8277 (CUR_PTR - ctxt->input->base); 8278 node_info.end_line = ctxt->input->line; 8279 node_info.node = ret; 8280 xmlParserAddNodeInfo(ctxt, &node_info); 8281 } 8282} 8283 8284/** 8285 * xmlParseVersionNum: 8286 * @ctxt: an XML parser context 8287 * 8288 * parse the XML version value. 
8289 * 8290 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 8291 * 8292 * Returns the string giving the XML version number, or NULL 8293 */ 8294xmlChar * 8295xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 8296 xmlChar *buf = NULL; 8297 int len = 0; 8298 int size = 10; 8299 xmlChar cur; 8300 8301 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8302 if (buf == NULL) { 8303 xmlErrMemory(ctxt, NULL); 8304 return(NULL); 8305 } 8306 cur = CUR; 8307 while (((cur >= 'a') && (cur <= 'z')) || 8308 ((cur >= 'A') && (cur <= 'Z')) || 8309 ((cur >= '0') && (cur <= '9')) || 8310 (cur == '_') || (cur == '.') || 8311 (cur == ':') || (cur == '-')) { 8312 if (len + 1 >= size) { 8313 size *= 2; 8314 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8315 if (buf == NULL) { 8316 xmlErrMemory(ctxt, NULL); 8317 return(NULL); 8318 } 8319 } 8320 buf[len++] = cur; 8321 NEXT; 8322 cur=CUR; 8323 } 8324 buf[len] = 0; 8325 return(buf); 8326} 8327 8328/** 8329 * xmlParseVersionInfo: 8330 * @ctxt: an XML parser context 8331 * 8332 * parse the XML version. 8333 * 8334 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 8335 * 8336 * [25] Eq ::= S? '=' S? 8337 * 8338 * Returns the version string, e.g. 
"1.0" 8339 */ 8340 8341xmlChar * 8342xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 8343 xmlChar *version = NULL; 8344 const xmlChar *q; 8345 8346 if ((RAW == 'v') && (NXT(1) == 'e') && 8347 (NXT(2) == 'r') && (NXT(3) == 's') && 8348 (NXT(4) == 'i') && (NXT(5) == 'o') && 8349 (NXT(6) == 'n')) { 8350 SKIP(7); 8351 SKIP_BLANKS; 8352 if (RAW != '=') { 8353 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8354 return(NULL); 8355 } 8356 NEXT; 8357 SKIP_BLANKS; 8358 if (RAW == '"') { 8359 NEXT; 8360 q = CUR_PTR; 8361 version = xmlParseVersionNum(ctxt); 8362 if (RAW != '"') { 8363 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8364 } else 8365 NEXT; 8366 } else if (RAW == '\''){ 8367 NEXT; 8368 q = CUR_PTR; 8369 version = xmlParseVersionNum(ctxt); 8370 if (RAW != '\'') { 8371 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8372 } else 8373 NEXT; 8374 } else { 8375 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8376 } 8377 } 8378 return(version); 8379} 8380 8381/** 8382 * xmlParseEncName: 8383 * @ctxt: an XML parser context 8384 * 8385 * parse the XML encoding name 8386 * 8387 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 8388 * 8389 * Returns the encoding name value or NULL 8390 */ 8391xmlChar * 8392xmlParseEncName(xmlParserCtxtPtr ctxt) { 8393 xmlChar *buf = NULL; 8394 int len = 0; 8395 int size = 10; 8396 xmlChar cur; 8397 8398 cur = CUR; 8399 if (((cur >= 'a') && (cur <= 'z')) || 8400 ((cur >= 'A') && (cur <= 'Z'))) { 8401 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8402 if (buf == NULL) { 8403 xmlErrMemory(ctxt, NULL); 8404 return(NULL); 8405 } 8406 8407 buf[len++] = cur; 8408 NEXT; 8409 cur = CUR; 8410 while (((cur >= 'a') && (cur <= 'z')) || 8411 ((cur >= 'A') && (cur <= 'Z')) || 8412 ((cur >= '0') && (cur <= '9')) || 8413 (cur == '.') || (cur == '_') || 8414 (cur == '-')) { 8415 if (len + 1 >= size) { 8416 size *= 2; 8417 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8418 if (buf == NULL) { 8419 xmlErrMemory(ctxt, 
NULL); 8420 return(NULL); 8421 } 8422 } 8423 buf[len++] = cur; 8424 NEXT; 8425 cur = CUR; 8426 if (cur == 0) { 8427 SHRINK; 8428 GROW; 8429 cur = CUR; 8430 } 8431 } 8432 buf[len] = 0; 8433 } else { 8434 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 8435 } 8436 return(buf); 8437} 8438 8439/** 8440 * xmlParseEncodingDecl: 8441 * @ctxt: an XML parser context 8442 * 8443 * parse the XML encoding declaration 8444 * 8445 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 8446 * 8447 * this setups the conversion filters. 8448 * 8449 * Returns the encoding value or NULL 8450 */ 8451 8452const xmlChar * 8453xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 8454 xmlChar *encoding = NULL; 8455 const xmlChar *q; 8456 8457 SKIP_BLANKS; 8458 if ((RAW == 'e') && (NXT(1) == 'n') && 8459 (NXT(2) == 'c') && (NXT(3) == 'o') && 8460 (NXT(4) == 'd') && (NXT(5) == 'i') && 8461 (NXT(6) == 'n') && (NXT(7) == 'g')) { 8462 SKIP(8); 8463 SKIP_BLANKS; 8464 if (RAW != '=') { 8465 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8466 return(NULL); 8467 } 8468 NEXT; 8469 SKIP_BLANKS; 8470 if (RAW == '"') { 8471 NEXT; 8472 q = CUR_PTR; 8473 encoding = xmlParseEncName(ctxt); 8474 if (RAW != '"') { 8475 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8476 } else 8477 NEXT; 8478 } else if (RAW == '\''){ 8479 NEXT; 8480 q = CUR_PTR; 8481 encoding = xmlParseEncName(ctxt); 8482 if (RAW != '\'') { 8483 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8484 } else 8485 NEXT; 8486 } else { 8487 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8488 } 8489 /* 8490 * UTF-16 encoding stwich has already taken place at this stage, 8491 * more over the little-endian/big-endian selection is already done 8492 */ 8493 if ((encoding != NULL) && 8494 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 8495 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 8496 if (ctxt->encoding != NULL) 8497 xmlFree((xmlChar *) ctxt->encoding); 8498 ctxt->encoding = encoding; 8499 } 8500 /* 8501 * UTF-8 
encoding is handled natively 8502 */ 8503 else if ((encoding != NULL) && 8504 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 8505 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 8506 if (ctxt->encoding != NULL) 8507 xmlFree((xmlChar *) ctxt->encoding); 8508 ctxt->encoding = encoding; 8509 } 8510 else if (encoding != NULL) { 8511 xmlCharEncodingHandlerPtr handler; 8512 8513 if (ctxt->input->encoding != NULL) 8514 xmlFree((xmlChar *) ctxt->input->encoding); 8515 ctxt->input->encoding = encoding; 8516 8517 handler = xmlFindCharEncodingHandler((const char *) encoding); 8518 if (handler != NULL) { 8519 xmlSwitchToEncoding(ctxt, handler); 8520 } else { 8521 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 8522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8523 ctxt->sax->error(ctxt->userData, 8524 "Unsupported encoding %s\n", encoding); 8525 return(NULL); 8526 } 8527 } 8528 } 8529 return(encoding); 8530} 8531 8532/** 8533 * xmlParseSDDecl: 8534 * @ctxt: an XML parser context 8535 * 8536 * parse the XML standalone declaration 8537 * 8538 * [32] SDDecl ::= S 'standalone' Eq 8539 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 8540 * 8541 * [ VC: Standalone Document Declaration ] 8542 * TODO The standalone document declaration must have the value "no" 8543 * if any external markup declarations contain declarations of: 8544 * - attributes with default values, if elements to which these 8545 * attributes apply appear in the document without specifications 8546 * of values for these attributes, or 8547 * - entities (other than amp, lt, gt, apos, quot), if references 8548 * to those entities appear in the document, or 8549 * - attributes with values subject to normalization, where the 8550 * attribute appears in the document with a value which will change 8551 * as a result of normalization, or 8552 * - element types with element content, if white space occurs directly 8553 * within any instance of those types. 
8554 * 8555 * Returns 1 if standalone, 0 otherwise 8556 */ 8557 8558int 8559xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 8560 int standalone = -1; 8561 8562 SKIP_BLANKS; 8563 if ((RAW == 's') && (NXT(1) == 't') && 8564 (NXT(2) == 'a') && (NXT(3) == 'n') && 8565 (NXT(4) == 'd') && (NXT(5) == 'a') && 8566 (NXT(6) == 'l') && (NXT(7) == 'o') && 8567 (NXT(8) == 'n') && (NXT(9) == 'e')) { 8568 SKIP(10); 8569 SKIP_BLANKS; 8570 if (RAW != '=') { 8571 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8572 return(standalone); 8573 } 8574 NEXT; 8575 SKIP_BLANKS; 8576 if (RAW == '\''){ 8577 NEXT; 8578 if ((RAW == 'n') && (NXT(1) == 'o')) { 8579 standalone = 0; 8580 SKIP(2); 8581 } else if ((RAW == 'y') && (NXT(1) == 'e') && 8582 (NXT(2) == 's')) { 8583 standalone = 1; 8584 SKIP(3); 8585 } else { 8586 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 8587 } 8588 if (RAW != '\'') { 8589 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8590 } else 8591 NEXT; 8592 } else if (RAW == '"'){ 8593 NEXT; 8594 if ((RAW == 'n') && (NXT(1) == 'o')) { 8595 standalone = 0; 8596 SKIP(2); 8597 } else if ((RAW == 'y') && (NXT(1) == 'e') && 8598 (NXT(2) == 's')) { 8599 standalone = 1; 8600 SKIP(3); 8601 } else { 8602 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 8603 } 8604 if (RAW != '"') { 8605 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8606 } else 8607 NEXT; 8608 } else { 8609 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8610 } 8611 } 8612 return(standalone); 8613} 8614 8615/** 8616 * xmlParseXMLDecl: 8617 * @ctxt: an XML parser context 8618 * 8619 * parse an XML declaration header 8620 * 8621 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 8622 */ 8623 8624void 8625xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 8626 xmlChar *version; 8627 8628 /* 8629 * We know that '<?xml' is here. 
8630 */ 8631 SKIP(5); 8632 8633 if (!IS_BLANK(RAW)) { 8634 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8635 "Blank needed after '<?xml'\n"); 8636 } 8637 SKIP_BLANKS; 8638 8639 /* 8640 * We must have the VersionInfo here. 8641 */ 8642 version = xmlParseVersionInfo(ctxt); 8643 if (version == NULL) { 8644 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 8645 } else { 8646 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 8647 /* 8648 * TODO: Blueberry should be detected here 8649 */ 8650 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 8651 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n", 8652 version); 8653 } 8654 if (ctxt->version != NULL) 8655 xmlFree((void *) ctxt->version); 8656 ctxt->version = version; 8657 } 8658 8659 /* 8660 * We may have the encoding declaration 8661 */ 8662 if (!IS_BLANK(RAW)) { 8663 if ((RAW == '?') && (NXT(1) == '>')) { 8664 SKIP(2); 8665 return; 8666 } 8667 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 8668 } 8669 xmlParseEncodingDecl(ctxt); 8670 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8671 /* 8672 * The XML REC instructs us to stop parsing right here 8673 */ 8674 return; 8675 } 8676 8677 /* 8678 * We may have the standalone status. 8679 */ 8680 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) { 8681 if ((RAW == '?') && (NXT(1) == '>')) { 8682 SKIP(2); 8683 return; 8684 } 8685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 8686 } 8687 SKIP_BLANKS; 8688 ctxt->input->standalone = xmlParseSDDecl(ctxt); 8689 8690 SKIP_BLANKS; 8691 if ((RAW == '?') && (NXT(1) == '>')) { 8692 SKIP(2); 8693 } else if (RAW == '>') { 8694 /* Deprecated old WD ... 
*/ 8695 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 8696 NEXT; 8697 } else { 8698 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 8699 MOVETO_ENDTAG(CUR_PTR); 8700 NEXT; 8701 } 8702} 8703 8704/** 8705 * xmlParseMisc: 8706 * @ctxt: an XML parser context 8707 * 8708 * parse an XML Misc* optional field. 8709 * 8710 * [27] Misc ::= Comment | PI | S 8711 */ 8712 8713void 8714xmlParseMisc(xmlParserCtxtPtr ctxt) { 8715 while (((RAW == '<') && (NXT(1) == '?')) || 8716 ((RAW == '<') && (NXT(1) == '!') && 8717 (NXT(2) == '-') && (NXT(3) == '-')) || 8718 IS_BLANK(CUR)) { 8719 if ((RAW == '<') && (NXT(1) == '?')) { 8720 xmlParsePI(ctxt); 8721 } else if (IS_BLANK(CUR)) { 8722 NEXT; 8723 } else 8724 xmlParseComment(ctxt); 8725 } 8726} 8727 8728/** 8729 * xmlParseDocument: 8730 * @ctxt: an XML parser context 8731 * 8732 * parse an XML document (and build a tree if using the standard SAX 8733 * interface). 8734 * 8735 * [1] document ::= prolog element Misc* 8736 * 8737 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 8738 * 8739 * Returns 0, -1 in case of error. the parser context is augmented 8740 * as a result of the parsing. 8741 */ 8742 8743int 8744xmlParseDocument(xmlParserCtxtPtr ctxt) { 8745 xmlChar start[4]; 8746 xmlCharEncoding enc; 8747 8748 xmlInitParser(); 8749 8750 GROW; 8751 8752 /* 8753 * SAX: detecting the level. 8754 */ 8755 xmlDetectSAX2(ctxt); 8756 8757 /* 8758 * SAX: beginning of the document processing. 8759 */ 8760 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8761 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 8762 8763 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) { 8764 /* 8765 * Get the 4 first bytes and decode the charset 8766 * if enc != XML_CHAR_ENCODING_NONE 8767 * plug some encoding conversion routines. 
8768 */ 8769 start[0] = RAW; 8770 start[1] = NXT(1); 8771 start[2] = NXT(2); 8772 start[3] = NXT(3); 8773 enc = xmlDetectCharEncoding(start, 4); 8774 if (enc != XML_CHAR_ENCODING_NONE) { 8775 xmlSwitchEncoding(ctxt, enc); 8776 } 8777 } 8778 8779 8780 if (CUR == 0) { 8781 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 8782 } 8783 8784 /* 8785 * Check for the XMLDecl in the Prolog. 8786 */ 8787 GROW; 8788 if ((RAW == '<') && (NXT(1) == '?') && 8789 (NXT(2) == 'x') && (NXT(3) == 'm') && 8790 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 8791 8792 /* 8793 * Note that we will switch encoding on the fly. 8794 */ 8795 xmlParseXMLDecl(ctxt); 8796 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8797 /* 8798 * The XML REC instructs us to stop parsing right here 8799 */ 8800 return(-1); 8801 } 8802 ctxt->standalone = ctxt->input->standalone; 8803 SKIP_BLANKS; 8804 } else { 8805 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8806 } 8807 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 8808 ctxt->sax->startDocument(ctxt->userData); 8809 8810 /* 8811 * The Misc part of the Prolog 8812 */ 8813 GROW; 8814 xmlParseMisc(ctxt); 8815 8816 /* 8817 * Then possibly doc type declaration(s) and more Misc 8818 * (doctypedecl Misc*)? 8819 */ 8820 GROW; 8821 if ((RAW == '<') && (NXT(1) == '!') && 8822 (NXT(2) == 'D') && (NXT(3) == 'O') && 8823 (NXT(4) == 'C') && (NXT(5) == 'T') && 8824 (NXT(6) == 'Y') && (NXT(7) == 'P') && 8825 (NXT(8) == 'E')) { 8826 8827 ctxt->inSubset = 1; 8828 xmlParseDocTypeDecl(ctxt); 8829 if (RAW == '[') { 8830 ctxt->instate = XML_PARSER_DTD; 8831 xmlParseInternalSubset(ctxt); 8832 } 8833 8834 /* 8835 * Create and update the external subset. 
8836 */ 8837 ctxt->inSubset = 2; 8838 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 8839 (!ctxt->disableSAX)) 8840 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8841 ctxt->extSubSystem, ctxt->extSubURI); 8842 ctxt->inSubset = 0; 8843 8844 8845 ctxt->instate = XML_PARSER_PROLOG; 8846 xmlParseMisc(ctxt); 8847 } 8848 8849 /* 8850 * Time to start parsing the tree itself 8851 */ 8852 GROW; 8853 if (RAW != '<') { 8854 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 8855 "Start tag expected, '<' not found\n"); 8856 } else { 8857 ctxt->instate = XML_PARSER_CONTENT; 8858 xmlParseElement(ctxt); 8859 ctxt->instate = XML_PARSER_EPILOG; 8860 8861 8862 /* 8863 * The Misc part at the end 8864 */ 8865 xmlParseMisc(ctxt); 8866 8867 if (RAW != 0) { 8868 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 8869 } 8870 ctxt->instate = XML_PARSER_EOF; 8871 } 8872 8873 /* 8874 * SAX: end of the document processing. 8875 */ 8876 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8877 ctxt->sax->endDocument(ctxt->userData); 8878 8879 /* 8880 * Remove locally kept entity definitions if the tree was not built 8881 */ 8882 if ((ctxt->myDoc != NULL) && 8883 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 8884 xmlFreeDoc(ctxt->myDoc); 8885 ctxt->myDoc = NULL; 8886 } 8887 8888 if (! ctxt->wellFormed) { 8889 ctxt->valid = 0; 8890 return(-1); 8891 } 8892 return(0); 8893} 8894 8895/** 8896 * xmlParseExtParsedEnt: 8897 * @ctxt: an XML parser context 8898 * 8899 * parse a general parsed entity 8900 * An external general parsed entity is well-formed if it matches the 8901 * production labeled extParsedEnt. 8902 * 8903 * [78] extParsedEnt ::= TextDecl? content 8904 * 8905 * Returns 0, -1 in case of error. the parser context is augmented 8906 * as a result of the parsing. 
8907 */ 8908 8909int 8910xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 8911 xmlChar start[4]; 8912 xmlCharEncoding enc; 8913 8914 xmlDefaultSAXHandlerInit(); 8915 8916 xmlDetectSAX2(ctxt); 8917 8918 GROW; 8919 8920 /* 8921 * SAX: beginning of the document processing. 8922 */ 8923 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8924 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 8925 8926 /* 8927 * Get the 4 first bytes and decode the charset 8928 * if enc != XML_CHAR_ENCODING_NONE 8929 * plug some encoding conversion routines. 8930 */ 8931 start[0] = RAW; 8932 start[1] = NXT(1); 8933 start[2] = NXT(2); 8934 start[3] = NXT(3); 8935 enc = xmlDetectCharEncoding(start, 4); 8936 if (enc != XML_CHAR_ENCODING_NONE) { 8937 xmlSwitchEncoding(ctxt, enc); 8938 } 8939 8940 8941 if (CUR == 0) { 8942 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 8943 } 8944 8945 /* 8946 * Check for the XMLDecl in the Prolog. 8947 */ 8948 GROW; 8949 if ((RAW == '<') && (NXT(1) == '?') && 8950 (NXT(2) == 'x') && (NXT(3) == 'm') && 8951 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 8952 8953 /* 8954 * Note that we will switch encoding on the fly. 
8955 */ 8956 xmlParseXMLDecl(ctxt); 8957 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8958 /* 8959 * The XML REC instructs us to stop parsing right here 8960 */ 8961 return(-1); 8962 } 8963 SKIP_BLANKS; 8964 } else { 8965 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8966 } 8967 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 8968 ctxt->sax->startDocument(ctxt->userData); 8969 8970 /* 8971 * Doing validity checking on chunk doesn't make sense 8972 */ 8973 ctxt->instate = XML_PARSER_CONTENT; 8974 ctxt->validate = 0; 8975 ctxt->loadsubset = 0; 8976 ctxt->depth = 0; 8977 8978 xmlParseContent(ctxt); 8979 8980 if ((RAW == '<') && (NXT(1) == '/')) { 8981 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 8982 } else if (RAW != 0) { 8983 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 8984 } 8985 8986 /* 8987 * SAX: end of the document processing. 8988 */ 8989 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8990 ctxt->sax->endDocument(ctxt->userData); 8991 8992 if (! ctxt->wellFormed) return(-1); 8993 return(0); 8994} 8995 8996/************************************************************************ 8997 * * 8998 * Progressive parsing interfaces * 8999 * * 9000 ************************************************************************/ 9001 9002/** 9003 * xmlParseLookupSequence: 9004 * @ctxt: an XML parser context 9005 * @first: the first char to lookup 9006 * @next: the next char to lookup or zero 9007 * @third: the next char to lookup or zero 9008 * 9009 * Try to find if a sequence (first, next, third) or just (first next) or 9010 * (first) is available in the input stream. 9011 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 9012 * to avoid rescanning sequences of bytes, it DOES change the state of the 9013 * parser, do not use liberally. 9014 * 9015 * Returns the index to the current parsing point if the full sequence 9016 * is available, -1 otherwise. 
9017 */ 9018static int 9019xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 9020 xmlChar next, xmlChar third) { 9021 int base, len; 9022 xmlParserInputPtr in; 9023 const xmlChar *buf; 9024 9025 in = ctxt->input; 9026 if (in == NULL) return(-1); 9027 base = in->cur - in->base; 9028 if (base < 0) return(-1); 9029 if (ctxt->checkIndex > base) 9030 base = ctxt->checkIndex; 9031 if (in->buf == NULL) { 9032 buf = in->base; 9033 len = in->length; 9034 } else { 9035 buf = in->buf->buffer->content; 9036 len = in->buf->buffer->use; 9037 } 9038 /* take into account the sequence length */ 9039 if (third) len -= 2; 9040 else if (next) len --; 9041 for (;base < len;base++) { 9042 if (buf[base] == first) { 9043 if (third != 0) { 9044 if ((buf[base + 1] != next) || 9045 (buf[base + 2] != third)) continue; 9046 } else if (next != 0) { 9047 if (buf[base + 1] != next) continue; 9048 } 9049 ctxt->checkIndex = 0; 9050#ifdef DEBUG_PUSH 9051 if (next == 0) 9052 xmlGenericError(xmlGenericErrorContext, 9053 "PP: lookup '%c' found at %d\n", 9054 first, base); 9055 else if (third == 0) 9056 xmlGenericError(xmlGenericErrorContext, 9057 "PP: lookup '%c%c' found at %d\n", 9058 first, next, base); 9059 else 9060 xmlGenericError(xmlGenericErrorContext, 9061 "PP: lookup '%c%c%c' found at %d\n", 9062 first, next, third, base); 9063#endif 9064 return(base - (in->cur - in->base)); 9065 } 9066 } 9067 ctxt->checkIndex = base; 9068#ifdef DEBUG_PUSH 9069 if (next == 0) 9070 xmlGenericError(xmlGenericErrorContext, 9071 "PP: lookup '%c' failed\n", first); 9072 else if (third == 0) 9073 xmlGenericError(xmlGenericErrorContext, 9074 "PP: lookup '%c%c' failed\n", first, next); 9075 else 9076 xmlGenericError(xmlGenericErrorContext, 9077 "PP: lookup '%c%c%c' failed\n", first, next, third); 9078#endif 9079 return(-1); 9080} 9081 9082/** 9083 * xmlParseGetLasts: 9084 * @ctxt: an XML parser context 9085 * @lastlt: pointer to store the last '<' from the input 9086 * @lastgt: pointer to store the last '>' 
 * from the input
 *
 * Lookup the last < and > in the current chunk.
 *
 * The lookup is only done when ctxt->progressive is 1 and exactly one
 * input is on the input stack; it walks backward from ctxt->input->end
 * down to ctxt->input->base. In any other case, and for a character that
 * is not found, the corresponding output is set to NULL.
 */
static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
                 const xmlChar **lastgt) {
    const xmlChar *tmp;

    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
        xmlGenericError(xmlGenericErrorContext,
                    "Internal error: xmlParseGetLasts\n");
        return;
    }
    if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
        /* find the last '<' or '>' whichever comes last in the buffer */
        tmp = ctxt->input->end;
        tmp--;
        while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
               (*tmp != '>')) tmp--;
        if (tmp < ctxt->input->base) {
            /* neither character is present */
            *lastlt = NULL;
            *lastgt = NULL;
        } else if (*tmp == '<') {
            /* '<' is last, keep going backward for the preceding '>' */
            *lastlt = tmp;
            tmp--;
            while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
            if (tmp < ctxt->input->base)
                *lastgt = NULL;
            else
                *lastgt = tmp;
        } else {
            /* '>' is last, keep going backward for the preceding '<' */
            *lastgt = tmp;
            tmp--;
            while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
            if (tmp < ctxt->input->base)
                *lastlt = NULL;
            else
                *lastlt = tmp;
        }

    } else {
        *lastlt = NULL;
        *lastgt = NULL;
    }
}
/**
 * xmlParseTryOrFinish:
 * @ctxt:  an XML parser context
 * @terminate:  last chunk indicator
 *
 * Try to progress on parsing.
 *
 * Runs the push parser state machine selected by ctxt->instate in a loop
 * and leaves it when no input is available, when a construct is not yet
 * complete in the buffer (unless @terminate is set), or when the EOF
 * state is reached.
 *
 * Returns zero if no parsing was possible. In this function the counter
 * is only ever incremented (by 5) when an XML declaration gets parsed in
 * the START state; every other path leaves it untouched.
 */
static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
    int ret = 0;
    int avail;
    xmlChar cur, next;
    const xmlChar *lastlt, *lastgt;

#ifdef DEBUG_PUSH
    switch (ctxt->instate) {
        case XML_PARSER_EOF:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EOF\n"); break;
        case XML_PARSER_START:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START\n"); break;
        case XML_PARSER_MISC:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try MISC\n");break;
        case XML_PARSER_COMMENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try COMMENT\n");break;
        case XML_PARSER_PROLOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PROLOG\n");break;
        case XML_PARSER_START_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START_TAG\n");break;
        case XML_PARSER_CONTENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CONTENT\n");break;
        case XML_PARSER_CDATA_SECTION:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CDATA_SECTION\n");break;
        case XML_PARSER_END_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try END_TAG\n");break;
        case XML_PARSER_ENTITY_DECL:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ENTITY_DECL\n");break;
        case XML_PARSER_ENTITY_VALUE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ENTITY_VALUE\n");break;
        case XML_PARSER_ATTRIBUTE_VALUE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ATTRIBUTE_VALUE\n");break;
        case XML_PARSER_DTD:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try DTD\n");break;
        case XML_PARSER_EPILOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EPILOG\n");break;
        case XML_PARSER_PI:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PI\n");break;
        case XML_PARSER_IGNORE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try IGNORE\n");break;
    }
#endif

    /*
     * Once more than 4096 bytes have been consumed, shrink the input and
     * drop the saved lookup index, which is reset together with it.
     * NOTE(review): ctxt->input is dereferenced here without a NULL check.
     */
    if (ctxt->input->cur - ctxt->input->base > 4096) {
        xmlSHRINK(ctxt);
        ctxt->checkIndex = 0;
    }
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);

    while (1) {
        /* stop as soon as an error was raised and SAX has been disabled */
        if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
            return(0);


        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);

        if (ctxt->input ==NULL) break;
        if (ctxt->input->buf == NULL)
            avail = ctxt->input->length -
                    (ctxt->input->cur - ctxt->input->base);
        else {
            /*
             * If we are operating on converted input, try to flush
             * remaining chars to avoid them stalling in the non-converted
             * buffer.
             */
            if ((ctxt->input->buf->raw != NULL) &&
                (ctxt->input->buf->raw->use > 0)) {
                /*
                 * Save base and cur as offsets, push an empty chunk, then
                 * recompute the pointers from the buffer content.
                 */
                int base = ctxt->input->base -
                           ctxt->input->buf->buffer->content;
                int current = ctxt->input->cur - ctxt->input->base;

                xmlParserInputBufferPush(ctxt->input->buf, 0, "");
                ctxt->input->base = ctxt->input->buf->buffer->content + base;
                ctxt->input->cur = ctxt->input->base + current;
                ctxt->input->end =
                    &ctxt->input->buf->buffer->content[
                                       ctxt->input->buf->buffer->use];
            }
            avail = ctxt->input->buf->buffer->use -
                    (ctxt->input->cur - ctxt->input->base);
        }
        if (avail < 1)
            goto done;
        switch (ctxt->instate) {
            case XML_PARSER_EOF:
                /*
                 * Document parsing is done !
                 */
                goto done;
            case XML_PARSER_START:
                if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
                    xmlChar start[4];
                    xmlCharEncoding enc;

                    /*
                     * Very first chars read from the document flow.
                     */
                    if (avail < 4)
                        goto done;

                    /*
                     * Get the 4 first bytes and decode the charset
                     * if enc != XML_CHAR_ENCODING_NONE
                     * plug some encoding conversion routines.
                     */
                    start[0] = RAW;
                    start[1] = NXT(1);
                    start[2] = NXT(2);
                    start[3] = NXT(3);
                    enc = xmlDetectCharEncoding(start, 4);
                    if (enc != XML_CHAR_ENCODING_NONE) {
                        xmlSwitchEncoding(ctxt, enc);
                    }
                    break;
                }

                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if (cur == 0) {
                    /* a NUL first byte is reported as an empty document */
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                if ((cur == '<') && (next == '?')) {
                    /* PI or XML decl */
                    if (avail < 5) return(ret);
                    /* wait until the closing "?>" is in the buffer */
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        return(ret);
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    if ((ctxt->input->cur[2] == 'x') &&
                        (ctxt->input->cur[3] == 'm') &&
                        (ctxt->input->cur[4] == 'l') &&
                        (IS_BLANK(ctxt->input->cur[5]))) {
                        ret += 5;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: Parsing XML Decl\n");
#endif
                        xmlParseXMLDecl(ctxt);
                        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
                            /*
                             * The XML REC instructs us to stop parsing right
                             * here
                             */
                            ctxt->instate = XML_PARSER_EOF;
                            return(0);
                        }
                        ctxt->standalone = ctxt->input->standalone;
                        if ((ctxt->encoding == NULL) &&
                            (ctxt->input->encoding != NULL))
                            ctxt->encoding = xmlStrdup(ctxt->input->encoding);
                        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->startDocument(ctxt->userData);
                        ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering MISC\n");
#endif
                    } else {
                        /* "<?" but not an XML declaration: default version */
                        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->startDocument(ctxt->userData);
                        ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering MISC\n");
#endif
                    }
                } else {
                    /* no "<?" at all: start the document, default version */
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->startDocument(ctxt->userData);
                    ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering MISC\n");
#endif
                }
                break;
            case XML_PARSER_START_TAG: {
                const xmlChar *name;
                const xmlChar *prefix;
                const xmlChar *URI;
                /* namespace count before the tag, used to compute the delta */
                int nsNr = ctxt->nsNr;

                if ((avail < 2) && (ctxt->inputNr == 1))
                    goto done;
                cur = ctxt->input->cur[0];
                if (cur != '<') {
                    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
                    ctxt->instate = XML_PARSER_EOF;
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                if (!terminate) {
                    /*
                     * Only parse the tag once a '>' at or after the cursor
                     * is known to be in the buffer.
                     */
                    if (ctxt->progressive) {
                        if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
                            goto done;
                    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
                        goto done;
                    }
                }
                if (ctxt->spaceNr == 0)
                    spacePush(ctxt, -1);
                else
                    spacePush(ctxt, *ctxt->space);
                if (ctxt->sax2)
                    name = xmlParseStartTag2(ctxt, &prefix, &URI);
                else
                    name = xmlParseStartTag(ctxt);
                if (name == NULL) {
                    spacePop(ctxt);
                    ctxt->instate = XML_PARSER_EOF;
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                /*
                 * [ VC: Root Element Type ]
                 * The Name in the document type declaration must match
                 * the element type of the root element.
                 */
                if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
                    ctxt->node && (ctxt->node == ctxt->myDoc->children))
                    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);

                /*
                 * Check for an Empty Element.
                 */
                if ((RAW == '/') && (NXT(1) == '>')) {
                    SKIP(2);

                    if (ctxt->sax2) {
                        if ((ctxt->sax != NULL) &&
                            (ctxt->sax->endElementNs != NULL) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->endElementNs(ctxt->userData, name,
                                                    prefix, URI);
                    } else {
                        if ((ctxt->sax != NULL) &&
                            (ctxt->sax->endElement != NULL) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->endElement(ctxt->userData, name);
                    }
                    spacePop(ctxt);
                    /* no open element left means the root was just closed */
                    if (ctxt->nameNr == 0) {
                        ctxt->instate = XML_PARSER_EPILOG;
                    } else {
                        ctxt->instate = XML_PARSER_CONTENT;
                    }
                    break;
                }
                if (RAW == '>') {
                    NEXT;
                } else {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
                                         "Couldn't find end of Start Tag %s\n",
                                         name);
                    nodePop(ctxt);
                    spacePop(ctxt);
                }
                /* NOTE(review): the name is pushed even after the error above */
                if (ctxt->sax2)
                    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
                else
                    namePush(ctxt, name);

                ctxt->instate = XML_PARSER_CONTENT;
                break;
            }
            case XML_PARSER_CONTENT: {
                /* cursor and consumed count, to detect a lack of progress */
                const xmlChar *test;
                unsigned int cons;
                if ((avail < 2) && (ctxt->inputNr == 1))
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];

                test = CUR_PTR;
                cons = ctxt->input->consumed;
                if ((cur == '<') && (next == '/')) {
                    ctxt->instate = XML_PARSER_END_TAG;
                    break;
                } else if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next != '!')) {
                    ctxt->instate = XML_PARSER_START_TAG;
                    break;
                } else if ((cur == '<') && (next == '!') &&
                           (ctxt->input->cur[2] == '-') &&
                           (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_CONTENT;
                } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
                    (ctxt->input->cur[2] == '[') &&
                    (ctxt->input->cur[3] == 'C') &&
                    (ctxt->input->cur[4] == 'D') &&
                    (ctxt->input->cur[5] == 'A') &&
                    (ctxt->input->cur[6] == 'T') &&
                    (ctxt->input->cur[7] == 'A') &&
                    (ctxt->input->cur[8] == '[')) {
                    /* skip the 9 characters of "<![CDATA[" */
                    SKIP(9);
                    ctxt->instate = XML_PARSER_CDATA_SECTION;
                    break;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 9)) {
                    /* "<!" seen but too few bytes to tell what follows */
                    goto done;
                } else if (cur == '&') {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
                        goto done;
                    xmlParseReference(ctxt);
                } else {
                    /* TODO Avoid the extra copy, handle directly !!! */
                    /*
                     * Goal of the following test is:
                     *  - minimize calls to the SAX 'character' callback
                     *    when they are mergeable
                     *  - handle a problem for isBlank when we only parse
                     *    a sequence of blank chars and the next one is
                     *    not available to check against '<' presence.
                     *  - tries to homogenize the differences in SAX
                     *    callbacks between the push and pull versions
                     *    of the parser.
                     */
                    if ((ctxt->inputNr == 1) &&
                        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
                        if (!terminate) {
                            if (ctxt->progressive) {
                                if ((lastlt == NULL) ||
                                    (ctxt->input->cur > lastlt))
                                    goto done;
                            } else if (xmlParseLookupSequence(ctxt,
                                                              '<', 0, 0) < 0) {
                                goto done;
                            }
                        }
                    }
                    ctxt->checkIndex = 0;
                    xmlParseCharData(ctxt, 0);
                }
                /*
                 * Pop-up of finished entities.
                 */
                while ((RAW == 0) && (ctxt->inputNr > 1))
                    xmlPopInput(ctxt);
                /* nothing consumed and cursor unmoved: bail out with an error */
                if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
                    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                                "detected an error in element content\n");
                    ctxt->instate = XML_PARSER_EOF;
                    break;
                }
                break;
            }
            case XML_PARSER_END_TAG:
                if (avail < 2)
                    goto done;
                if (!terminate) {
                    if (ctxt->progressive) {
                        if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
                            goto done;
                    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
                        goto done;
                    }
                }
                if (ctxt->sax2) {
                    /*
                     * pushTab is read three slots per open element, taking
                     * the triple of the innermost one.
                     * NOTE(review): the index is negative if nameNr is 0 -
                     * confirm this state cannot be reached with an empty
                     * name stack.
                     */
                    xmlParseEndTag2(ctxt,
                           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
                           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
                           (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
                    nameNsPop(ctxt);
                } else
                    xmlParseEndTag1(ctxt, 0);
                if (ctxt->nameNr == 0) {
                    ctxt->instate = XML_PARSER_EPILOG;
                } else {
                    ctxt->instate = XML_PARSER_CONTENT;
                }
                break;
            case XML_PARSER_CDATA_SECTION: {
                /*
                 * The Push mode need to have the SAX callback for
                 * cdataBlock merge back contiguous callbacks.
                 */
                int base;

                base = xmlParseLookupSequence(ctxt, ']', ']', '>');
                if (base < 0) {
                    /*
                     * End marker not in the buffer yet: deliver one
                     * XML_PARSER_BIG_BUFFER_SIZE block only when at least
                     * two more bytes than that are available.
                     */
                    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
                        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                            if (ctxt->sax->cdataBlock != NULL)
                                ctxt->sax->cdataBlock(ctxt->userData,
                                                      ctxt->input->cur,
                                          XML_PARSER_BIG_BUFFER_SIZE);
                            else if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      ctxt->input->cur,
                                          XML_PARSER_BIG_BUFFER_SIZE);
                        }
                        SKIP(XML_PARSER_BIG_BUFFER_SIZE);
                        ctxt->checkIndex = 0;
                    }
                    goto done;
                } else {
                    /* base is the number of bytes preceding "]]>" */
                    if ((ctxt->sax != NULL) && (base > 0) &&
                        (!ctxt->disableSAX)) {
                        if (ctxt->sax->cdataBlock != NULL)
                            ctxt->sax->cdataBlock(ctxt->userData,
                                                  ctxt->input->cur, base);
                        else if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  ctxt->input->cur, base);
                    }
                    /* skip the data and the 3 characters of "]]>" */
                    SKIP(base + 3);
                    ctxt->checkIndex = 0;
                    ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering CONTENT\n");
#endif
                }
                break;
            }
            case XML_PARSER_MISC:
                SKIP_BLANKS;
                /* blanks were skipped, so recompute what is left */
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length -
                            (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use -
                            (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') &&
                    (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_MISC;
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == 'D') &&
                    (ctxt->input->cur[3] == 'O') &&
                    (ctxt->input->cur[4] == 'C') &&
                    (ctxt->input->cur[5] == 'T') &&
                    (ctxt->input->cur[6] == 'Y') &&
                    (ctxt->input->cur[7] == 'P') &&
                    (ctxt->input->cur[8] == 'E')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing internal subset\n");
#endif
                    ctxt->inSubset = 1;
                    xmlParseDocTypeDecl(ctxt);
                    if (RAW == '[') {
                        /* an internal subset follows, handled in DTD state */
                        ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering DTD\n");
#endif
                    } else {
                        /*
                         * Create and update the external subset.
                         */
                        ctxt->inSubset = 2;
                        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                            (ctxt->sax->externalSubset != NULL))
                            ctxt->sax->externalSubset(ctxt->userData,
                                    ctxt->intSubName, ctxt->extSubSystem,
                                    ctxt->extSubURI);
                        ctxt->inSubset = 0;
                        ctxt->instate = XML_PARSER_PROLOG;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering PROLOG\n");
#endif
                    }
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 9)) {
                    goto done;
                } else {
                    /*
                     * Anything else is taken as the start of the root
                     * element; progressive mode is switched on and the
                     * last '<' / '>' positions are refreshed.
                     */
                    ctxt->instate = XML_PARSER_START_TAG;
                    ctxt->progressive = 1;
                    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                }
                break;
            case XML_PARSER_PROLOG:
                SKIP_BLANKS;
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_PROLOG;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 4)) {
                    goto done;
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
                    ctxt->progressive = 1;
                    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                }
                break;
            case XML_PARSER_EPILOG:
                SKIP_BLANKS;
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                    ctxt->instate = XML_PARSER_EPILOG;
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_EPILOG;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 4)) {
                    goto done;
                } else {
                    /* only PIs and comments are accepted after the root */
                    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                break;
            case XML_PARSER_DTD: {
                /*
                 * Sorry but progressive parsing of the internal subset
                 * is not expected to be supported. We first check that
                 * the full content of the internal subset is available and
                 * the parsing is launched only at that point.
                 * Internal subset ends up with "']' S? '>'" in an unescaped
                 * section and not in a ']]>' sequence which are conditional
                 * sections (whoever argued to keep that crap in XML deserve
                 * a place in hell !).
                 */
                int base, i;
                xmlChar *buf;
                /* quote character of the literal being skipped, 0 if none */
                xmlChar quote = 0;

                base = ctxt->input->cur - ctxt->input->base;
                if (base < 0) return(0);
                if (ctxt->checkIndex > base)
                    base = ctxt->checkIndex;
                /*
                 * NOTE(review): unlike the other states, ctxt->input->buf
                 * is used here without a NULL check.
                 */
                buf = ctxt->input->buf->buffer->content;
                for (;(unsigned int) base < ctxt->input->buf->buffer->use;
                     base++) {
                    if (quote != 0) {
                        if (buf[base] == quote)
                            quote = 0;
                        continue;
                    }
                    if (buf[base] == '"') {
                        quote = '"';
                        continue;
                    }
                    if (buf[base] == '\'') {
                        quote = '\'';
                        continue;
                    }
                    if (buf[base] == ']') {
                        if ((unsigned int) base +1 >=
                            ctxt->input->buf->buffer->use)
                            break;
                        if (buf[base + 1] == ']') {
                            /* conditional crap, skip both ']' ! */
                            base++;
                            continue;
                        }
                        /* any '>' further in the buffer ends the subset */
                        for (i = 0;
                     (unsigned int) base + i < ctxt->input->buf->buffer->use;
                             i++) {
                            if (buf[base + i] == '>')
                                goto found_end_int_subset;
                        }
                        break;
                    }
                }
                /*
                 * We didn't find the end of the Internal subset
                 */
                if (quote == 0)
                    ctxt->checkIndex = base;
#ifdef DEBUG_PUSH
                /*
                 * NOTE(review): no statement on the way to this state
                 * assigns 'next' before it is tested here.
                 */
                if (next == 0)
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: lookup of int subset end filed\n");
#endif
                goto done;

found_end_int_subset:
                xmlParseInternalSubset(ctxt);
                ctxt->inSubset = 2;
                if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                    (ctxt->sax->externalSubset != NULL))
                    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                            ctxt->extSubSystem, ctxt->extSubURI);
                ctxt->inSubset = 0;
                ctxt->instate = XML_PARSER_PROLOG;
                ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering PROLOG\n");
#endif
                break;
            }
            /*
             * The remaining states are reported as internal errors and
             * mapped back onto one of the states handled above.
             */
            case XML_PARSER_COMMENT:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == COMMENT\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_IGNORE:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == IGNORE");
                ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_PI:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == PI\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_ENTITY_DECL:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ENTITY_DECL\n");
                ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_ENTITY_VALUE:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ENTITY_VALUE\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                /*
                 * NOTE(review): the message says DTD but the state set
                 * just above is CONTENT.
                 */
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_ATTRIBUTE_VALUE:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ATTRIBUTE_VALUE\n");
                ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering START_TAG\n");
#endif
                break;
            case XML_PARSER_SYSTEM_LITERAL:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == SYSTEM_LITERAL\n");
                ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering START_TAG\n");
#endif
                break;
            case XML_PARSER_PUBLIC_LITERAL:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == PUBLIC_LITERAL\n");
                ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering START_TAG\n");
#endif
                break;
        }
    }
done:
#ifdef DEBUG_PUSH
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
#endif
    return(ret);
}

/**
 * xmlParseChunk:
 * @ctxt:  an XML parser context
 * @chunk:  a char array
 * @size:  the size in bytes of the chunk
 * @terminate:  last chunk indicator
 *
 * Parse a Chunk of memory
 *
 * Returns zero if no error, the xmlParserErrors otherwise.
9956 */ 9957int 9958xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 9959 int terminate) { 9960 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 9961 return(ctxt->errNo); 9962 if (ctxt->instate == XML_PARSER_START) 9963 xmlDetectSAX2(ctxt); 9964 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 9965 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 9966 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 9967 int cur = ctxt->input->cur - ctxt->input->base; 9968 9969 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 9970 ctxt->input->base = ctxt->input->buf->buffer->content + base; 9971 ctxt->input->cur = ctxt->input->base + cur; 9972 ctxt->input->end = 9973 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 9974#ifdef DEBUG_PUSH 9975 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 9976#endif 9977 9978 } else if (ctxt->instate != XML_PARSER_EOF) { 9979 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 9980 xmlParserInputBufferPtr in = ctxt->input->buf; 9981 if ((in->encoder != NULL) && (in->buffer != NULL) && 9982 (in->raw != NULL)) { 9983 int nbchars; 9984 9985 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 9986 if (nbchars < 0) { 9987 xmlGenericError(xmlGenericErrorContext, 9988 "xmlParseChunk: encoder error\n"); 9989 return(XML_ERR_INVALID_ENCODING); 9990 } 9991 } 9992 } 9993 } 9994 xmlParseTryOrFinish(ctxt, terminate); 9995 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 9996 return(ctxt->errNo); 9997 if (terminate) { 9998 /* 9999 * Check for termination 10000 */ 10001 int avail = 0; 10002 if (ctxt->input->buf == NULL) 10003 avail = ctxt->input->length - 10004 (ctxt->input->cur - ctxt->input->base); 10005 else 10006 avail = ctxt->input->buf->buffer->use - 10007 (ctxt->input->cur - ctxt->input->base); 10008 10009 if ((ctxt->instate != XML_PARSER_EOF) && 10010 (ctxt->instate != XML_PARSER_EPILOG)) { 10011 
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10012 } 10013 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 10014 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10015 } 10016 if (ctxt->instate != XML_PARSER_EOF) { 10017 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10018 ctxt->sax->endDocument(ctxt->userData); 10019 } 10020 ctxt->instate = XML_PARSER_EOF; 10021 } 10022 return((xmlParserErrors) ctxt->errNo); 10023} 10024 10025/************************************************************************ 10026 * * 10027 * I/O front end functions to the parser * 10028 * * 10029 ************************************************************************/ 10030 10031/** 10032 * xmlStopParser: 10033 * @ctxt: an XML parser context 10034 * 10035 * Blocks further parser processing 10036 */ 10037void 10038xmlStopParser(xmlParserCtxtPtr ctxt) { 10039 ctxt->instate = XML_PARSER_EOF; 10040 if (ctxt->input != NULL) 10041 ctxt->input->cur = BAD_CAST""; 10042} 10043 10044/** 10045 * xmlCreatePushParserCtxt: 10046 * @sax: a SAX handler 10047 * @user_data: The user data returned on SAX callbacks 10048 * @chunk: a pointer to an array of chars 10049 * @size: number of chars in the array 10050 * @filename: an optional file name or URI 10051 * 10052 * Create a parser context for using the XML parser in push mode. 10053 * If @buffer and @size are non-NULL, the data is used to detect 10054 * the encoding. The remaining characters will be parsed so they 10055 * don't need to be fed in again through xmlParseChunk. 10056 * To allow content encoding detection, @size should be >= 4 10057 * The value of @filename is used for fetching external entities 10058 * and error/warning reports. 
10059 * 10060 * Returns the new parser context or NULL 10061 */ 10062 10063xmlParserCtxtPtr 10064xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 10065 const char *chunk, int size, const char *filename) { 10066 xmlParserCtxtPtr ctxt; 10067 xmlParserInputPtr inputStream; 10068 xmlParserInputBufferPtr buf; 10069 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 10070 10071 /* 10072 * plug some encoding conversion routines 10073 */ 10074 if ((chunk != NULL) && (size >= 4)) 10075 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 10076 10077 buf = xmlAllocParserInputBuffer(enc); 10078 if (buf == NULL) return(NULL); 10079 10080 ctxt = xmlNewParserCtxt(); 10081 if (ctxt == NULL) { 10082 xmlGenericError(xmlGenericErrorContext, 10083 "xml parser: out of memory\n"); 10084 xmlFreeParserInputBuffer(buf); 10085 return(NULL); 10086 } 10087 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 10088 if (ctxt->pushTab == NULL) { 10089 xmlErrMemory(ctxt, NULL); 10090 xmlFreeParserInputBuffer(buf); 10091 xmlFreeParserCtxt(ctxt); 10092 return(NULL); 10093 } 10094 if (sax != NULL) { 10095 if (ctxt->sax != &xmlDefaultSAXHandler) 10096 xmlFree(ctxt->sax); 10097 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 10098 if (ctxt->sax == NULL) { 10099 xmlErrMemory(ctxt, NULL); 10100 xmlFreeParserInputBuffer(buf); 10101 xmlFreeParserCtxt(ctxt); 10102 return(NULL); 10103 } 10104 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 10105 if (user_data != NULL) 10106 ctxt->userData = user_data; 10107 } 10108 if (filename == NULL) { 10109 ctxt->directory = NULL; 10110 } else { 10111 ctxt->directory = xmlParserGetDirectory(filename); 10112 } 10113 10114 inputStream = xmlNewInputStream(ctxt); 10115 if (inputStream == NULL) { 10116 xmlFreeParserCtxt(ctxt); 10117 xmlFreeParserInputBuffer(buf); 10118 return(NULL); 10119 } 10120 10121 if (filename == NULL) 10122 inputStream->filename = NULL; 10123 else 10124 inputStream->filename = (char *) 10125 
xmlCanonicPath((const xmlChar *) filename); 10126 inputStream->buf = buf; 10127 inputStream->base = inputStream->buf->buffer->content; 10128 inputStream->cur = inputStream->buf->buffer->content; 10129 inputStream->end = 10130 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 10131 10132 inputPush(ctxt, inputStream); 10133 10134 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 10135 (ctxt->input->buf != NULL)) { 10136 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 10137 int cur = ctxt->input->cur - ctxt->input->base; 10138 10139 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 10140 10141 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10142 ctxt->input->cur = ctxt->input->base + cur; 10143 ctxt->input->end = 10144 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 10145#ifdef DEBUG_PUSH 10146 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 10147#endif 10148 } 10149 10150 if (enc != XML_CHAR_ENCODING_NONE) { 10151 xmlSwitchEncoding(ctxt, enc); 10152 } 10153 10154 return(ctxt); 10155} 10156 10157/** 10158 * xmlCreateIOParserCtxt: 10159 * @sax: a SAX handler 10160 * @user_data: The user data returned on SAX callbacks 10161 * @ioread: an I/O read function 10162 * @ioclose: an I/O close function 10163 * @ioctx: an I/O handler 10164 * @enc: the charset encoding if known 10165 * 10166 * Create a parser context for using the XML parser with an existing 10167 * I/O stream 10168 * 10169 * Returns the new parser context or NULL 10170 */ 10171xmlParserCtxtPtr 10172xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 10173 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 10174 void *ioctx, xmlCharEncoding enc) { 10175 xmlParserCtxtPtr ctxt; 10176 xmlParserInputPtr inputStream; 10177 xmlParserInputBufferPtr buf; 10178 10179 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 10180 if (buf == NULL) return(NULL); 10181 10182 ctxt = 
xmlNewParserCtxt(); 10183 if (ctxt == NULL) { 10184 xmlFree(buf); 10185 return(NULL); 10186 } 10187 if (sax != NULL) { 10188 if (ctxt->sax != &xmlDefaultSAXHandler) 10189 xmlFree(ctxt->sax); 10190 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 10191 if (ctxt->sax == NULL) { 10192 xmlErrMemory(ctxt, NULL); 10193 xmlFree(ctxt); 10194 return(NULL); 10195 } 10196 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 10197 if (user_data != NULL) 10198 ctxt->userData = user_data; 10199 } 10200 10201 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 10202 if (inputStream == NULL) { 10203 xmlFreeParserCtxt(ctxt); 10204 return(NULL); 10205 } 10206 inputPush(ctxt, inputStream); 10207 10208 return(ctxt); 10209} 10210 10211/************************************************************************ 10212 * * 10213 * Front ends when parsing a DTD * 10214 * * 10215 ************************************************************************/ 10216 10217/** 10218 * xmlIOParseDTD: 10219 * @sax: the SAX handler block or NULL 10220 * @input: an Input Buffer 10221 * @enc: the charset encoding if known 10222 * 10223 * Load and parse a DTD 10224 * 10225 * Returns the resulting xmlDtdPtr or NULL in case of error. 10226 * @input will be freed at parsing end. 
10227 */ 10228 10229xmlDtdPtr 10230xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 10231 xmlCharEncoding enc) { 10232 xmlDtdPtr ret = NULL; 10233 xmlParserCtxtPtr ctxt; 10234 xmlParserInputPtr pinput = NULL; 10235 xmlChar start[4]; 10236 10237 if (input == NULL) 10238 return(NULL); 10239 10240 ctxt = xmlNewParserCtxt(); 10241 if (ctxt == NULL) { 10242 return(NULL); 10243 } 10244 10245 /* 10246 * Set-up the SAX context 10247 */ 10248 if (sax != NULL) { 10249 if (ctxt->sax != NULL) 10250 xmlFree(ctxt->sax); 10251 ctxt->sax = sax; 10252 ctxt->userData = NULL; 10253 } 10254 xmlDetectSAX2(ctxt); 10255 10256 /* 10257 * generate a parser input from the I/O handler 10258 */ 10259 10260 pinput = xmlNewIOInputStream(ctxt, input, enc); 10261 if (pinput == NULL) { 10262 if (sax != NULL) ctxt->sax = NULL; 10263 xmlFreeParserCtxt(ctxt); 10264 return(NULL); 10265 } 10266 10267 /* 10268 * plug some encoding conversion routines here. 10269 */ 10270 xmlPushInput(ctxt, pinput); 10271 10272 pinput->filename = NULL; 10273 pinput->line = 1; 10274 pinput->col = 1; 10275 pinput->base = ctxt->input->cur; 10276 pinput->cur = ctxt->input->cur; 10277 pinput->free = NULL; 10278 10279 /* 10280 * let's parse that entity knowing it's an external subset. 10281 */ 10282 ctxt->inSubset = 2; 10283 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 10284 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 10285 BAD_CAST "none", BAD_CAST "none"); 10286 10287 if (enc == XML_CHAR_ENCODING_NONE) { 10288 /* 10289 * Get the 4 first bytes and decode the charset 10290 * if enc != XML_CHAR_ENCODING_NONE 10291 * plug some encoding conversion routines. 
10292 */ 10293 start[0] = RAW; 10294 start[1] = NXT(1); 10295 start[2] = NXT(2); 10296 start[3] = NXT(3); 10297 enc = xmlDetectCharEncoding(start, 4); 10298 if (enc != XML_CHAR_ENCODING_NONE) { 10299 xmlSwitchEncoding(ctxt, enc); 10300 } 10301 } 10302 10303 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 10304 10305 if (ctxt->myDoc != NULL) { 10306 if (ctxt->wellFormed) { 10307 ret = ctxt->myDoc->extSubset; 10308 ctxt->myDoc->extSubset = NULL; 10309 if (ret != NULL) { 10310 xmlNodePtr tmp; 10311 10312 ret->doc = NULL; 10313 tmp = ret->children; 10314 while (tmp != NULL) { 10315 tmp->doc = NULL; 10316 tmp = tmp->next; 10317 } 10318 } 10319 } else { 10320 ret = NULL; 10321 } 10322 xmlFreeDoc(ctxt->myDoc); 10323 ctxt->myDoc = NULL; 10324 } 10325 if (sax != NULL) ctxt->sax = NULL; 10326 xmlFreeParserCtxt(ctxt); 10327 10328 return(ret); 10329} 10330 10331/** 10332 * xmlSAXParseDTD: 10333 * @sax: the SAX handler block 10334 * @ExternalID: a NAME* containing the External ID of the DTD 10335 * @SystemID: a NAME* containing the URL to the DTD 10336 * 10337 * Load and parse an external subset. 10338 * 10339 * Returns the resulting xmlDtdPtr or NULL in case of error. 
10340 */ 10341 10342xmlDtdPtr 10343xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 10344 const xmlChar *SystemID) { 10345 xmlDtdPtr ret = NULL; 10346 xmlParserCtxtPtr ctxt; 10347 xmlParserInputPtr input = NULL; 10348 xmlCharEncoding enc; 10349 10350 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 10351 10352 ctxt = xmlNewParserCtxt(); 10353 if (ctxt == NULL) { 10354 return(NULL); 10355 } 10356 10357 /* 10358 * Set-up the SAX context 10359 */ 10360 if (sax != NULL) { 10361 if (ctxt->sax != NULL) 10362 xmlFree(ctxt->sax); 10363 ctxt->sax = sax; 10364 ctxt->userData = ctxt; 10365 } 10366 10367 /* 10368 * Ask the Entity resolver to load the damn thing 10369 */ 10370 10371 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 10372 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID); 10373 if (input == NULL) { 10374 if (sax != NULL) ctxt->sax = NULL; 10375 xmlFreeParserCtxt(ctxt); 10376 return(NULL); 10377 } 10378 10379 /* 10380 * plug some encoding conversion routines here. 10381 */ 10382 xmlPushInput(ctxt, input); 10383 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 10384 xmlSwitchEncoding(ctxt, enc); 10385 10386 if (input->filename == NULL) 10387 input->filename = (char *) xmlCanonicPath(SystemID); 10388 input->line = 1; 10389 input->col = 1; 10390 input->base = ctxt->input->cur; 10391 input->cur = ctxt->input->cur; 10392 input->free = NULL; 10393 10394 /* 10395 * let's parse that entity knowing it's an external subset. 
10396 */ 10397 ctxt->inSubset = 2; 10398 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 10399 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 10400 ExternalID, SystemID); 10401 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 10402 10403 if (ctxt->myDoc != NULL) { 10404 if (ctxt->wellFormed) { 10405 ret = ctxt->myDoc->extSubset; 10406 ctxt->myDoc->extSubset = NULL; 10407 if (ret != NULL) { 10408 xmlNodePtr tmp; 10409 10410 ret->doc = NULL; 10411 tmp = ret->children; 10412 while (tmp != NULL) { 10413 tmp->doc = NULL; 10414 tmp = tmp->next; 10415 } 10416 } 10417 } else { 10418 ret = NULL; 10419 } 10420 xmlFreeDoc(ctxt->myDoc); 10421 ctxt->myDoc = NULL; 10422 } 10423 if (sax != NULL) ctxt->sax = NULL; 10424 xmlFreeParserCtxt(ctxt); 10425 10426 return(ret); 10427} 10428 10429/** 10430 * xmlParseDTD: 10431 * @ExternalID: a NAME* containing the External ID of the DTD 10432 * @SystemID: a NAME* containing the URL to the DTD 10433 * 10434 * Load and parse an external subset. 10435 * 10436 * Returns the resulting xmlDtdPtr or NULL in case of error. 10437 */ 10438 10439xmlDtdPtr 10440xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 10441 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 10442} 10443 10444/************************************************************************ 10445 * * 10446 * Front ends when parsing an Entity * 10447 * * 10448 ************************************************************************/ 10449 10450/** 10451 * xmlParseCtxtExternalEntity: 10452 * @ctx: the existing parsing context 10453 * @URL: the URL for the entity to load 10454 * @ID: the System ID for the entity to load 10455 * @lst: the return value for the set of parsed nodes 10456 * 10457 * Parse an external general entity within an existing parsing context 10458 * An external general parsed entity is well-formed if it matches the 10459 * production labeled extParsedEnt. 10460 * 10461 * [78] extParsedEnt ::= TextDecl? 
content 10462 * 10463 * Returns 0 if the entity is well formed, -1 in case of args problem and 10464 * the parser error code otherwise 10465 */ 10466 10467int 10468xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 10469 const xmlChar *ID, xmlNodePtr *lst) { 10470 xmlParserCtxtPtr ctxt; 10471 xmlDocPtr newDoc; 10472 xmlSAXHandlerPtr oldsax = NULL; 10473 int ret = 0; 10474 xmlChar start[4]; 10475 xmlCharEncoding enc; 10476 10477 if (ctx->depth > 40) { 10478 return(XML_ERR_ENTITY_LOOP); 10479 } 10480 10481 if (lst != NULL) 10482 *lst = NULL; 10483 if ((URL == NULL) && (ID == NULL)) 10484 return(-1); 10485 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 10486 return(-1); 10487 10488 10489 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 10490 if (ctxt == NULL) return(-1); 10491 ctxt->userData = ctxt; 10492 ctxt->_private = ctx->_private; 10493 oldsax = ctxt->sax; 10494 ctxt->sax = ctx->sax; 10495 xmlDetectSAX2(ctxt); 10496 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10497 if (newDoc == NULL) { 10498 xmlFreeParserCtxt(ctxt); 10499 return(-1); 10500 } 10501 if (ctx->myDoc != NULL) { 10502 newDoc->intSubset = ctx->myDoc->intSubset; 10503 newDoc->extSubset = ctx->myDoc->extSubset; 10504 } 10505 if (ctx->myDoc->URL != NULL) { 10506 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 10507 } 10508 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 10509 if (newDoc->children == NULL) { 10510 ctxt->sax = oldsax; 10511 xmlFreeParserCtxt(ctxt); 10512 newDoc->intSubset = NULL; 10513 newDoc->extSubset = NULL; 10514 xmlFreeDoc(newDoc); 10515 return(-1); 10516 } 10517 nodePush(ctxt, newDoc->children); 10518 if (ctx->myDoc == NULL) { 10519 ctxt->myDoc = newDoc; 10520 } else { 10521 ctxt->myDoc = ctx->myDoc; 10522 newDoc->children->doc = ctx->myDoc; 10523 } 10524 10525 /* 10526 * Get the 4 first bytes and decode the charset 10527 * if enc != XML_CHAR_ENCODING_NONE 10528 * plug some encoding conversion routines. 
10529 */ 10530 GROW 10531 start[0] = RAW; 10532 start[1] = NXT(1); 10533 start[2] = NXT(2); 10534 start[3] = NXT(3); 10535 enc = xmlDetectCharEncoding(start, 4); 10536 if (enc != XML_CHAR_ENCODING_NONE) { 10537 xmlSwitchEncoding(ctxt, enc); 10538 } 10539 10540 /* 10541 * Parse a possible text declaration first 10542 */ 10543 if ((RAW == '<') && (NXT(1) == '?') && 10544 (NXT(2) == 'x') && (NXT(3) == 'm') && 10545 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 10546 xmlParseTextDecl(ctxt); 10547 } 10548 10549 /* 10550 * Doing validity checking on chunk doesn't make sense 10551 */ 10552 ctxt->instate = XML_PARSER_CONTENT; 10553 ctxt->validate = ctx->validate; 10554 ctxt->valid = ctx->valid; 10555 ctxt->loadsubset = ctx->loadsubset; 10556 ctxt->depth = ctx->depth + 1; 10557 ctxt->replaceEntities = ctx->replaceEntities; 10558 if (ctxt->validate) { 10559 ctxt->vctxt.error = ctx->vctxt.error; 10560 ctxt->vctxt.warning = ctx->vctxt.warning; 10561 } else { 10562 ctxt->vctxt.error = NULL; 10563 ctxt->vctxt.warning = NULL; 10564 } 10565 ctxt->vctxt.nodeTab = NULL; 10566 ctxt->vctxt.nodeNr = 0; 10567 ctxt->vctxt.nodeMax = 0; 10568 ctxt->vctxt.node = NULL; 10569 10570 xmlParseContent(ctxt); 10571 10572 ctx->validate = ctxt->validate; 10573 ctx->valid = ctxt->valid; 10574 if ((RAW == '<') && (NXT(1) == '/')) { 10575 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10576 } else if (RAW != 0) { 10577 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10578 } 10579 if (ctxt->node != newDoc->children) { 10580 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10581 } 10582 10583 if (!ctxt->wellFormed) { 10584 if (ctxt->errNo == 0) 10585 ret = 1; 10586 else 10587 ret = ctxt->errNo; 10588 } else { 10589 if (lst != NULL) { 10590 xmlNodePtr cur; 10591 10592 /* 10593 * Return the newly created nodeset after unlinking it from 10594 * they pseudo parent. 
10595 */ 10596 cur = newDoc->children->children; 10597 *lst = cur; 10598 while (cur != NULL) { 10599 cur->parent = NULL; 10600 cur = cur->next; 10601 } 10602 newDoc->children->children = NULL; 10603 } 10604 ret = 0; 10605 } 10606 ctxt->sax = oldsax; 10607 xmlFreeParserCtxt(ctxt); 10608 newDoc->intSubset = NULL; 10609 newDoc->extSubset = NULL; 10610 xmlFreeDoc(newDoc); 10611 10612 return(ret); 10613} 10614 10615/** 10616 * xmlParseExternalEntityPrivate: 10617 * @doc: the document the chunk pertains to 10618 * @oldctxt: the previous parser context if available 10619 * @sax: the SAX handler bloc (possibly NULL) 10620 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10621 * @depth: Used for loop detection, use 0 10622 * @URL: the URL for the entity to load 10623 * @ID: the System ID for the entity to load 10624 * @list: the return value for the set of parsed nodes 10625 * 10626 * Private version of xmlParseExternalEntity() 10627 * 10628 * Returns 0 if the entity is well formed, -1 in case of args problem and 10629 * the parser error code otherwise 10630 */ 10631 10632static int 10633xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 10634 xmlSAXHandlerPtr sax, 10635 void *user_data, int depth, const xmlChar *URL, 10636 const xmlChar *ID, xmlNodePtr *list) { 10637 xmlParserCtxtPtr ctxt; 10638 xmlDocPtr newDoc; 10639 xmlSAXHandlerPtr oldsax = NULL; 10640 int ret = 0; 10641 xmlChar start[4]; 10642 xmlCharEncoding enc; 10643 10644 if (depth > 40) { 10645 return(XML_ERR_ENTITY_LOOP); 10646 } 10647 10648 10649 10650 if (list != NULL) 10651 *list = NULL; 10652 if ((URL == NULL) && (ID == NULL)) 10653 return(-1); 10654 if (doc == NULL) /* @@ relax but check for dereferences */ 10655 return(-1); 10656 10657 10658 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 10659 if (ctxt == NULL) return(-1); 10660 ctxt->userData = ctxt; 10661 if (oldctxt != NULL) { 10662 ctxt->_private = oldctxt->_private; 10663 ctxt->loadsubset = 
oldctxt->loadsubset; 10664 ctxt->validate = oldctxt->validate; 10665 ctxt->external = oldctxt->external; 10666 ctxt->record_info = oldctxt->record_info; 10667 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 10668 ctxt->node_seq.length = oldctxt->node_seq.length; 10669 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 10670 } else { 10671 /* 10672 * Doing validity checking on chunk without context 10673 * doesn't make sense 10674 */ 10675 ctxt->_private = NULL; 10676 ctxt->validate = 0; 10677 ctxt->external = 2; 10678 ctxt->loadsubset = 0; 10679 } 10680 if (sax != NULL) { 10681 oldsax = ctxt->sax; 10682 ctxt->sax = sax; 10683 if (user_data != NULL) 10684 ctxt->userData = user_data; 10685 } 10686 xmlDetectSAX2(ctxt); 10687 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10688 if (newDoc == NULL) { 10689 ctxt->node_seq.maximum = 0; 10690 ctxt->node_seq.length = 0; 10691 ctxt->node_seq.buffer = NULL; 10692 xmlFreeParserCtxt(ctxt); 10693 return(-1); 10694 } 10695 if (doc != NULL) { 10696 newDoc->intSubset = doc->intSubset; 10697 newDoc->extSubset = doc->extSubset; 10698 } 10699 if (doc->URL != NULL) { 10700 newDoc->URL = xmlStrdup(doc->URL); 10701 } 10702 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 10703 if (newDoc->children == NULL) { 10704 if (sax != NULL) 10705 ctxt->sax = oldsax; 10706 ctxt->node_seq.maximum = 0; 10707 ctxt->node_seq.length = 0; 10708 ctxt->node_seq.buffer = NULL; 10709 xmlFreeParserCtxt(ctxt); 10710 newDoc->intSubset = NULL; 10711 newDoc->extSubset = NULL; 10712 xmlFreeDoc(newDoc); 10713 return(-1); 10714 } 10715 nodePush(ctxt, newDoc->children); 10716 if (doc == NULL) { 10717 ctxt->myDoc = newDoc; 10718 } else { 10719 ctxt->myDoc = doc; 10720 newDoc->children->doc = doc; 10721 } 10722 10723 /* 10724 * Get the 4 first bytes and decode the charset 10725 * if enc != XML_CHAR_ENCODING_NONE 10726 * plug some encoding conversion routines. 
10727 */ 10728 GROW; 10729 start[0] = RAW; 10730 start[1] = NXT(1); 10731 start[2] = NXT(2); 10732 start[3] = NXT(3); 10733 enc = xmlDetectCharEncoding(start, 4); 10734 if (enc != XML_CHAR_ENCODING_NONE) { 10735 xmlSwitchEncoding(ctxt, enc); 10736 } 10737 10738 /* 10739 * Parse a possible text declaration first 10740 */ 10741 if ((RAW == '<') && (NXT(1) == '?') && 10742 (NXT(2) == 'x') && (NXT(3) == 'm') && 10743 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 10744 xmlParseTextDecl(ctxt); 10745 } 10746 10747 ctxt->instate = XML_PARSER_CONTENT; 10748 ctxt->depth = depth; 10749 10750 xmlParseContent(ctxt); 10751 10752 if ((RAW == '<') && (NXT(1) == '/')) { 10753 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10754 } else if (RAW != 0) { 10755 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10756 } 10757 if (ctxt->node != newDoc->children) { 10758 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10759 } 10760 10761 if (!ctxt->wellFormed) { 10762 if (ctxt->errNo == 0) 10763 ret = 1; 10764 else 10765 ret = ctxt->errNo; 10766 } else { 10767 if (list != NULL) { 10768 xmlNodePtr cur; 10769 10770 /* 10771 * Return the newly created nodeset after unlinking it from 10772 * they pseudo parent. 
10773 */ 10774 cur = newDoc->children->children; 10775 *list = cur; 10776 while (cur != NULL) { 10777 cur->parent = NULL; 10778 cur = cur->next; 10779 } 10780 newDoc->children->children = NULL; 10781 } 10782 ret = 0; 10783 } 10784 if (sax != NULL) 10785 ctxt->sax = oldsax; 10786 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 10787 oldctxt->node_seq.length = ctxt->node_seq.length; 10788 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 10789 ctxt->node_seq.maximum = 0; 10790 ctxt->node_seq.length = 0; 10791 ctxt->node_seq.buffer = NULL; 10792 xmlFreeParserCtxt(ctxt); 10793 newDoc->intSubset = NULL; 10794 newDoc->extSubset = NULL; 10795 xmlFreeDoc(newDoc); 10796 10797 return(ret); 10798} 10799 10800/** 10801 * xmlParseExternalEntity: 10802 * @doc: the document the chunk pertains to 10803 * @sax: the SAX handler bloc (possibly NULL) 10804 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10805 * @depth: Used for loop detection, use 0 10806 * @URL: the URL for the entity to load 10807 * @ID: the System ID for the entity to load 10808 * @lst: the return value for the set of parsed nodes 10809 * 10810 * Parse an external general entity 10811 * An external general parsed entity is well-formed if it matches the 10812 * production labeled extParsedEnt. 10813 * 10814 * [78] extParsedEnt ::= TextDecl? 
content 10815 * 10816 * Returns 0 if the entity is well formed, -1 in case of args problem and 10817 * the parser error code otherwise 10818 */ 10819 10820int 10821xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 10822 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 10823 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 10824 ID, lst)); 10825} 10826 10827/** 10828 * xmlParseBalancedChunkMemory: 10829 * @doc: the document the chunk pertains to 10830 * @sax: the SAX handler bloc (possibly NULL) 10831 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10832 * @depth: Used for loop detection, use 0 10833 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 10834 * @lst: the return value for the set of parsed nodes 10835 * 10836 * Parse a well-balanced chunk of an XML document 10837 * called by the parser 10838 * The allowed sequence for the Well Balanced Chunk is the one defined by 10839 * the content production in the XML grammar: 10840 * 10841 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10842 * 10843 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 10844 * the parser error code otherwise 10845 */ 10846 10847int 10848xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 10849 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 10850 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 10851 depth, string, lst, 0 ); 10852} 10853 10854/** 10855 * xmlParseBalancedChunkMemoryInternal: 10856 * @oldctxt: the existing parsing context 10857 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 10858 * @user_data: the user data field for the parser context 10859 * @lst: the return value for the set of parsed nodes 10860 * 10861 * 10862 * Parse a well-balanced chunk of an XML document 10863 * called by the parser 10864 * The allowed sequence for the Well 
Balanced Chunk is the one defined by 10865 * the content production in the XML grammar: 10866 * 10867 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10868 * 10869 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 10870 * the parser error code otherwise 10871 * 10872 * In case recover is set to 1, the nodelist will not be empty even if 10873 * the parsed chunk is not well balanced. 10874 */ 10875static int 10876xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 10877 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 10878 xmlParserCtxtPtr ctxt; 10879 xmlDocPtr newDoc = NULL; 10880 xmlSAXHandlerPtr oldsax = NULL; 10881 xmlNodePtr content = NULL; 10882 int size; 10883 int ret = 0; 10884 10885 if (oldctxt->depth > 40) { 10886 return(XML_ERR_ENTITY_LOOP); 10887 } 10888 10889 10890 if (lst != NULL) 10891 *lst = NULL; 10892 if (string == NULL) 10893 return(-1); 10894 10895 size = xmlStrlen(string); 10896 10897 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 10898 if (ctxt == NULL) return(-1); 10899 if (user_data != NULL) 10900 ctxt->userData = user_data; 10901 else 10902 ctxt->userData = ctxt; 10903 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 10904 ctxt->dict = oldctxt->dict; 10905 10906 oldsax = ctxt->sax; 10907 ctxt->sax = oldctxt->sax; 10908 xmlDetectSAX2(ctxt); 10909 10910 ctxt->_private = oldctxt->_private; 10911 if (oldctxt->myDoc == NULL) { 10912 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10913 if (newDoc == NULL) { 10914 ctxt->sax = oldsax; 10915 ctxt->dict = NULL; 10916 xmlFreeParserCtxt(ctxt); 10917 return(-1); 10918 } 10919 ctxt->myDoc = newDoc; 10920 } else { 10921 ctxt->myDoc = oldctxt->myDoc; 10922 content = ctxt->myDoc->children; 10923 } 10924 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL, 10925 BAD_CAST "pseudoroot", NULL); 10926 if (ctxt->myDoc->children == NULL) { 10927 ctxt->sax = oldsax; 10928 ctxt->dict = NULL; 10929 xmlFreeParserCtxt(ctxt); 10930 if (newDoc != 
NULL) 10931 xmlFreeDoc(newDoc); 10932 return(-1); 10933 } 10934 nodePush(ctxt, ctxt->myDoc->children); 10935 ctxt->instate = XML_PARSER_CONTENT; 10936 ctxt->depth = oldctxt->depth + 1; 10937 10938 ctxt->validate = 0; 10939 ctxt->loadsubset = oldctxt->loadsubset; 10940 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 10941 /* 10942 * ID/IDREF registration will be done in xmlValidateElement below 10943 */ 10944 ctxt->loadsubset |= XML_SKIP_IDS; 10945 } 10946 ctxt->dictNames = oldctxt->dictNames; 10947 10948 xmlParseContent(ctxt); 10949 if ((RAW == '<') && (NXT(1) == '/')) { 10950 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10951 } else if (RAW != 0) { 10952 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10953 } 10954 if (ctxt->node != ctxt->myDoc->children) { 10955 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10956 } 10957 10958 if (!ctxt->wellFormed) { 10959 if (ctxt->errNo == 0) 10960 ret = 1; 10961 else 10962 ret = ctxt->errNo; 10963 } else { 10964 ret = 0; 10965 } 10966 10967 if ((lst != NULL) && (ret == 0)) { 10968 xmlNodePtr cur; 10969 10970 /* 10971 * Return the newly created nodeset after unlinking it from 10972 * they pseudo parent. 
10973 */ 10974 cur = ctxt->myDoc->children->children; 10975 *lst = cur; 10976 while (cur != NULL) { 10977 if (oldctxt->validate && oldctxt->wellFormed && 10978 oldctxt->myDoc && oldctxt->myDoc->intSubset) { 10979 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 10980 oldctxt->myDoc, cur); 10981 } 10982 cur->parent = NULL; 10983 cur = cur->next; 10984 } 10985 ctxt->myDoc->children->children = NULL; 10986 } 10987 if (ctxt->myDoc != NULL) { 10988 xmlFreeNode(ctxt->myDoc->children); 10989 ctxt->myDoc->children = content; 10990 } 10991 10992 ctxt->sax = oldsax; 10993 ctxt->dict = NULL; 10994 xmlFreeParserCtxt(ctxt); 10995 if (newDoc != NULL) 10996 xmlFreeDoc(newDoc); 10997 10998 return(ret); 10999} 11000 11001/** 11002 * xmlParseBalancedChunkMemoryRecover: 11003 * @doc: the document the chunk pertains to 11004 * @sax: the SAX handler bloc (possibly NULL) 11005 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11006 * @depth: Used for loop detection, use 0 11007 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 11008 * @lst: the return value for the set of parsed nodes 11009 * @recover: return nodes even if the data is broken (use 0) 11010 * 11011 * 11012 * Parse a well-balanced chunk of an XML document 11013 * called by the parser 11014 * The allowed sequence for the Well Balanced Chunk is the one defined by 11015 * the content production in the XML grammar: 11016 * 11017 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11018 * 11019 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 11020 * the parser error code otherwise 11021 * 11022 * In case recover is set to 1, the nodelist will not be empty even if 11023 * the parsed chunk is not well balanced. 
11024 */ 11025int 11026xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 11027 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 11028 int recover) { 11029 xmlParserCtxtPtr ctxt; 11030 xmlDocPtr newDoc; 11031 xmlSAXHandlerPtr oldsax = NULL; 11032 xmlNodePtr content; 11033 int size; 11034 int ret = 0; 11035 11036 if (depth > 40) { 11037 return(XML_ERR_ENTITY_LOOP); 11038 } 11039 11040 11041 if (lst != NULL) 11042 *lst = NULL; 11043 if (string == NULL) 11044 return(-1); 11045 11046 size = xmlStrlen(string); 11047 11048 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 11049 if (ctxt == NULL) return(-1); 11050 ctxt->userData = ctxt; 11051 if (sax != NULL) { 11052 oldsax = ctxt->sax; 11053 ctxt->sax = sax; 11054 if (user_data != NULL) 11055 ctxt->userData = user_data; 11056 } 11057 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11058 if (newDoc == NULL) { 11059 xmlFreeParserCtxt(ctxt); 11060 return(-1); 11061 } 11062 if (doc != NULL) { 11063 newDoc->intSubset = doc->intSubset; 11064 newDoc->extSubset = doc->extSubset; 11065 } 11066 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 11067 if (newDoc->children == NULL) { 11068 if (sax != NULL) 11069 ctxt->sax = oldsax; 11070 xmlFreeParserCtxt(ctxt); 11071 newDoc->intSubset = NULL; 11072 newDoc->extSubset = NULL; 11073 xmlFreeDoc(newDoc); 11074 return(-1); 11075 } 11076 nodePush(ctxt, newDoc->children); 11077 if (doc == NULL) { 11078 ctxt->myDoc = newDoc; 11079 } else { 11080 ctxt->myDoc = newDoc; 11081 newDoc->children->doc = doc; 11082 } 11083 ctxt->instate = XML_PARSER_CONTENT; 11084 ctxt->depth = depth; 11085 11086 /* 11087 * Doing validity checking on chunk doesn't make sense 11088 */ 11089 ctxt->validate = 0; 11090 ctxt->loadsubset = 0; 11091 xmlDetectSAX2(ctxt); 11092 11093 if ( doc != NULL ){ 11094 content = doc->children; 11095 doc->children = NULL; 11096 xmlParseContent(ctxt); 11097 doc->children = content; 11098 } 11099 else { 11100 
xmlParseContent(ctxt); 11101 } 11102 if ((RAW == '<') && (NXT(1) == '/')) { 11103 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11104 } else if (RAW != 0) { 11105 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11106 } 11107 if (ctxt->node != newDoc->children) { 11108 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11109 } 11110 11111 if (!ctxt->wellFormed) { 11112 if (ctxt->errNo == 0) 11113 ret = 1; 11114 else 11115 ret = ctxt->errNo; 11116 } else { 11117 ret = 0; 11118 } 11119 11120 if (lst != NULL && (ret == 0 || recover == 1)) { 11121 xmlNodePtr cur; 11122 11123 /* 11124 * Return the newly created nodeset after unlinking it from 11125 * they pseudo parent. 11126 */ 11127 cur = newDoc->children->children; 11128 *lst = cur; 11129 while (cur != NULL) { 11130 cur->parent = NULL; 11131 cur = cur->next; 11132 } 11133 newDoc->children->children = NULL; 11134 } 11135 11136 if (sax != NULL) 11137 ctxt->sax = oldsax; 11138 xmlFreeParserCtxt(ctxt); 11139 newDoc->intSubset = NULL; 11140 newDoc->extSubset = NULL; 11141 xmlFreeDoc(newDoc); 11142 11143 return(ret); 11144} 11145 11146/** 11147 * xmlSAXParseEntity: 11148 * @sax: the SAX handler block 11149 * @filename: the filename 11150 * 11151 * parse an XML external entity out of context and build a tree. 11152 * It use the given SAX function block to handle the parsing callback. 11153 * If sax is NULL, fallback to the default DOM tree building routines. 11154 * 11155 * [78] extParsedEnt ::= TextDecl? 
content 11156 * 11157 * This correspond to a "Well Balanced" chunk 11158 * 11159 * Returns the resulting document tree 11160 */ 11161 11162xmlDocPtr 11163xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 11164 xmlDocPtr ret; 11165 xmlParserCtxtPtr ctxt; 11166 11167 ctxt = xmlCreateFileParserCtxt(filename); 11168 if (ctxt == NULL) { 11169 return(NULL); 11170 } 11171 if (sax != NULL) { 11172 if (ctxt->sax != NULL) 11173 xmlFree(ctxt->sax); 11174 ctxt->sax = sax; 11175 ctxt->userData = NULL; 11176 } 11177 11178 xmlParseExtParsedEnt(ctxt); 11179 11180 if (ctxt->wellFormed) 11181 ret = ctxt->myDoc; 11182 else { 11183 ret = NULL; 11184 xmlFreeDoc(ctxt->myDoc); 11185 ctxt->myDoc = NULL; 11186 } 11187 if (sax != NULL) 11188 ctxt->sax = NULL; 11189 xmlFreeParserCtxt(ctxt); 11190 11191 return(ret); 11192} 11193 11194/** 11195 * xmlParseEntity: 11196 * @filename: the filename 11197 * 11198 * parse an XML external entity out of context and build a tree. 11199 * 11200 * [78] extParsedEnt ::= TextDecl? content 11201 * 11202 * This correspond to a "Well Balanced" chunk 11203 * 11204 * Returns the resulting document tree 11205 */ 11206 11207xmlDocPtr 11208xmlParseEntity(const char *filename) { 11209 return(xmlSAXParseEntity(NULL, filename)); 11210} 11211 11212/** 11213 * xmlCreateEntityParserCtxt: 11214 * @URL: the entity URL 11215 * @ID: the entity PUBLIC ID 11216 * @base: a possible base for the target URI 11217 * 11218 * Create a parser context for an external entity 11219 * Automatic support for ZLIB/Compress compressed document is provided 11220 * by default if found at compile-time. 
11221 * 11222 * Returns the new parser context or NULL 11223 */ 11224xmlParserCtxtPtr 11225xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 11226 const xmlChar *base) { 11227 xmlParserCtxtPtr ctxt; 11228 xmlParserInputPtr inputStream; 11229 char *directory = NULL; 11230 xmlChar *uri; 11231 11232 ctxt = xmlNewParserCtxt(); 11233 if (ctxt == NULL) { 11234 return(NULL); 11235 } 11236 11237 uri = xmlBuildURI(URL, base); 11238 11239 if (uri == NULL) { 11240 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 11241 if (inputStream == NULL) { 11242 xmlFreeParserCtxt(ctxt); 11243 return(NULL); 11244 } 11245 11246 inputPush(ctxt, inputStream); 11247 11248 if ((ctxt->directory == NULL) && (directory == NULL)) 11249 directory = xmlParserGetDirectory((char *)URL); 11250 if ((ctxt->directory == NULL) && (directory != NULL)) 11251 ctxt->directory = directory; 11252 } else { 11253 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 11254 if (inputStream == NULL) { 11255 xmlFree(uri); 11256 xmlFreeParserCtxt(ctxt); 11257 return(NULL); 11258 } 11259 11260 inputPush(ctxt, inputStream); 11261 11262 if ((ctxt->directory == NULL) && (directory == NULL)) 11263 directory = xmlParserGetDirectory((char *)uri); 11264 if ((ctxt->directory == NULL) && (directory != NULL)) 11265 ctxt->directory = directory; 11266 xmlFree(uri); 11267 } 11268 return(ctxt); 11269} 11270 11271/************************************************************************ 11272 * * 11273 * Front ends when parsing from a file * 11274 * * 11275 ************************************************************************/ 11276 11277/** 11278 * xmlCreateFileParserCtxt: 11279 * @filename: the filename 11280 * 11281 * Create a parser context for a file content. 11282 * Automatic support for ZLIB/Compress compressed document is provided 11283 * by default if found at compile-time. 
11284 * 11285 * Returns the new parser context or NULL 11286 */ 11287xmlParserCtxtPtr 11288xmlCreateFileParserCtxt(const char *filename) 11289{ 11290 xmlParserCtxtPtr ctxt; 11291 xmlParserInputPtr inputStream; 11292 char *directory = NULL; 11293 11294 ctxt = xmlNewParserCtxt(); 11295 if (ctxt == NULL) { 11296 if (xmlDefaultSAXHandler.error != NULL) { 11297 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); 11298 } 11299 return(NULL); 11300 } 11301 11302 11303 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 11304 if (inputStream == NULL) { 11305 xmlFreeParserCtxt(ctxt); 11306 return(NULL); 11307 } 11308 11309 inputPush(ctxt, inputStream); 11310 if ((ctxt->directory == NULL) && (directory == NULL)) 11311 directory = xmlParserGetDirectory(filename); 11312 if ((ctxt->directory == NULL) && (directory != NULL)) 11313 ctxt->directory = directory; 11314 11315 return(ctxt); 11316} 11317 11318/** 11319 * xmlSAXParseFileWithData: 11320 * @sax: the SAX handler block 11321 * @filename: the filename 11322 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11323 * documents 11324 * @data: the userdata 11325 * 11326 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11327 * compressed document is provided by default if found at compile-time. 11328 * It use the given SAX function block to handle the parsing callback. 11329 * If sax is NULL, fallback to the default DOM tree building routines. 
11330 * 11331 * User data (void *) is stored within the parser context in the 11332 * context's _private member, so it is available nearly everywhere in libxml 11333 * 11334 * Returns the resulting document tree 11335 */ 11336 11337xmlDocPtr 11338xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 11339 int recovery, void *data) { 11340 xmlDocPtr ret; 11341 xmlParserCtxtPtr ctxt; 11342 char *directory = NULL; 11343 11344 xmlInitParser(); 11345 11346 ctxt = xmlCreateFileParserCtxt(filename); 11347 if (ctxt == NULL) { 11348 return(NULL); 11349 } 11350 if (sax != NULL) { 11351 if (ctxt->sax != NULL) 11352 xmlFree(ctxt->sax); 11353 ctxt->sax = sax; 11354 } 11355 xmlDetectSAX2(ctxt); 11356 if (data!=NULL) { 11357 ctxt->_private=data; 11358 } 11359 11360 if ((ctxt->directory == NULL) && (directory == NULL)) 11361 directory = xmlParserGetDirectory(filename); 11362 if ((ctxt->directory == NULL) && (directory != NULL)) 11363 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 11364 11365 ctxt->recovery = recovery; 11366 11367 xmlParseDocument(ctxt); 11368 11369 if ((ctxt->wellFormed) || recovery) { 11370 ret = ctxt->myDoc; 11371 if (ctxt->input->buf->compressed > 0) 11372 ret->compression = 9; 11373 else 11374 ret->compression = ctxt->input->buf->compressed; 11375 } 11376 else { 11377 ret = NULL; 11378 xmlFreeDoc(ctxt->myDoc); 11379 ctxt->myDoc = NULL; 11380 } 11381 if (sax != NULL) 11382 ctxt->sax = NULL; 11383 xmlFreeParserCtxt(ctxt); 11384 11385 return(ret); 11386} 11387 11388/** 11389 * xmlSAXParseFile: 11390 * @sax: the SAX handler block 11391 * @filename: the filename 11392 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11393 * documents 11394 * 11395 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11396 * compressed document is provided by default if found at compile-time. 11397 * It use the given SAX function block to handle the parsing callback. 
11398 * If sax is NULL, fallback to the default DOM tree building routines. 11399 * 11400 * Returns the resulting document tree 11401 */ 11402 11403xmlDocPtr 11404xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 11405 int recovery) { 11406 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 11407} 11408 11409/** 11410 * xmlRecoverDoc: 11411 * @cur: a pointer to an array of xmlChar 11412 * 11413 * parse an XML in-memory document and build a tree. 11414 * In the case the document is not Well Formed, a tree is built anyway 11415 * 11416 * Returns the resulting document tree 11417 */ 11418 11419xmlDocPtr 11420xmlRecoverDoc(xmlChar *cur) { 11421 return(xmlSAXParseDoc(NULL, cur, 1)); 11422} 11423 11424/** 11425 * xmlParseFile: 11426 * @filename: the filename 11427 * 11428 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11429 * compressed document is provided by default if found at compile-time. 11430 * 11431 * Returns the resulting document tree if the file was wellformed, 11432 * NULL otherwise. 11433 */ 11434 11435xmlDocPtr 11436xmlParseFile(const char *filename) { 11437 return(xmlSAXParseFile(NULL, filename, 0)); 11438} 11439 11440/** 11441 * xmlRecoverFile: 11442 * @filename: the filename 11443 * 11444 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11445 * compressed document is provided by default if found at compile-time. 11446 * In the case the document is not Well Formed, a tree is built anyway 11447 * 11448 * Returns the resulting document tree 11449 */ 11450 11451xmlDocPtr 11452xmlRecoverFile(const char *filename) { 11453 return(xmlSAXParseFile(NULL, filename, 1)); 11454} 11455 11456 11457/** 11458 * xmlSetupParserForBuffer: 11459 * @ctxt: an XML parser context 11460 * @buffer: a xmlChar * buffer 11461 * @filename: a file name 11462 * 11463 * Setup the parser context to parse a new buffer; Clears any prior 11464 * contents from the parser context. 
The buffer parameter must not be 11465 * NULL, but the filename parameter can be 11466 */ 11467void 11468xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 11469 const char* filename) 11470{ 11471 xmlParserInputPtr input; 11472 11473 input = xmlNewInputStream(ctxt); 11474 if (input == NULL) { 11475 xmlGenericError(xmlGenericErrorContext, 11476 "malloc"); 11477 xmlFree(ctxt); 11478 return; 11479 } 11480 11481 xmlClearParserCtxt(ctxt); 11482 if (filename != NULL) 11483 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 11484 input->base = buffer; 11485 input->cur = buffer; 11486 input->end = &buffer[xmlStrlen(buffer)]; 11487 inputPush(ctxt, input); 11488} 11489 11490/** 11491 * xmlSAXUserParseFile: 11492 * @sax: a SAX handler 11493 * @user_data: The user data returned on SAX callbacks 11494 * @filename: a file name 11495 * 11496 * parse an XML file and call the given SAX handler routines. 11497 * Automatic support for ZLIB/Compress compressed document is provided 11498 * 11499 * Returns 0 in case of success or a error number otherwise 11500 */ 11501int 11502xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 11503 const char *filename) { 11504 int ret = 0; 11505 xmlParserCtxtPtr ctxt; 11506 11507 ctxt = xmlCreateFileParserCtxt(filename); 11508 if (ctxt == NULL) return -1; 11509 if (ctxt->sax != &xmlDefaultSAXHandler) 11510 xmlFree(ctxt->sax); 11511 ctxt->sax = sax; 11512 xmlDetectSAX2(ctxt); 11513 11514 if (user_data != NULL) 11515 ctxt->userData = user_data; 11516 11517 xmlParseDocument(ctxt); 11518 11519 if (ctxt->wellFormed) 11520 ret = 0; 11521 else { 11522 if (ctxt->errNo != 0) 11523 ret = ctxt->errNo; 11524 else 11525 ret = -1; 11526 } 11527 if (sax != NULL) 11528 ctxt->sax = NULL; 11529 xmlFreeParserCtxt(ctxt); 11530 11531 return ret; 11532} 11533 11534/************************************************************************ 11535 * * 11536 * Front ends when parsing from memory * 11537 * * 11538 
************************************************************************/ 11539 11540/** 11541 * xmlCreateMemoryParserCtxt: 11542 * @buffer: a pointer to a char array 11543 * @size: the size of the array 11544 * 11545 * Create a parser context for an XML in-memory document. 11546 * 11547 * Returns the new parser context or NULL 11548 */ 11549xmlParserCtxtPtr 11550xmlCreateMemoryParserCtxt(const char *buffer, int size) { 11551 xmlParserCtxtPtr ctxt; 11552 xmlParserInputPtr input; 11553 xmlParserInputBufferPtr buf; 11554 11555 if (buffer == NULL) 11556 return(NULL); 11557 if (size <= 0) 11558 return(NULL); 11559 11560 ctxt = xmlNewParserCtxt(); 11561 if (ctxt == NULL) 11562 return(NULL); 11563 11564 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 11565 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 11566 if (buf == NULL) { 11567 xmlFreeParserCtxt(ctxt); 11568 return(NULL); 11569 } 11570 11571 input = xmlNewInputStream(ctxt); 11572 if (input == NULL) { 11573 xmlFreeParserInputBuffer(buf); 11574 xmlFreeParserCtxt(ctxt); 11575 return(NULL); 11576 } 11577 11578 input->filename = NULL; 11579 input->buf = buf; 11580 input->base = input->buf->buffer->content; 11581 input->cur = input->buf->buffer->content; 11582 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 11583 11584 inputPush(ctxt, input); 11585 return(ctxt); 11586} 11587 11588/** 11589 * xmlSAXParseMemoryWithData: 11590 * @sax: the SAX handler block 11591 * @buffer: an pointer to a char array 11592 * @size: the size of the array 11593 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11594 * documents 11595 * @data: the userdata 11596 * 11597 * parse an XML in-memory block and use the given SAX function block 11598 * to handle the parsing callback. If sax is NULL, fallback to the default 11599 * DOM tree building routines. 
11600 * 11601 * User data (void *) is stored within the parser context in the 11602 * context's _private member, so it is available nearly everywhere in libxml 11603 * 11604 * Returns the resulting document tree 11605 */ 11606 11607xmlDocPtr 11608xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 11609 int size, int recovery, void *data) { 11610 xmlDocPtr ret; 11611 xmlParserCtxtPtr ctxt; 11612 11613 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 11614 if (ctxt == NULL) return(NULL); 11615 if (sax != NULL) { 11616 if (ctxt->sax != NULL) 11617 xmlFree(ctxt->sax); 11618 ctxt->sax = sax; 11619 } 11620 xmlDetectSAX2(ctxt); 11621 if (data!=NULL) { 11622 ctxt->_private=data; 11623 } 11624 11625 ctxt->recovery = recovery; 11626 11627 xmlParseDocument(ctxt); 11628 11629 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 11630 else { 11631 ret = NULL; 11632 xmlFreeDoc(ctxt->myDoc); 11633 ctxt->myDoc = NULL; 11634 } 11635 if (sax != NULL) 11636 ctxt->sax = NULL; 11637 xmlFreeParserCtxt(ctxt); 11638 11639 return(ret); 11640} 11641 11642/** 11643 * xmlSAXParseMemory: 11644 * @sax: the SAX handler block 11645 * @buffer: an pointer to a char array 11646 * @size: the size of the array 11647 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 11648 * documents 11649 * 11650 * parse an XML in-memory block and use the given SAX function block 11651 * to handle the parsing callback. If sax is NULL, fallback to the default 11652 * DOM tree building routines. 11653 * 11654 * Returns the resulting document tree 11655 */ 11656xmlDocPtr 11657xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 11658 int size, int recovery) { 11659 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 11660} 11661 11662/** 11663 * xmlParseMemory: 11664 * @buffer: an pointer to a char array 11665 * @size: the size of the array 11666 * 11667 * parse an XML in-memory block and build a tree. 
11668 * 11669 * Returns the resulting document tree 11670 */ 11671 11672xmlDocPtr xmlParseMemory(const char *buffer, int size) { 11673 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 11674} 11675 11676/** 11677 * xmlRecoverMemory: 11678 * @buffer: an pointer to a char array 11679 * @size: the size of the array 11680 * 11681 * parse an XML in-memory block and build a tree. 11682 * In the case the document is not Well Formed, a tree is built anyway 11683 * 11684 * Returns the resulting document tree 11685 */ 11686 11687xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 11688 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 11689} 11690 11691/** 11692 * xmlSAXUserParseMemory: 11693 * @sax: a SAX handler 11694 * @user_data: The user data returned on SAX callbacks 11695 * @buffer: an in-memory XML document input 11696 * @size: the length of the XML document in bytes 11697 * 11698 * A better SAX parsing routine. 11699 * parse an XML in-memory buffer and call the given SAX handler routines. 11700 * 11701 * Returns 0 in case of success or a error number otherwise 11702 */ 11703int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 11704 const char *buffer, int size) { 11705 int ret = 0; 11706 xmlParserCtxtPtr ctxt; 11707 xmlSAXHandlerPtr oldsax = NULL; 11708 11709 if (sax == NULL) return -1; 11710 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 11711 if (ctxt == NULL) return -1; 11712 oldsax = ctxt->sax; 11713 ctxt->sax = sax; 11714 xmlDetectSAX2(ctxt); 11715 if (user_data != NULL) 11716 ctxt->userData = user_data; 11717 11718 xmlParseDocument(ctxt); 11719 11720 if (ctxt->wellFormed) 11721 ret = 0; 11722 else { 11723 if (ctxt->errNo != 0) 11724 ret = ctxt->errNo; 11725 else 11726 ret = -1; 11727 } 11728 ctxt->sax = oldsax; 11729 xmlFreeParserCtxt(ctxt); 11730 11731 return ret; 11732} 11733 11734/** 11735 * xmlCreateDocParserCtxt: 11736 * @cur: a pointer to an array of xmlChar 11737 * 11738 * Creates a parser context for an XML in-memory document. 
11739 * 11740 * Returns the new parser context or NULL 11741 */ 11742xmlParserCtxtPtr 11743xmlCreateDocParserCtxt(xmlChar *cur) { 11744 int len; 11745 11746 if (cur == NULL) 11747 return(NULL); 11748 len = xmlStrlen(cur); 11749 return(xmlCreateMemoryParserCtxt((char *)cur, len)); 11750} 11751 11752/** 11753 * xmlSAXParseDoc: 11754 * @sax: the SAX handler block 11755 * @cur: a pointer to an array of xmlChar 11756 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11757 * documents 11758 * 11759 * parse an XML in-memory document and build a tree. 11760 * It use the given SAX function block to handle the parsing callback. 11761 * If sax is NULL, fallback to the default DOM tree building routines. 11762 * 11763 * Returns the resulting document tree 11764 */ 11765 11766xmlDocPtr 11767xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 11768 xmlDocPtr ret; 11769 xmlParserCtxtPtr ctxt; 11770 11771 if (cur == NULL) return(NULL); 11772 11773 11774 ctxt = xmlCreateDocParserCtxt(cur); 11775 if (ctxt == NULL) return(NULL); 11776 if (sax != NULL) { 11777 ctxt->sax = sax; 11778 ctxt->userData = NULL; 11779 } 11780 xmlDetectSAX2(ctxt); 11781 11782 xmlParseDocument(ctxt); 11783 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 11784 else { 11785 ret = NULL; 11786 xmlFreeDoc(ctxt->myDoc); 11787 ctxt->myDoc = NULL; 11788 } 11789 if (sax != NULL) 11790 ctxt->sax = NULL; 11791 xmlFreeParserCtxt(ctxt); 11792 11793 return(ret); 11794} 11795 11796/** 11797 * xmlParseDoc: 11798 * @cur: a pointer to an array of xmlChar 11799 * 11800 * parse an XML in-memory document and build a tree. 
11801 * 11802 * Returns the resulting document tree 11803 */ 11804 11805xmlDocPtr 11806xmlParseDoc(xmlChar *cur) { 11807 return(xmlSAXParseDoc(NULL, cur, 0)); 11808} 11809 11810/************************************************************************ 11811 * * 11812 * Specific function to keep track of entities references * 11813 * and used by the XSLT debugger * 11814 * * 11815 ************************************************************************/ 11816 11817static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 11818 11819/** 11820 * xmlAddEntityReference: 11821 * @ent : A valid entity 11822 * @firstNode : A valid first node for children of entity 11823 * @lastNode : A valid last node of children entity 11824 * 11825 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 11826 */ 11827static void 11828xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 11829 xmlNodePtr lastNode) 11830{ 11831 if (xmlEntityRefFunc != NULL) { 11832 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 11833 } 11834} 11835 11836 11837/** 11838 * xmlSetEntityReferenceFunc: 11839 * @func: A valid function 11840 * 11841 * Set the function to call call back when a xml reference has been made 11842 */ 11843void 11844xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 11845{ 11846 xmlEntityRefFunc = func; 11847} 11848 11849/************************************************************************ 11850 * * 11851 * Miscellaneous * 11852 * * 11853 ************************************************************************/ 11854 11855#ifdef LIBXML_XPATH_ENABLED 11856#include <libxml/xpath.h> 11857#endif 11858 11859extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 11860static int xmlParserInitialized = 0; 11861 11862/** 11863 * xmlInitParser: 11864 * 11865 * Initialization function for the XML parser. 11866 * This is not reentrant. Call once before processing in case of 11867 * use in multithreaded programs. 
11868 */ 11869 11870void 11871xmlInitParser(void) { 11872 if (xmlParserInitialized != 0) 11873 return; 11874 11875 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 11876 (xmlGenericError == NULL)) 11877 initGenericErrorDefaultFunc(NULL); 11878 xmlInitGlobals(); 11879 xmlInitThreads(); 11880 xmlInitMemory(); 11881 xmlInitCharEncodingHandlers(); 11882 xmlInitializePredefinedEntities(); 11883 xmlDefaultSAXHandlerInit(); 11884 xmlRegisterDefaultInputCallbacks(); 11885 xmlRegisterDefaultOutputCallbacks(); 11886#ifdef LIBXML_HTML_ENABLED 11887 htmlInitAutoClose(); 11888 htmlDefaultSAXHandlerInit(); 11889#endif 11890#ifdef LIBXML_XPATH_ENABLED 11891 xmlXPathInit(); 11892#endif 11893 xmlParserInitialized = 1; 11894} 11895 11896/** 11897 * xmlCleanupParser: 11898 * 11899 * Cleanup function for the XML parser. It tries to reclaim all 11900 * parsing related global memory allocated for the parser processing. 11901 * It doesn't deallocate any document related memory. Calling this 11902 * function should not prevent reusing the parser. 11903 * One should call xmlCleanupParser() only when the process has 11904 * finished using the library or XML document built with it. 11905 */ 11906 11907void 11908xmlCleanupParser(void) { 11909 if (!xmlParserInitialized) 11910 return; 11911 11912 xmlCleanupCharEncodingHandlers(); 11913 xmlCleanupPredefinedEntities(); 11914#ifdef LIBXML_CATALOG_ENABLED 11915 xmlCatalogCleanup(); 11916#endif 11917 xmlCleanupThreads(); 11918 xmlCleanupGlobals(); 11919 xmlParserInitialized = 0; 11920} 11921