parser.h revision 9475a352bdd1f15b1e0c53472a74938f9d5dc04e
1/* 2 * parser.h : Interfaces, constants and types related to the XML parser. 3 * 4 * See Copyright for the status of this software. 5 * 6 * daniel@veillard.com 7 */ 8 9#ifndef __XML_PARSER_H__ 10#define __XML_PARSER_H__ 11 12#include <libxml/xmlversion.h> 13#include <libxml/tree.h> 14#include <libxml/dict.h> 15#include <libxml/hash.h> 16#include <libxml/valid.h> 17#include <libxml/entities.h> 18 19#ifdef __cplusplus 20extern "C" { 21#endif 22 23/** 24 * XML_DEFAULT_VERSION: 25 * 26 * The default version of XML used: 1.0 27 */ 28#define XML_DEFAULT_VERSION "1.0" 29 30/** 31 * xmlParserInput: 32 * 33 * An xmlParserInput is an input flow for the XML processor. 34 * Each entity parsed is associated an xmlParserInput (except the 35 * few predefined ones). This is the case both for internal entities 36 * - in which case the flow is already completely in memory - or 37 * external entities - in which case we use the buf structure for 38 * progressive reading and I18N conversions to the internal UTF-8 format. 39 */ 40 41/** 42 * xmlParserInputDeallocate: 43 * @str: the string to deallocate 44 * 45 * Callback for freeing some parser input allocations. 46 */ 47typedef void (* xmlParserInputDeallocate)(xmlChar *str); 48 49struct _xmlParserInput { 50 /* Input buffer */ 51 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ 52 53 const char *filename; /* The file analyzed, if any */ 54 const char *directory; /* the directory/base of the file */ 55 const xmlChar *base; /* Base of the array to parse */ 56 const xmlChar *cur; /* Current char being parsed */ 57 const xmlChar *end; /* end of the array to parse */ 58 int length; /* length if known */ 59 int line; /* Current line */ 60 int col; /* Current column */ 61 /* 62 * NOTE: consumed is only tested for equality in the parser code, 63 * so even if there is an overflow this should not give troubles 64 * for parsing very large instances. 65 */ 66 unsigned long consumed; /* How many xmlChars already consumed */ 67 xmlParserInputDeallocate free; /* function to deallocate the base */ 68 const xmlChar *encoding; /* the encoding string for entity */ 69 const xmlChar *version; /* the version string for entity */ 70 int standalone; /* Was that entity marked standalone */ 71 int id; /* an unique identifier for the entity */ 72}; 73 74/** 75 * xmlParserNodeInfo: 76 * 77 * The parser can be asked to collect Node informations, i.e. at what 78 * place in the file they were detected. 79 * NOTE: This is off by default and not very well tested. 80 */ 81typedef struct _xmlParserNodeInfo xmlParserNodeInfo; 82typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; 83 84struct _xmlParserNodeInfo { 85 const struct _xmlNode* node; 86 /* Position & line # that text that created the node begins & ends on */ 87 unsigned long begin_pos; 88 unsigned long begin_line; 89 unsigned long end_pos; 90 unsigned long end_line; 91}; 92 93typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; 94typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; 95struct _xmlParserNodeInfoSeq { 96 unsigned long maximum; 97 unsigned long length; 98 xmlParserNodeInfo* buffer; 99}; 100 101/** 102 * xmlParserInputState: 103 * 104 * The parser is now working also as a state based parser. 105 * The recursive one use the state info for entities processing. 106 */ 107typedef enum { 108 XML_PARSER_EOF = -1, /* nothing is to be parsed */ 109 XML_PARSER_START = 0, /* nothing has been parsed */ 110 XML_PARSER_MISC, /* Misc* before int subset */ 111 XML_PARSER_PI, /* Within a processing instruction */ 112 XML_PARSER_DTD, /* within some DTD content */ 113 XML_PARSER_PROLOG, /* Misc* after internal subset */ 114 XML_PARSER_COMMENT, /* within a comment */ 115 XML_PARSER_START_TAG, /* within a start tag */ 116 XML_PARSER_CONTENT, /* within the content */ 117 XML_PARSER_CDATA_SECTION, /* within a CDATA section */ 118 XML_PARSER_END_TAG, /* within a closing tag */ 119 XML_PARSER_ENTITY_DECL, /* within an entity declaration */ 120 XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ 121 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ 122 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ 123 XML_PARSER_EPILOG, /* the Misc* after the last end tag */ 124 XML_PARSER_IGNORE, /* within an IGNORED section */ 125 XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */ 126} xmlParserInputState; 127 128/** 129 * XML_DETECT_IDS: 130 * 131 * Bit in the loadsubset context field to tell to do ID/REFs lookups. 132 * Use it to initialize xmlLoadExtDtdDefaultValue. 133 */ 134#define XML_DETECT_IDS 2 135 136/** 137 * XML_COMPLETE_ATTRS: 138 * 139 * Bit in the loadsubset context field to tell to do complete the 140 * elements attributes lists with the ones defaulted from the DTDs. 141 * Use it to initialize xmlLoadExtDtdDefaultValue. 142 */ 143#define XML_COMPLETE_ATTRS 4 144 145/** 146 * XML_SKIP_IDS: 147 * 148 * Bit in the loadsubset context field to tell to not do ID/REFs registration. 149 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases. 150 */ 151#define XML_SKIP_IDS 8 152 153/** 154 * xmlParserCtxt: 155 * 156 * The parser context. 157 * NOTE This doesn't completely define the parser state, the (current ?) 158 * design of the parser uses recursive function calls since this allow 159 * and easy mapping from the production rules of the specification 160 * to the actual code. The drawback is that the actual function call 161 * also reflect the parser state. However most of the parsing routines 162 * takes as the only argument the parser context pointer, so migrating 163 * to a state based parser for progressive parsing shouldn't be too hard. 164 */ 165struct _xmlParserCtxt { 166 struct _xmlSAXHandler *sax; /* The SAX handler */ 167 void *userData; /* For SAX interface only, used by DOM build */ 168 xmlDocPtr myDoc; /* the document being built */ 169 int wellFormed; /* is the document well formed */ 170 int replaceEntities; /* shall we replace entities ? */ 171 const xmlChar *version; /* the XML version string */ 172 const xmlChar *encoding; /* the declared encoding, if any */ 173 int standalone; /* standalone document */ 174 int html; /* an HTML(1)/Docbook(2) document */ 175 176 /* Input stream stack */ 177 xmlParserInputPtr input; /* Current input stream */ 178 int inputNr; /* Number of current input streams */ 179 int inputMax; /* Max number of input streams */ 180 xmlParserInputPtr *inputTab; /* stack of inputs */ 181 182 /* Node analysis stack only used for DOM building */ 183 xmlNodePtr node; /* Current parsed Node */ 184 int nodeNr; /* Depth of the parsing stack */ 185 int nodeMax; /* Max depth of the parsing stack */ 186 xmlNodePtr *nodeTab; /* array of nodes */ 187 188 int record_info; /* Whether node info should be kept */ 189 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ 190 191 int errNo; /* error code */ 192 193 int hasExternalSubset; /* reference and external subset */ 194 int hasPErefs; /* the internal subset has PE refs */ 195 int external; /* are we parsing an external entity */ 196 197 int valid; /* is the document valid */ 198 int validate; /* shall we try to validate ? */ 199 xmlValidCtxt vctxt; /* The validity context */ 200 201 xmlParserInputState instate; /* current type of input */ 202 int token; /* next char look-ahead */ 203 204 char *directory; /* the data directory */ 205 206 /* Node name stack */ 207 const xmlChar *name; /* Current parsed Node */ 208 int nameNr; /* Depth of the parsing stack */ 209 int nameMax; /* Max depth of the parsing stack */ 210 const xmlChar * *nameTab; /* array of nodes */ 211 212 long nbChars; /* number of xmlChar processed */ 213 long checkIndex; /* used by progressive parsing lookup */ 214 int keepBlanks; /* ugly but ... */ 215 int disableSAX; /* SAX callbacks are disabled */ 216 int inSubset; /* Parsing is in int 1/ext 2 subset */ 217 const xmlChar * intSubName; /* name of subset */ 218 xmlChar * extSubURI; /* URI of external subset */ 219 xmlChar * extSubSystem; /* SYSTEM ID of external subset */ 220 221 /* xml:space values */ 222 int * space; /* Should the parser preserve spaces */ 223 int spaceNr; /* Depth of the parsing stack */ 224 int spaceMax; /* Max depth of the parsing stack */ 225 int * spaceTab; /* array of space infos */ 226 227 int depth; /* to prevent entity substitution loops */ 228 xmlParserInputPtr entity; /* used to check entities boundaries */ 229 int charset; /* encoding of the in-memory content 230 actually an xmlCharEncoding */ 231 int nodelen; /* Those two fields are there to */ 232 int nodemem; /* Speed up large node parsing */ 233 int pedantic; /* signal pedantic warnings */ 234 void *_private; /* For user data, libxml won't touch it */ 235 236 int loadsubset; /* should the external subset be loaded */ 237 int linenumbers; /* set line number in element content */ 238 void *catalogs; /* document's own catalog */ 239 int recovery; /* run in recovery mode */ 240 int progressive; /* is this a progressive parsing */ 241 xmlDictPtr dict; /* dictionnary for the parser */ 242 const xmlChar * *atts; /* array for the attributes callbacks */ 243 int maxatts; /* the size of the array */ 244 int docdict; /* use strings from dict to build tree */ 245 246 /* 247 * pre-interned strings 248 */ 249 const xmlChar *str_xml; 250 const xmlChar *str_xmlns; 251 const xmlChar *str_xml_ns; 252 253 /* 254 * Everything below is used only by the new SAX mode 255 */ 256 int sax2; /* operating in the new SAX mode */ 257 int nsNr; /* the number of inherited namespaces */ 258 int nsMax; /* the size of the arrays */ 259 const xmlChar * *nsTab; /* the array of prefix/namespace name */ 260 int *attallocs; /* which attribute were allocated */ 261 void * *pushTab; /* array of data for push */ 262 xmlHashTablePtr attsDefault; /* defaulted attributes if any */ 263 xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ 264 int nsWellFormed; /* is the document XML Nanespace okay */ 265 int options; /* Extra options */ 266 267 /* 268 * Those fields are needed only for treaming parsing so far 269 */ 270 int dictNames; /* Use dictionary names for the tree */ 271 int freeElemsNr; /* number of freed element nodes */ 272 xmlNodePtr freeElems; /* List of freed element nodes */ 273 int freeAttrsNr; /* number of freed attributes nodes */ 274 xmlAttrPtr freeAttrs; /* List of freed attributes nodes */ 275}; 276 277/** 278 * xmlSAXLocator: 279 * 280 * A SAX Locator. 281 */ 282struct _xmlSAXLocator { 283 const xmlChar *(*getPublicId)(void *ctx); 284 const xmlChar *(*getSystemId)(void *ctx); 285 int (*getLineNumber)(void *ctx); 286 int (*getColumnNumber)(void *ctx); 287}; 288 289/** 290 * xmlSAXHandler: 291 * 292 * A SAX handler is bunch of callbacks called by the parser when processing 293 * of the input generate data or structure informations. 294 */ 295 296/** 297 * resolveEntitySAXFunc: 298 * @ctx: the user data (XML parser context) 299 * @publicId: The public ID of the entity 300 * @systemId: The system ID of the entity 301 * 302 * Callback: 303 * The entity loader, to control the loading of external entities, 304 * the application can either: 305 * - override this resolveEntity() callback in the SAX block 306 * - or better use the xmlSetExternalEntityLoader() function to 307 * set up it's own entity resolution routine 308 * 309 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. 310 */ 311typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, 312 const xmlChar *publicId, 313 const xmlChar *systemId); 314/** 315 * internalSubsetSAXFunc: 316 * @ctx: the user data (XML parser context) 317 * @name: the root element name 318 * @ExternalID: the external ID 319 * @SystemID: the SYSTEM ID (e.g. filename or URL) 320 * 321 * Callback on internal subset declaration. 322 */ 323typedef void (*internalSubsetSAXFunc) (void *ctx, 324 const xmlChar *name, 325 const xmlChar *ExternalID, 326 const xmlChar *SystemID); 327/** 328 * externalSubsetSAXFunc: 329 * @ctx: the user data (XML parser context) 330 * @name: the root element name 331 * @ExternalID: the external ID 332 * @SystemID: the SYSTEM ID (e.g. filename or URL) 333 * 334 * Callback on external subset declaration. 335 */ 336typedef void (*externalSubsetSAXFunc) (void *ctx, 337 const xmlChar *name, 338 const xmlChar *ExternalID, 339 const xmlChar *SystemID); 340/** 341 * getEntitySAXFunc: 342 * @ctx: the user data (XML parser context) 343 * @name: The entity name 344 * 345 * Get an entity by name. 346 * 347 * Returns the xmlEntityPtr if found. 348 */ 349typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, 350 const xmlChar *name); 351/** 352 * getParameterEntitySAXFunc: 353 * @ctx: the user data (XML parser context) 354 * @name: The entity name 355 * 356 * Get a parameter entity by name. 357 * 358 * Returns the xmlEntityPtr if found. 359 */ 360typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, 361 const xmlChar *name); 362/** 363 * entityDeclSAXFunc: 364 * @ctx: the user data (XML parser context) 365 * @name: the entity name 366 * @type: the entity type 367 * @publicId: The public ID of the entity 368 * @systemId: The system ID of the entity 369 * @content: the entity value (without processing). 370 * 371 * An entity definition has been parsed. 372 */ 373typedef void (*entityDeclSAXFunc) (void *ctx, 374 const xmlChar *name, 375 int type, 376 const xmlChar *publicId, 377 const xmlChar *systemId, 378 xmlChar *content); 379/** 380 * notationDeclSAXFunc: 381 * @ctx: the user data (XML parser context) 382 * @name: The name of the notation 383 * @publicId: The public ID of the entity 384 * @systemId: The system ID of the entity 385 * 386 * What to do when a notation declaration has been parsed. 387 */ 388typedef void (*notationDeclSAXFunc)(void *ctx, 389 const xmlChar *name, 390 const xmlChar *publicId, 391 const xmlChar *systemId); 392/** 393 * attributeDeclSAXFunc: 394 * @ctx: the user data (XML parser context) 395 * @elem: the name of the element 396 * @fullname: the attribute name 397 * @type: the attribute type 398 * @def: the type of default value 399 * @defaultValue: the attribute default value 400 * @tree: the tree of enumerated value set 401 * 402 * An attribute definition has been parsed. 403 */ 404typedef void (*attributeDeclSAXFunc)(void *ctx, 405 const xmlChar *elem, 406 const xmlChar *fullname, 407 int type, 408 int def, 409 const xmlChar *defaultValue, 410 xmlEnumerationPtr tree); 411/** 412 * elementDeclSAXFunc: 413 * @ctx: the user data (XML parser context) 414 * @name: the element name 415 * @type: the element type 416 * @content: the element value tree 417 * 418 * An element definition has been parsed. 419 */ 420typedef void (*elementDeclSAXFunc)(void *ctx, 421 const xmlChar *name, 422 int type, 423 xmlElementContentPtr content); 424/** 425 * unparsedEntityDeclSAXFunc: 426 * @ctx: the user data (XML parser context) 427 * @name: The name of the entity 428 * @publicId: The public ID of the entity 429 * @systemId: The system ID of the entity 430 * @notationName: the name of the notation 431 * 432 * What to do when an unparsed entity declaration is parsed. 433 */ 434typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, 435 const xmlChar *name, 436 const xmlChar *publicId, 437 const xmlChar *systemId, 438 const xmlChar *notationName); 439/** 440 * setDocumentLocatorSAXFunc: 441 * @ctx: the user data (XML parser context) 442 * @loc: A SAX Locator 443 * 444 * Receive the document locator at startup, actually xmlDefaultSAXLocator. 445 * Everything is available on the context, so this is useless in our case. 446 */ 447typedef void (*setDocumentLocatorSAXFunc) (void *ctx, 448 xmlSAXLocatorPtr loc); 449/** 450 * startDocumentSAXFunc: 451 * @ctx: the user data (XML parser context) 452 * 453 * Called when the document start being processed. 454 */ 455typedef void (*startDocumentSAXFunc) (void *ctx); 456/** 457 * endDocumentSAXFunc: 458 * @ctx: the user data (XML parser context) 459 * 460 * Called when the document end has been detected. 461 */ 462typedef void (*endDocumentSAXFunc) (void *ctx); 463/** 464 * startElementSAXFunc: 465 * @ctx: the user data (XML parser context) 466 * @name: The element name, including namespace prefix 467 * @atts: An array of name/value attributes pairs, NULL terminated 468 * 469 * Called when an opening tag has been processed. 470 */ 471typedef void (*startElementSAXFunc) (void *ctx, 472 const xmlChar *name, 473 const xmlChar **atts); 474/** 475 * endElementSAXFunc: 476 * @ctx: the user data (XML parser context) 477 * @name: The element name 478 * 479 * Called when the end of an element has been detected. 480 */ 481typedef void (*endElementSAXFunc) (void *ctx, 482 const xmlChar *name); 483/** 484 * attributeSAXFunc: 485 * @ctx: the user data (XML parser context) 486 * @name: The attribute name, including namespace prefix 487 * @value: The attribute value 488 * 489 * Handle an attribute that has been read by the parser. 490 * The default handling is to convert the attribute into an 491 * DOM subtree and past it in a new xmlAttr element added to 492 * the element. 493 */ 494typedef void (*attributeSAXFunc) (void *ctx, 495 const xmlChar *name, 496 const xmlChar *value); 497/** 498 * referenceSAXFunc: 499 * @ctx: the user data (XML parser context) 500 * @name: The entity name 501 * 502 * Called when an entity reference is detected. 503 */ 504typedef void (*referenceSAXFunc) (void *ctx, 505 const xmlChar *name); 506/** 507 * charactersSAXFunc: 508 * @ctx: the user data (XML parser context) 509 * @ch: a xmlChar string 510 * @len: the number of xmlChar 511 * 512 * Receiving some chars from the parser. 513 */ 514typedef void (*charactersSAXFunc) (void *ctx, 515 const xmlChar *ch, 516 int len); 517/** 518 * ignorableWhitespaceSAXFunc: 519 * @ctx: the user data (XML parser context) 520 * @ch: a xmlChar string 521 * @len: the number of xmlChar 522 * 523 * Receiving some ignorable whitespaces from the parser. 524 * UNUSED: by default the DOM building will use characters. 525 */ 526typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, 527 const xmlChar *ch, 528 int len); 529/** 530 * processingInstructionSAXFunc: 531 * @ctx: the user data (XML parser context) 532 * @target: the target name 533 * @data: the PI data's 534 * 535 * A processing instruction has been parsed. 536 */ 537typedef void (*processingInstructionSAXFunc) (void *ctx, 538 const xmlChar *target, 539 const xmlChar *data); 540/** 541 * commentSAXFunc: 542 * @ctx: the user data (XML parser context) 543 * @value: the comment content 544 * 545 * A comment has been parsed. 546 */ 547typedef void (*commentSAXFunc) (void *ctx, 548 const xmlChar *value); 549/** 550 * cdataBlockSAXFunc: 551 * @ctx: the user data (XML parser context) 552 * @value: The pcdata content 553 * @len: the block length 554 * 555 * Called when a pcdata block has been parsed. 556 */ 557typedef void (*cdataBlockSAXFunc) ( 558 void *ctx, 559 const xmlChar *value, 560 int len); 561/** 562 * warningSAXFunc: 563 * @ctx: an XML parser context 564 * @msg: the message to display/transmit 565 * @...: extra parameters for the message display 566 * 567 * Display and format a warning messages, callback. 568 */ 569typedef void (*warningSAXFunc) (void *ctx, 570 const char *msg, ...); 571/** 572 * errorSAXFunc: 573 * @ctx: an XML parser context 574 * @msg: the message to display/transmit 575 * @...: extra parameters for the message display 576 * 577 * Display and format an error messages, callback. 578 */ 579typedef void (*errorSAXFunc) (void *ctx, 580 const char *msg, ...); 581/** 582 * fatalErrorSAXFunc: 583 * @ctx: an XML parser context 584 * @msg: the message to display/transmit 585 * @...: extra parameters for the message display 586 * 587 * Display and format fatal error messages, callback. 588 * Note: so far fatalError() SAX callbacks are not used, error() 589 * get all the callbacks for errors. 590 */ 591typedef void (*fatalErrorSAXFunc) (void *ctx, 592 const char *msg, ...); 593/** 594 * isStandaloneSAXFunc: 595 * @ctx: the user data (XML parser context) 596 * 597 * Is this document tagged standalone? 598 * 599 * Returns 1 if true 600 */ 601typedef int (*isStandaloneSAXFunc) (void *ctx); 602/** 603 * hasInternalSubsetSAXFunc: 604 * @ctx: the user data (XML parser context) 605 * 606 * Does this document has an internal subset. 607 * 608 * Returns 1 if true 609 */ 610typedef int (*hasInternalSubsetSAXFunc) (void *ctx); 611 612/** 613 * hasExternalSubsetSAXFunc: 614 * @ctx: the user data (XML parser context) 615 * 616 * Does this document has an external subset? 617 * 618 * Returns 1 if true 619 */ 620typedef int (*hasExternalSubsetSAXFunc) (void *ctx); 621 622/************************************************************************ 623 * * 624 * The SAX version 2 API extensions * 625 * * 626 ************************************************************************/ 627/** 628 * XML_SAX2_MAGIC: 629 * 630 * Special constant found in SAX2 blocks initialized fields 631 */ 632#define XML_SAX2_MAGIC 0xDEEDBEAF 633 634/** 635 * startElementNsSAX2Func: 636 * @ctx: the user data (XML parser context) 637 * @localname: the local name of the element 638 * @prefix: the element namespace prefix if available 639 * @URI: the element namespace name if available 640 * @nb_namespaces: number of namespace definitions on that node 641 * @namespaces: pointer to the array of prefix/URI pairs namespace definitions 642 * @nb_attributes: the number of attributes on that node 643 * @nb_defaulted: the number of defaulted attributes. The defaulted 644 * ones are at the end of the array 645 * @attributes: pointer to the array of (localname/prefix/URI/value/end) 646 * attribute values. 647 * 648 * SAX2 callback when an element start has been detected by the parser. 649 * It provides the namespace informations for the element, as well as 650 * the new namespace declarations on the element. 651 */ 652 653typedef void (*startElementNsSAX2Func) (void *ctx, 654 const xmlChar *localname, 655 const xmlChar *prefix, 656 const xmlChar *URI, 657 int nb_namespaces, 658 const xmlChar **namespaces, 659 int nb_attributes, 660 int nb_defaulted, 661 const xmlChar **attributes); 662 663/** 664 * endElementNsSAX2Func: 665 * @ctx: the user data (XML parser context) 666 * @localname: the local name of the element 667 * @prefix: the element namespace prefix if available 668 * @URI: the element namespace name if available 669 * 670 * SAX2 callback when an element end has been detected by the parser. 671 * It provides the namespace informations for the element. 672 */ 673 674typedef void (*endElementNsSAX2Func) (void *ctx, 675 const xmlChar *localname, 676 const xmlChar *prefix, 677 const xmlChar *URI); 678 679 680struct _xmlSAXHandler { 681 internalSubsetSAXFunc internalSubset; 682 isStandaloneSAXFunc isStandalone; 683 hasInternalSubsetSAXFunc hasInternalSubset; 684 hasExternalSubsetSAXFunc hasExternalSubset; 685 resolveEntitySAXFunc resolveEntity; 686 getEntitySAXFunc getEntity; 687 entityDeclSAXFunc entityDecl; 688 notationDeclSAXFunc notationDecl; 689 attributeDeclSAXFunc attributeDecl; 690 elementDeclSAXFunc elementDecl; 691 unparsedEntityDeclSAXFunc unparsedEntityDecl; 692 setDocumentLocatorSAXFunc setDocumentLocator; 693 startDocumentSAXFunc startDocument; 694 endDocumentSAXFunc endDocument; 695 startElementSAXFunc startElement; 696 endElementSAXFunc endElement; 697 referenceSAXFunc reference; 698 charactersSAXFunc characters; 699 ignorableWhitespaceSAXFunc ignorableWhitespace; 700 processingInstructionSAXFunc processingInstruction; 701 commentSAXFunc comment; 702 warningSAXFunc warning; 703 errorSAXFunc error; 704 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 705 getParameterEntitySAXFunc getParameterEntity; 706 cdataBlockSAXFunc cdataBlock; 707 externalSubsetSAXFunc externalSubset; 708 unsigned int initialized; 709 /* The following fields are extensions available only on version 2 */ 710 void *_private; 711 startElementNsSAX2Func startElementNs; 712 endElementNsSAX2Func endElementNs; 713}; 714 715/** 716 * xmlExternalEntityLoader: 717 * @URL: The System ID of the resource requested 718 * @ID: The Public ID of the resource requested 719 * @context: the XML parser context 720 * 721 * External entity loaders types. 722 * 723 * Returns the entity input parser. 724 */ 725typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, 726 const char *ID, 727 xmlParserCtxtPtr context); 728 729#ifdef __cplusplus 730} 731#endif 732 733#include <libxml/encoding.h> 734#include <libxml/xmlIO.h> 735#include <libxml/globals.h> 736 737#ifdef __cplusplus 738extern "C" { 739#endif 740 741 742/* 743 * Init/Cleanup 744 */ 745XMLPUBFUN void XMLCALL 746 xmlInitParser (void); 747XMLPUBFUN void XMLCALL 748 xmlCleanupParser (void); 749 750/* 751 * Input functions 752 */ 753XMLPUBFUN int XMLCALL 754 xmlParserInputRead (xmlParserInputPtr in, 755 int len); 756XMLPUBFUN int XMLCALL 757 xmlParserInputGrow (xmlParserInputPtr in, 758 int len); 759 760/* 761 * xmlChar handling 762 */ 763XMLPUBFUN xmlChar * XMLCALL 764 xmlStrdup (const xmlChar *cur); 765XMLPUBFUN xmlChar * XMLCALL 766 xmlStrndup (const xmlChar *cur, 767 int len); 768XMLPUBFUN xmlChar * XMLCALL 769 xmlCharStrndup (const char *cur, 770 int len); 771XMLPUBFUN xmlChar * XMLCALL 772 xmlCharStrdup (const char *cur); 773XMLPUBFUN xmlChar * XMLCALL 774 xmlStrsub (const xmlChar *str, 775 int start, 776 int len); 777XMLPUBFUN const xmlChar * XMLCALL 778 xmlStrchr (const xmlChar *str, 779 xmlChar val); 780XMLPUBFUN const xmlChar * XMLCALL 781 xmlStrstr (const xmlChar *str, 782 const xmlChar *val); 783XMLPUBFUN const xmlChar * XMLCALL 784 xmlStrcasestr (const xmlChar *str, 785 xmlChar *val); 786XMLPUBFUN int XMLCALL 787 xmlStrcmp (const xmlChar *str1, 788 const xmlChar *str2); 789XMLPUBFUN int XMLCALL 790 xmlStrncmp (const xmlChar *str1, 791 const xmlChar *str2, 792 int len); 793XMLPUBFUN int XMLCALL 794 xmlStrcasecmp (const xmlChar *str1, 795 const xmlChar *str2); 796XMLPUBFUN int XMLCALL 797 xmlStrncasecmp (const xmlChar *str1, 798 const xmlChar *str2, 799 int len); 800XMLPUBFUN int XMLCALL 801 xmlStrEqual (const xmlChar *str1, 802 const xmlChar *str2); 803XMLPUBFUN int XMLCALL 804 xmlStrQEqual (const xmlChar *pref, 805 const xmlChar *name, 806 const xmlChar *str); 807XMLPUBFUN int XMLCALL 808 xmlStrlen (const xmlChar *str); 809XMLPUBFUN xmlChar * XMLCALL 810 xmlStrcat (xmlChar *cur, 811 const xmlChar *add); 812XMLPUBFUN xmlChar * XMLCALL 813 xmlStrncat (xmlChar *cur, 814 const xmlChar *add, 815 int len); 816 817/* 818 * Basic parsing Interfaces 819 */ 820XMLPUBFUN xmlDocPtr XMLCALL 821 xmlParseDoc (xmlChar *cur); 822XMLPUBFUN xmlDocPtr XMLCALL 823 xmlParseMemory (const char *buffer, 824 int size); 825XMLPUBFUN xmlDocPtr XMLCALL 826 xmlParseFile (const char *filename); 827XMLPUBFUN int XMLCALL 828 xmlSubstituteEntitiesDefault(int val); 829XMLPUBFUN int XMLCALL 830 xmlKeepBlanksDefault (int val); 831XMLPUBFUN void XMLCALL 832 xmlStopParser (xmlParserCtxtPtr ctxt); 833XMLPUBFUN int XMLCALL 834 xmlPedanticParserDefault(int val); 835XMLPUBFUN int XMLCALL 836 xmlLineNumbersDefault (int val); 837 838/* 839 * Recovery mode 840 */ 841XMLPUBFUN xmlDocPtr XMLCALL 842 xmlRecoverDoc (xmlChar *cur); 843XMLPUBFUN xmlDocPtr XMLCALL 844 xmlRecoverMemory (const char *buffer, 845 int size); 846XMLPUBFUN xmlDocPtr XMLCALL 847 xmlRecoverFile (const char *filename); 848 849/* 850 * Less common routines and SAX interfaces 851 */ 852XMLPUBFUN int XMLCALL 853 xmlParseDocument (xmlParserCtxtPtr ctxt); 854XMLPUBFUN int XMLCALL 855 xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); 856XMLPUBFUN xmlDocPtr XMLCALL 857 xmlSAXParseDoc (xmlSAXHandlerPtr sax, 858 xmlChar *cur, 859 int recovery); 860XMLPUBFUN int XMLCALL 861 xmlSAXUserParseFile (xmlSAXHandlerPtr sax, 862 void *user_data, 863 const char *filename); 864XMLPUBFUN int XMLCALL 865 xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, 866 void *user_data, 867 const char *buffer, 868 int size); 869XMLPUBFUN xmlDocPtr XMLCALL 870 xmlSAXParseMemory (xmlSAXHandlerPtr sax, 871 const char *buffer, 872 int size, 873 int recovery); 874XMLPUBFUN xmlDocPtr XMLCALL 875 xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, 876 const char *buffer, 877 int size, 878 int recovery, 879 void *data); 880XMLPUBFUN xmlDocPtr XMLCALL 881 xmlSAXParseFile (xmlSAXHandlerPtr sax, 882 const char *filename, 883 int recovery); 884XMLPUBFUN xmlDocPtr XMLCALL 885 xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, 886 const char *filename, 887 int recovery, 888 void *data); 889XMLPUBFUN xmlDocPtr XMLCALL 890 xmlSAXParseEntity (xmlSAXHandlerPtr sax, 891 const char *filename); 892XMLPUBFUN xmlDocPtr XMLCALL 893 xmlParseEntity (const char *filename); 894XMLPUBFUN xmlDtdPtr XMLCALL 895 xmlParseDTD (const xmlChar *ExternalID, 896 const xmlChar *SystemID); 897XMLPUBFUN xmlDtdPtr XMLCALL 898 xmlSAXParseDTD (xmlSAXHandlerPtr sax, 899 const xmlChar *ExternalID, 900 const xmlChar *SystemID); 901XMLPUBFUN xmlDtdPtr XMLCALL 902 xmlIOParseDTD (xmlSAXHandlerPtr sax, 903 xmlParserInputBufferPtr input, 904 xmlCharEncoding enc); 905XMLPUBFUN int XMLCALL 906 xmlParseBalancedChunkMemory(xmlDocPtr doc, 907 xmlSAXHandlerPtr sax, 908 void *user_data, 909 int depth, 910 const xmlChar *string, 911 xmlNodePtr *lst); 912XMLPUBFUN int XMLCALL 913 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, 914 xmlSAXHandlerPtr sax, 915 void *user_data, 916 int depth, 917 const xmlChar *string, 918 xmlNodePtr *lst, 919 int recover); 920XMLPUBFUN int XMLCALL 921 xmlParseExternalEntity (xmlDocPtr doc, 922 xmlSAXHandlerPtr sax, 923 void *user_data, 924 int depth, 925 const xmlChar *URL, 926 const xmlChar *ID, 927 xmlNodePtr *lst); 928XMLPUBFUN int XMLCALL 929 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, 930 const xmlChar *URL, 931 const xmlChar *ID, 932 xmlNodePtr *lst); 933 934/* 935 * Parser contexts handling. 936 */ 937XMLPUBFUN int XMLCALL 938 xmlInitParserCtxt (xmlParserCtxtPtr ctxt); 939XMLPUBFUN void XMLCALL 940 xmlClearParserCtxt (xmlParserCtxtPtr ctxt); 941XMLPUBFUN void XMLCALL 942 xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); 943XMLPUBFUN void XMLCALL 944 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, 945 const xmlChar* buffer, 946 const char *filename); 947XMLPUBFUN xmlParserCtxtPtr XMLCALL 948 xmlCreateDocParserCtxt (const xmlChar *cur); 949 950/* 951 * Reading/setting optional parsing features. 952 */ 953 954XMLPUBFUN int XMLCALL 955 xmlGetFeaturesList (int *len, 956 const char **result); 957XMLPUBFUN int XMLCALL 958 xmlGetFeature (xmlParserCtxtPtr ctxt, 959 const char *name, 960 void *result); 961XMLPUBFUN int XMLCALL 962 xmlSetFeature (xmlParserCtxtPtr ctxt, 963 const char *name, 964 void *value); 965 966/* 967 * Interfaces for the Push mode. 968 */ 969XMLPUBFUN xmlParserCtxtPtr XMLCALL 970 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, 971 void *user_data, 972 const char *chunk, 973 int size, 974 const char *filename); 975XMLPUBFUN int XMLCALL 976 xmlParseChunk (xmlParserCtxtPtr ctxt, 977 const char *chunk, 978 int size, 979 int terminate); 980 981/* 982 * Special I/O mode. 983 */ 984 985XMLPUBFUN xmlParserCtxtPtr XMLCALL 986 xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, 987 void *user_data, 988 xmlInputReadCallback ioread, 989 xmlInputCloseCallback ioclose, 990 void *ioctx, 991 xmlCharEncoding enc); 992 993XMLPUBFUN xmlParserInputPtr XMLCALL 994 xmlNewIOInputStream (xmlParserCtxtPtr ctxt, 995 xmlParserInputBufferPtr input, 996 xmlCharEncoding enc); 997 998/* 999 * Node infos. 1000 */ 1001XMLPUBFUN const xmlParserNodeInfo* XMLCALL 1002 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, 1003 const xmlNodePtr node); 1004XMLPUBFUN void XMLCALL 1005 xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1006XMLPUBFUN void XMLCALL 1007 xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1008XMLPUBFUN unsigned long XMLCALL 1009 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1010 const xmlNodePtr node); 1011XMLPUBFUN void XMLCALL 1012 xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, 1013 const xmlParserNodeInfoPtr info); 1014 1015/* 1016 * External entities handling actually implemented in xmlIO. 1017 */ 1018 1019XMLPUBFUN void XMLCALL 1020 xmlSetExternalEntityLoader(xmlExternalEntityLoader f); 1021XMLPUBFUN xmlExternalEntityLoader XMLCALL 1022 xmlGetExternalEntityLoader(void); 1023XMLPUBFUN xmlParserInputPtr XMLCALL 1024 xmlLoadExternalEntity (const char *URL, 1025 const char *ID, 1026 xmlParserCtxtPtr ctxt); 1027/* 1028 * New set of simpler/more flexible APIs 1029 */ 1030/** 1031 * xmlParserOption: 1032 * 1033 * This is the set of XML parser options that can be passed down 1034 * to the xmlReadDoc() and similar calls. 1035 */ 1036typedef enum { 1037 XML_PARSE_RECOVER = 1<<0, /* recover on errors */ 1038 XML_PARSE_NOENT = 1<<1, /* substitute entities */ 1039 XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */ 1040 XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */ 1041 XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */ 1042 XML_PARSE_NOERROR = 1<<5, /* suppress error reports */ 1043 XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */ 1044 XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ 1045 XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ 1046 XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */ 1047 XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitition */ 1048 XML_PARSE_NONET = 1<<11,/* Forbid network access */ 1049 XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionnary */ 1050 XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */ 1051 XML_PARSE_NOCDATA = 1<<14 /* merge CDATA as text nodes */ 1052} xmlParserOption; 1053 1054XMLPUBFUN void XMLCALL 1055 xmlCtxtReset (xmlParserCtxtPtr ctxt); 1056XMLPUBFUN int XMLCALL 1057 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt, 1058 int options); 1059XMLPUBFUN xmlDocPtr XMLCALL 1060 xmlReadDoc (const xmlChar *cur, 1061 const char *URL, 1062 const char *encoding, 1063 int options); 1064XMLPUBFUN xmlDocPtr XMLCALL 1065 xmlReadFile (const char *URL, 1066 const char *encoding, 1067 int options); 1068XMLPUBFUN xmlDocPtr XMLCALL 1069 xmlReadMemory (const char *buffer, 1070 int size, 1071 const char *URL, 1072 const char *encoding, 1073 int options); 1074XMLPUBFUN xmlDocPtr XMLCALL 1075 xmlReadFd (int fd, 1076 const char *URL, 1077 const char *encoding, 1078 int options); 1079XMLPUBFUN xmlDocPtr XMLCALL 1080 xmlReadIO (xmlInputReadCallback ioread, 1081 xmlInputCloseCallback ioclose, 1082 void *ioctx, 1083 const char *URL, 1084 const char *encoding, 1085 int options); 1086XMLPUBFUN xmlDocPtr XMLCALL 1087 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt, 1088 const xmlChar *cur, 1089 const char *URL, 1090 const char *encoding, 1091 int options); 1092XMLPUBFUN xmlDocPtr XMLCALL 1093 xmlCtxtReadFile (xmlParserCtxtPtr ctxt, 1094 const char *filename, 1095 const char *encoding, 1096 int options); 1097XMLPUBFUN xmlDocPtr XMLCALL 1098 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt, 1099 const char *buffer, 1100 int size, 1101 const char *URL, 1102 const char *encoding, 1103 int options); 1104XMLPUBFUN xmlDocPtr XMLCALL 1105 xmlCtxtReadFd (xmlParserCtxtPtr ctxt, 1106 int fd, 1107 const char *URL, 1108 const char *encoding, 1109 int options); 1110XMLPUBFUN xmlDocPtr XMLCALL 1111 xmlCtxtReadIO (xmlParserCtxtPtr ctxt, 1112 xmlInputReadCallback ioread, 1113 xmlInputCloseCallback ioclose, 1114 void *ioctx, 1115 const char *URL, 1116 const char *encoding, 1117 int options); 1118 1119#ifdef __cplusplus 1120} 1121#endif 1122#endif /* __XML_PARSER_H__ */ 1123 1124