parser.h revision d16df9f6efe5c0a4f41f4b3e60312c3f584659a5
1/* 2 * parser.h : Interfaces, constants and types related to the XML parser. 3 * 4 * See Copyright for the status of this software. 5 * 6 * Daniel.Veillard@w3.org 7 */ 8 9#ifndef __XML_PARSER_H__ 10#define __XML_PARSER_H__ 11 12#include <libxml/tree.h> 13#include <libxml/valid.h> 14#include <libxml/xmlIO.h> 15#include <libxml/entities.h> 16 17 18#ifdef __cplusplus 19extern "C" { 20#endif 21 22/* 23 * Constants. 24 */ 25#define XML_DEFAULT_VERSION "1.0" 26 27/** 28 * xmlParserInput: 29 * 30 * an xmlParserInput is an input flow for the XML processor. 31 * Each entity parsed is associated an xmlParserInput (except the 32 * few predefined ones). This is the case both for internal entities 33 * - in which case the flow is already completely in memory - or 34 * external entities - in which case we use the buf structure for 35 * progressive reading and I18N conversions to the internal UTF-8 format. 36 */ 37 38typedef void (* xmlParserInputDeallocate)(xmlChar *); 39typedef struct _xmlParserInput xmlParserInput; 40typedef xmlParserInput *xmlParserInputPtr; 41struct _xmlParserInput { 42 /* Input buffer */ 43 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ 44 45 const char *filename; /* The file analyzed, if any */ 46 const char *directory; /* the directory/base of teh file */ 47 const xmlChar *base; /* Base of the array to parse */ 48 const xmlChar *cur; /* Current char being parsed */ 49 const xmlChar *end; /* end of the arry to parse */ 50 int length; /* length if known */ 51 int line; /* Current line */ 52 int col; /* Current column */ 53 int consumed; /* How many xmlChars already consumed */ 54 xmlParserInputDeallocate free; /* function to deallocate the base */ 55 const xmlChar *encoding; /* the encoding string for entity */ 56 const xmlChar *version; /* the version string for entity */ 57 int standalone; /* Was that entity marked standalone */ 58}; 59 60/** 61 * xmlParserNodeInfo: 62 * 63 * the parser can be asked to collect Node informations, i.e. at what 64 * place in the file they were detected. 65 * NOTE: This is off by default and not very well tested. 66 */ 67typedef struct _xmlParserNodeInfo xmlParserNodeInfo; 68typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; 69 70struct _xmlParserNodeInfo { 71 const struct _xmlNode* node; 72 /* Position & line # that text that created the node begins & ends on */ 73 unsigned long begin_pos; 74 unsigned long begin_line; 75 unsigned long end_pos; 76 unsigned long end_line; 77}; 78 79typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; 80typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; 81struct _xmlParserNodeInfoSeq { 82 unsigned long maximum; 83 unsigned long length; 84 xmlParserNodeInfo* buffer; 85}; 86 87/** 88 * xmlParserInputState: 89 * 90 * The parser is now working also as a state based parser 91 * The recursive one use the stagte info for entities processing 92 */ 93typedef enum { 94 XML_PARSER_EOF = -1, /* nothing is to be parsed */ 95 XML_PARSER_START = 0, /* nothing has been parsed */ 96 XML_PARSER_MISC, /* Misc* before int subset */ 97 XML_PARSER_PI, /* Whithin a processing instruction */ 98 XML_PARSER_DTD, /* within some DTD content */ 99 XML_PARSER_PROLOG, /* Misc* after internal subset */ 100 XML_PARSER_COMMENT, /* within a comment */ 101 XML_PARSER_START_TAG, /* within a start tag */ 102 XML_PARSER_CONTENT, /* within the content */ 103 XML_PARSER_CDATA_SECTION, /* within a CDATA section */ 104 XML_PARSER_END_TAG, /* within a closing tag */ 105 XML_PARSER_ENTITY_DECL, /* within an entity declaration */ 106 XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ 107 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ 108 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ 109 XML_PARSER_EPILOG, /* the Misc* after the last end tag */ 110 XML_PARSER_IGNORE /* within an IGNORED section */ 111} xmlParserInputState; 112 113/** 114 * XML_DETECT_IDS: 115 * 116 * Bit in the loadsubset context field to tell to do ID/REFs lookups 117 * Use it to initialize xmlLoadExtDtdDefaultValue 118 */ 119#define XML_DETECT_IDS 2 120 121/** 122 * XML_COMPLETE_ATTRS: 123 * 124 * Bit in the loadsubset context field to tell to do complete the 125 * elements attributes lists with the ones defaulted from the DTDs 126 * Use it to initialize xmlLoadExtDtdDefaultValue 127 */ 128#define XML_COMPLETE_ATTRS 4 129 130/** 131 * xmlParserCtxt: 132 * 133 * The parser context. 134 * NOTE This doesn't completely defines the parser state, the (current ?) 135 * design of the parser uses recursive function calls since this allow 136 * and easy mapping from the production rules of the specification 137 * to the actual code. The drawback is that the actual function call 138 * also reflect the parser state. However most of the parsing routines 139 * takes as the only argument the parser context pointer, so migrating 140 * to a state based parser for progressive parsing shouldn't be too hard. 141 */ 142typedef struct _xmlParserCtxt xmlParserCtxt; 143typedef xmlParserCtxt *xmlParserCtxtPtr; 144struct _xmlParserCtxt { 145 struct _xmlSAXHandler *sax; /* The SAX handler */ 146 void *userData; /* For SAX interface only, used by DOM build */ 147 xmlDocPtr myDoc; /* the document being built */ 148 int wellFormed; /* is the document well formed */ 149 int replaceEntities; /* shall we replace entities ? */ 150 const xmlChar *version; /* the XML version string */ 151 const xmlChar *encoding; /* the declared encoding, if any */ 152 int standalone; /* standalone document */ 153 int html; /* an HTML(1)/Docbook(2) document */ 154 155 /* Input stream stack */ 156 xmlParserInputPtr input; /* Current input stream */ 157 int inputNr; /* Number of current input streams */ 158 int inputMax; /* Max number of input streams */ 159 xmlParserInputPtr *inputTab; /* stack of inputs */ 160 161 /* Node analysis stack only used for DOM building */ 162 xmlNodePtr node; /* Current parsed Node */ 163 int nodeNr; /* Depth of the parsing stack */ 164 int nodeMax; /* Max depth of the parsing stack */ 165 xmlNodePtr *nodeTab; /* array of nodes */ 166 167 int record_info; /* Whether node info should be kept */ 168 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ 169 170 int errNo; /* error code */ 171 172 int hasExternalSubset; /* reference and external subset */ 173 int hasPErefs; /* the internal subset has PE refs */ 174 int external; /* are we parsing an external entity */ 175 176 int valid; /* is the document valid */ 177 int validate; /* shall we try to validate ? */ 178 xmlValidCtxt vctxt; /* The validity context */ 179 180 xmlParserInputState instate; /* current type of input */ 181 int token; /* next char look-ahead */ 182 183 char *directory; /* the data directory */ 184 185 /* Node name stack */ 186 xmlChar *name; /* Current parsed Node */ 187 int nameNr; /* Depth of the parsing stack */ 188 int nameMax; /* Max depth of the parsing stack */ 189 xmlChar * *nameTab; /* array of nodes */ 190 191 long nbChars; /* number of xmlChar processed */ 192 long checkIndex; /* used by progressive parsing lookup */ 193 int keepBlanks; /* ugly but ... */ 194 int disableSAX; /* SAX callbacks are disabled */ 195 int inSubset; /* Parsing is in int 1/ext 2 subset */ 196 xmlChar * intSubName; /* name of subset */ 197 xmlChar * extSubURI; /* URI of external subset */ 198 xmlChar * extSubSystem; /* SYSTEM ID of external subset */ 199 200 /* xml:space values */ 201 int * space; /* Should the parser preserve spaces */ 202 int spaceNr; /* Depth of the parsing stack */ 203 int spaceMax; /* Max depth of the parsing stack */ 204 int * spaceTab; /* array of space infos */ 205 206 int depth; /* to prevent entity substitution loops */ 207 xmlParserInputPtr entity; /* used to check entities boundaries */ 208 int charset; /* encoding of the in-memory content 209 actually an xmlCharEncoding */ 210 int nodelen; /* Those two fields are there to */ 211 int nodemem; /* Speed up large node parsing */ 212 int pedantic; /* signal pedantic warnings */ 213 void *_private; /* For user data, libxml won't touch it */ 214 215 int loadsubset; /* should the external subset be loaded */ 216}; 217 218/** 219 * xmlSAXLocator: 220 * 221 * a SAX Locator. 222 */ 223typedef struct _xmlSAXLocator xmlSAXLocator; 224typedef xmlSAXLocator *xmlSAXLocatorPtr; 225struct _xmlSAXLocator { 226 const xmlChar *(*getPublicId)(void *ctx); 227 const xmlChar *(*getSystemId)(void *ctx); 228 int (*getLineNumber)(void *ctx); 229 int (*getColumnNumber)(void *ctx); 230}; 231 232/** 233 * xmlSAXHandler: 234 * 235 * a SAX handler is bunch of callbacks called by the parser when processing 236 * of the input generate data or structure informations. 237 */ 238 239typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, 240 const xmlChar *publicId, const xmlChar *systemId); 241typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name, 242 const xmlChar *ExternalID, const xmlChar *SystemID); 243typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name, 244 const xmlChar *ExternalID, const xmlChar *SystemID); 245typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, 246 const xmlChar *name); 247typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, 248 const xmlChar *name); 249typedef void (*entityDeclSAXFunc) (void *ctx, 250 const xmlChar *name, int type, const xmlChar *publicId, 251 const xmlChar *systemId, xmlChar *content); 252typedef void (*notationDeclSAXFunc)(void *ctx, const xmlChar *name, 253 const xmlChar *publicId, const xmlChar *systemId); 254typedef void (*attributeDeclSAXFunc)(void *ctx, const xmlChar *elem, 255 const xmlChar *name, int type, int def, 256 const xmlChar *defaultValue, xmlEnumerationPtr tree); 257typedef void (*elementDeclSAXFunc)(void *ctx, const xmlChar *name, 258 int type, xmlElementContentPtr content); 259typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, 260 const xmlChar *name, const xmlChar *publicId, 261 const xmlChar *systemId, const xmlChar *notationName); 262typedef void (*setDocumentLocatorSAXFunc) (void *ctx, 263 xmlSAXLocatorPtr loc); 264typedef void (*startDocumentSAXFunc) (void *ctx); 265typedef void (*endDocumentSAXFunc) (void *ctx); 266typedef void (*startElementSAXFunc) (void *ctx, const xmlChar *name, 267 const xmlChar **atts); 268typedef void (*endElementSAXFunc) (void *ctx, const xmlChar *name); 269typedef void (*attributeSAXFunc) (void *ctx, const xmlChar *name, 270 const xmlChar *value); 271typedef void (*referenceSAXFunc) (void *ctx, const xmlChar *name); 272typedef void (*charactersSAXFunc) (void *ctx, const xmlChar *ch, 273 int len); 274typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, 275 const xmlChar *ch, int len); 276typedef void (*processingInstructionSAXFunc) (void *ctx, 277 const xmlChar *target, const xmlChar *data); 278typedef void (*commentSAXFunc) (void *ctx, const xmlChar *value); 279typedef void (*cdataBlockSAXFunc) (void *ctx, const xmlChar *value, int len); 280typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...); 281typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...); 282typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...); 283typedef int (*isStandaloneSAXFunc) (void *ctx); 284typedef int (*hasInternalSubsetSAXFunc) (void *ctx); 285typedef int (*hasExternalSubsetSAXFunc) (void *ctx); 286 287typedef struct _xmlSAXHandler xmlSAXHandler; 288typedef xmlSAXHandler *xmlSAXHandlerPtr; 289struct _xmlSAXHandler { 290 internalSubsetSAXFunc internalSubset; 291 isStandaloneSAXFunc isStandalone; 292 hasInternalSubsetSAXFunc hasInternalSubset; 293 hasExternalSubsetSAXFunc hasExternalSubset; 294 resolveEntitySAXFunc resolveEntity; 295 getEntitySAXFunc getEntity; 296 entityDeclSAXFunc entityDecl; 297 notationDeclSAXFunc notationDecl; 298 attributeDeclSAXFunc attributeDecl; 299 elementDeclSAXFunc elementDecl; 300 unparsedEntityDeclSAXFunc unparsedEntityDecl; 301 setDocumentLocatorSAXFunc setDocumentLocator; 302 startDocumentSAXFunc startDocument; 303 endDocumentSAXFunc endDocument; 304 startElementSAXFunc startElement; 305 endElementSAXFunc endElement; 306 referenceSAXFunc reference; 307 charactersSAXFunc characters; 308 ignorableWhitespaceSAXFunc ignorableWhitespace; 309 processingInstructionSAXFunc processingInstruction; 310 commentSAXFunc comment; 311 warningSAXFunc warning; 312 errorSAXFunc error; 313 fatalErrorSAXFunc fatalError; 314 getParameterEntitySAXFunc getParameterEntity; 315 cdataBlockSAXFunc cdataBlock; 316 externalSubsetSAXFunc externalSubset; 317}; 318 319/** 320 * xmlExternalEntityLoader: 321 * @URL: The System ID of the resource requested 322 * @ID: The Public ID of the resource requested 323 * @xmlParserCtxtPtr: the XML parser context 324 * 325 * External entity loaders types 326 */ 327typedef xmlParserInputPtr (*xmlExternalEntityLoader)(const char *URL, 328 const char *ID, 329 xmlParserCtxtPtr context); 330 331/* 332 * Global variables: just the default SAX interface tables and XML 333 * version infos. 334 */ 335LIBXML_DLL_IMPORT extern const char *xmlParserVersion; 336 337LIBXML_DLL_IMPORT extern xmlSAXLocator xmlDefaultSAXLocator; 338LIBXML_DLL_IMPORT extern xmlSAXHandler xmlDefaultSAXHandler; 339LIBXML_DLL_IMPORT extern xmlSAXHandler htmlDefaultSAXHandler; 340LIBXML_DLL_IMPORT extern xmlSAXHandler docbDefaultSAXHandler; 341 342/* 343 * entity substitution default behaviour. 344 */ 345 346#ifdef VMS 347LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultVal; 348#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal 349#else 350LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue; 351#endif 352LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue; 353 354 355/* 356 * Init/Cleanup 357 */ 358void xmlInitParser (void); 359void xmlCleanupParser (void); 360 361/* 362 * Input functions 363 */ 364int xmlParserInputRead (xmlParserInputPtr in, 365 int len); 366int xmlParserInputGrow (xmlParserInputPtr in, 367 int len); 368 369/* 370 * xmlChar handling 371 */ 372xmlChar * xmlStrdup (const xmlChar *cur); 373xmlChar * xmlStrndup (const xmlChar *cur, 374 int len); 375xmlChar * xmlCharStrndup (const char *cur, 376 int len); 377xmlChar * xmlCharStrdup (const char *cur); 378xmlChar * xmlStrsub (const xmlChar *str, 379 int start, 380 int len); 381const xmlChar * xmlStrchr (const xmlChar *str, 382 xmlChar val); 383const xmlChar * xmlStrstr (const xmlChar *str, 384 xmlChar *val); 385const xmlChar * xmlStrcasestr (const xmlChar *str, 386 xmlChar *val); 387int xmlStrcmp (const xmlChar *str1, 388 const xmlChar *str2); 389int xmlStrncmp (const xmlChar *str1, 390 const xmlChar *str2, 391 int len); 392int xmlStrcasecmp (const xmlChar *str1, 393 const xmlChar *str2); 394int xmlStrncasecmp (const xmlChar *str1, 395 const xmlChar *str2, 396 int len); 397int xmlStrEqual (const xmlChar *str1, 398 const xmlChar *str2); 399int xmlStrlen (const xmlChar *str); 400xmlChar * xmlStrcat (xmlChar *cur, 401 const xmlChar *add); 402xmlChar * xmlStrncat (xmlChar *cur, 403 const xmlChar *add, 404 int len); 405 406/* 407 * Basic parsing Interfaces 408 */ 409xmlDocPtr xmlParseDoc (xmlChar *cur); 410xmlDocPtr xmlParseMemory (char *buffer, 411 int size); 412xmlDocPtr xmlParseFile (const char *filename); 413int xmlSubstituteEntitiesDefault(int val); 414int xmlKeepBlanksDefault (int val); 415void xmlStopParser (xmlParserCtxtPtr ctxt); 416int xmlPedanticParserDefault(int val); 417 418/* 419 * Recovery mode 420 */ 421xmlDocPtr xmlRecoverDoc (xmlChar *cur); 422xmlDocPtr xmlRecoverMemory (char *buffer, 423 int size); 424xmlDocPtr xmlRecoverFile (const char *filename); 425 426/* 427 * Less common routines and SAX interfaces 428 */ 429int xmlParseDocument (xmlParserCtxtPtr ctxt); 430int xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); 431xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax, 432 xmlChar *cur, 433 int recovery); 434int xmlSAXUserParseFile (xmlSAXHandlerPtr sax, 435 void *user_data, 436 const char *filename); 437int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, 438 void *user_data, 439 const char *buffer, 440 int size); 441xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax, 442 char *buffer, 443 int size, 444 int recovery); 445xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax, 446 const char *filename, 447 int recovery); 448xmlDocPtr xmlSAXParseEntity (xmlSAXHandlerPtr sax, 449 const char *filename); 450xmlDocPtr xmlParseEntity (const char *filename); 451xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID, 452 const xmlChar *SystemID); 453xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, 454 const xmlChar *ExternalID, 455 const xmlChar *SystemID); 456xmlDtdPtr xmlIOParseDTD (xmlSAXHandlerPtr sax, 457 xmlParserInputBufferPtr input, 458 xmlCharEncoding enc); 459int xmlParseBalancedChunkMemory(xmlDocPtr doc, 460 xmlSAXHandlerPtr sax, 461 void *user_data, 462 int depth, 463 const xmlChar *string, 464 xmlNodePtr *list); 465int xmlParseExternalEntity (xmlDocPtr doc, 466 xmlSAXHandlerPtr sax, 467 void *user_data, 468 int depth, 469 const xmlChar *URL, 470 const xmlChar *ID, 471 xmlNodePtr *list); 472int xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, 473 const xmlChar *URL, 474 const xmlChar *ID, 475 xmlNodePtr *list); 476 477/* 478 * SAX initialization routines 479 */ 480void xmlDefaultSAXHandlerInit(void); 481void htmlDefaultSAXHandlerInit(void); 482 483/* 484 * Parser contexts handling. 485 */ 486void xmlInitParserCtxt (xmlParserCtxtPtr ctxt); 487void xmlClearParserCtxt (xmlParserCtxtPtr ctxt); 488void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); 489void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, 490 const xmlChar* buffer, 491 const char* filename); 492xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); 493 494/* 495 * Reading/setting optional parsing features. 496 */ 497 498int xmlGetFeaturesList (int *len, 499 const char **result); 500int xmlGetFeature (xmlParserCtxtPtr ctxt, 501 const char *name, 502 void *result); 503int xmlSetFeature (xmlParserCtxtPtr ctxt, 504 const char *name, 505 void *value); 506 507/* 508 * Interfaces for the Push mode 509 */ 510xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, 511 void *user_data, 512 const char *chunk, 513 int size, 514 const char *filename); 515int xmlParseChunk (xmlParserCtxtPtr ctxt, 516 const char *chunk, 517 int size, 518 int terminate); 519 520/* 521 * Special I/O mode 522 */ 523 524xmlParserCtxtPtr xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, 525 void *user_data, 526 xmlInputReadCallback ioread, 527 xmlInputCloseCallback ioclose, 528 void *ioctx, 529 xmlCharEncoding enc); 530 531xmlParserInputPtr xmlNewIOInputStream (xmlParserCtxtPtr ctxt, 532 xmlParserInputBufferPtr input, 533 xmlCharEncoding enc); 534 535/* 536 * Node infos 537 */ 538const xmlParserNodeInfo* 539 xmlParserFindNodeInfo (const xmlParserCtxt* ctxt, 540 const xmlNode* node); 541void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 542void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 543unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq, 544 const xmlNode* node); 545void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, 546 const xmlParserNodeInfo* info); 547 548/* 549 * External entities handling actually implemented in xmlIO 550 */ 551 552void xmlSetExternalEntityLoader(xmlExternalEntityLoader f); 553xmlExternalEntityLoader 554 xmlGetExternalEntityLoader(void); 555xmlParserInputPtr 556 xmlLoadExternalEntity (const char *URL, 557 const char *ID, 558 xmlParserCtxtPtr context); 559 560#ifdef __cplusplus 561} 562#endif 563 564#endif /* __XML_PARSER_H__ */ 565 566