parser.h revision 5e2dace1ca6fbb023d1ce848d4e98deefbbfec31
1/* 2 * parser.h : Interfaces, constants and types related to the XML parser. 3 * 4 * See Copyright for the status of this software. 5 * 6 * daniel@veillard.com 7 */ 8 9#ifndef __XML_PARSER_H__ 10#define __XML_PARSER_H__ 11 12#include <libxml/tree.h> 13#include <libxml/valid.h> 14#include <libxml/xmlIO.h> 15#include <libxml/entities.h> 16 17 18#ifdef __cplusplus 19extern "C" { 20#endif 21 22/** 23 * XML_DEFAULT_VERSION: 24 * 25 * The default version of XML used: 1.0 26 */ 27#define XML_DEFAULT_VERSION "1.0" 28 29/** 30 * xmlParserInput: 31 * 32 * an xmlParserInput is an input flow for the XML processor. 33 * Each entity parsed is associated an xmlParserInput (except the 34 * few predefined ones). This is the case both for internal entities 35 * - in which case the flow is already completely in memory - or 36 * external entities - in which case we use the buf structure for 37 * progressive reading and I18N conversions to the internal UTF-8 format. 38 */ 39 40typedef void (* xmlParserInputDeallocate)(xmlChar *); 41 42typedef struct _xmlParserInput xmlParserInput; 43typedef xmlParserInput *xmlParserInputPtr; 44struct _xmlParserInput { 45 /* Input buffer */ 46 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ 47 48 const char *filename; /* The file analyzed, if any */ 49 const char *directory; /* the directory/base of teh file */ 50 const xmlChar *base; /* Base of the array to parse */ 51 const xmlChar *cur; /* Current char being parsed */ 52 const xmlChar *end; /* end of the arry to parse */ 53 int length; /* length if known */ 54 int line; /* Current line */ 55 int col; /* Current column */ 56 int consumed; /* How many xmlChars already consumed */ 57 xmlParserInputDeallocate free; /* function to deallocate the base */ 58 const xmlChar *encoding; /* the encoding string for entity */ 59 const xmlChar *version; /* the version string for entity */ 60 int standalone; /* Was that entity marked standalone */ 61}; 62 63/** 64 * xmlParserNodeInfo: 65 * 66 * the parser can be asked to collect Node informations, i.e. at what 67 * place in the file they were detected. 68 * NOTE: This is off by default and not very well tested. 69 */ 70typedef struct _xmlParserNodeInfo xmlParserNodeInfo; 71typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; 72 73struct _xmlParserNodeInfo { 74 const struct _xmlNode* node; 75 /* Position & line # that text that created the node begins & ends on */ 76 unsigned long begin_pos; 77 unsigned long begin_line; 78 unsigned long end_pos; 79 unsigned long end_line; 80}; 81 82typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; 83typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; 84struct _xmlParserNodeInfoSeq { 85 unsigned long maximum; 86 unsigned long length; 87 xmlParserNodeInfo* buffer; 88}; 89 90/** 91 * xmlParserInputState: 92 * 93 * The parser is now working also as a state based parser 94 * The recursive one use the stagte info for entities processing 95 */ 96typedef enum { 97 XML_PARSER_EOF = -1, /* nothing is to be parsed */ 98 XML_PARSER_START = 0, /* nothing has been parsed */ 99 XML_PARSER_MISC, /* Misc* before int subset */ 100 XML_PARSER_PI, /* Whithin a processing instruction */ 101 XML_PARSER_DTD, /* within some DTD content */ 102 XML_PARSER_PROLOG, /* Misc* after internal subset */ 103 XML_PARSER_COMMENT, /* within a comment */ 104 XML_PARSER_START_TAG, /* within a start tag */ 105 XML_PARSER_CONTENT, /* within the content */ 106 XML_PARSER_CDATA_SECTION, /* within a CDATA section */ 107 XML_PARSER_END_TAG, /* within a closing tag */ 108 XML_PARSER_ENTITY_DECL, /* within an entity declaration */ 109 XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ 110 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ 111 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ 112 XML_PARSER_EPILOG, /* the Misc* after the last end tag */ 113 XML_PARSER_IGNORE /* within an IGNORED section */ 114} xmlParserInputState; 115 116/** 117 * XML_DETECT_IDS: 118 * 119 * Bit in the loadsubset context field to tell to do ID/REFs lookups 120 * Use it to initialize xmlLoadExtDtdDefaultValue 121 */ 122#define XML_DETECT_IDS 2 123 124/** 125 * XML_COMPLETE_ATTRS: 126 * 127 * Bit in the loadsubset context field to tell to do complete the 128 * elements attributes lists with the ones defaulted from the DTDs 129 * Use it to initialize xmlLoadExtDtdDefaultValue 130 */ 131#define XML_COMPLETE_ATTRS 4 132 133/** 134 * xmlParserCtxt: 135 * 136 * The parser context. 137 * NOTE This doesn't completely defines the parser state, the (current ?) 138 * design of the parser uses recursive function calls since this allow 139 * and easy mapping from the production rules of the specification 140 * to the actual code. The drawback is that the actual function call 141 * also reflect the parser state. However most of the parsing routines 142 * takes as the only argument the parser context pointer, so migrating 143 * to a state based parser for progressive parsing shouldn't be too hard. 144 */ 145typedef struct _xmlParserCtxt xmlParserCtxt; 146typedef xmlParserCtxt *xmlParserCtxtPtr; 147struct _xmlParserCtxt { 148 struct _xmlSAXHandler *sax; /* The SAX handler */ 149 void *userData; /* For SAX interface only, used by DOM build */ 150 xmlDocPtr myDoc; /* the document being built */ 151 int wellFormed; /* is the document well formed */ 152 int replaceEntities; /* shall we replace entities ? */ 153 const xmlChar *version; /* the XML version string */ 154 const xmlChar *encoding; /* the declared encoding, if any */ 155 int standalone; /* standalone document */ 156 int html; /* an HTML(1)/Docbook(2) document */ 157 158 /* Input stream stack */ 159 xmlParserInputPtr input; /* Current input stream */ 160 int inputNr; /* Number of current input streams */ 161 int inputMax; /* Max number of input streams */ 162 xmlParserInputPtr *inputTab; /* stack of inputs */ 163 164 /* Node analysis stack only used for DOM building */ 165 xmlNodePtr node; /* Current parsed Node */ 166 int nodeNr; /* Depth of the parsing stack */ 167 int nodeMax; /* Max depth of the parsing stack */ 168 xmlNodePtr *nodeTab; /* array of nodes */ 169 170 int record_info; /* Whether node info should be kept */ 171 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ 172 173 int errNo; /* error code */ 174 175 int hasExternalSubset; /* reference and external subset */ 176 int hasPErefs; /* the internal subset has PE refs */ 177 int external; /* are we parsing an external entity */ 178 179 int valid; /* is the document valid */ 180 int validate; /* shall we try to validate ? */ 181 xmlValidCtxt vctxt; /* The validity context */ 182 183 xmlParserInputState instate; /* current type of input */ 184 int token; /* next char look-ahead */ 185 186 char *directory; /* the data directory */ 187 188 /* Node name stack */ 189 xmlChar *name; /* Current parsed Node */ 190 int nameNr; /* Depth of the parsing stack */ 191 int nameMax; /* Max depth of the parsing stack */ 192 xmlChar * *nameTab; /* array of nodes */ 193 194 long nbChars; /* number of xmlChar processed */ 195 long checkIndex; /* used by progressive parsing lookup */ 196 int keepBlanks; /* ugly but ... */ 197 int disableSAX; /* SAX callbacks are disabled */ 198 int inSubset; /* Parsing is in int 1/ext 2 subset */ 199 xmlChar * intSubName; /* name of subset */ 200 xmlChar * extSubURI; /* URI of external subset */ 201 xmlChar * extSubSystem; /* SYSTEM ID of external subset */ 202 203 /* xml:space values */ 204 int * space; /* Should the parser preserve spaces */ 205 int spaceNr; /* Depth of the parsing stack */ 206 int spaceMax; /* Max depth of the parsing stack */ 207 int * spaceTab; /* array of space infos */ 208 209 int depth; /* to prevent entity substitution loops */ 210 xmlParserInputPtr entity; /* used to check entities boundaries */ 211 int charset; /* encoding of the in-memory content 212 actually an xmlCharEncoding */ 213 int nodelen; /* Those two fields are there to */ 214 int nodemem; /* Speed up large node parsing */ 215 int pedantic; /* signal pedantic warnings */ 216 void *_private; /* For user data, libxml won't touch it */ 217 218 int loadsubset; /* should the external subset be loaded */ 219}; 220 221/** 222 * xmlSAXLocator: 223 * 224 * a SAX Locator. 225 */ 226typedef struct _xmlSAXLocator xmlSAXLocator; 227typedef xmlSAXLocator *xmlSAXLocatorPtr; 228struct _xmlSAXLocator { 229 const xmlChar *(*getPublicId)(void *ctx); 230 const xmlChar *(*getSystemId)(void *ctx); 231 int (*getLineNumber)(void *ctx); 232 int (*getColumnNumber)(void *ctx); 233}; 234 235/** 236 * xmlSAXHandler: 237 * 238 * a SAX handler is bunch of callbacks called by the parser when processing 239 * of the input generate data or structure informations. 240 */ 241 242typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, 243 const xmlChar *publicId, const xmlChar *systemId); 244typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name, 245 const xmlChar *ExternalID, const xmlChar *SystemID); 246typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name, 247 const xmlChar *ExternalID, const xmlChar *SystemID); 248typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, 249 const xmlChar *name); 250typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, 251 const xmlChar *name); 252typedef void (*entityDeclSAXFunc) (void *ctx, 253 const xmlChar *name, int type, const xmlChar *publicId, 254 const xmlChar *systemId, xmlChar *content); 255typedef void (*notationDeclSAXFunc)(void *ctx, const xmlChar *name, 256 const xmlChar *publicId, const xmlChar *systemId); 257typedef void (*attributeDeclSAXFunc)(void *ctx, const xmlChar *elem, 258 const xmlChar *name, int type, int def, 259 const xmlChar *defaultValue, xmlEnumerationPtr tree); 260typedef void (*elementDeclSAXFunc)(void *ctx, const xmlChar *name, 261 int type, xmlElementContentPtr content); 262typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, 263 const xmlChar *name, const xmlChar *publicId, 264 const xmlChar *systemId, const xmlChar *notationName); 265typedef void (*setDocumentLocatorSAXFunc) (void *ctx, 266 xmlSAXLocatorPtr loc); 267typedef void (*startDocumentSAXFunc) (void *ctx); 268typedef void (*endDocumentSAXFunc) (void *ctx); 269typedef void (*startElementSAXFunc) (void *ctx, const xmlChar *name, 270 const xmlChar **atts); 271typedef void (*endElementSAXFunc) (void *ctx, const xmlChar *name); 272typedef void (*attributeSAXFunc) (void *ctx, const xmlChar *name, 273 const xmlChar *value); 274typedef void (*referenceSAXFunc) (void *ctx, const xmlChar *name); 275typedef void (*charactersSAXFunc) (void *ctx, const xmlChar *ch, 276 int len); 277typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, 278 const xmlChar *ch, int len); 279typedef void (*processingInstructionSAXFunc) (void *ctx, 280 const xmlChar *target, const xmlChar *data); 281typedef void (*commentSAXFunc) (void *ctx, const xmlChar *value); 282typedef void (*cdataBlockSAXFunc) (void *ctx, const xmlChar *value, int len); 283typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...); 284typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...); 285typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...); 286typedef int (*isStandaloneSAXFunc) (void *ctx); 287typedef int (*hasInternalSubsetSAXFunc) (void *ctx); 288typedef int (*hasExternalSubsetSAXFunc) (void *ctx); 289 290typedef struct _xmlSAXHandler xmlSAXHandler; 291typedef xmlSAXHandler *xmlSAXHandlerPtr; 292struct _xmlSAXHandler { 293 internalSubsetSAXFunc internalSubset; 294 isStandaloneSAXFunc isStandalone; 295 hasInternalSubsetSAXFunc hasInternalSubset; 296 hasExternalSubsetSAXFunc hasExternalSubset; 297 resolveEntitySAXFunc resolveEntity; 298 getEntitySAXFunc getEntity; 299 entityDeclSAXFunc entityDecl; 300 notationDeclSAXFunc notationDecl; 301 attributeDeclSAXFunc attributeDecl; 302 elementDeclSAXFunc elementDecl; 303 unparsedEntityDeclSAXFunc unparsedEntityDecl; 304 setDocumentLocatorSAXFunc setDocumentLocator; 305 startDocumentSAXFunc startDocument; 306 endDocumentSAXFunc endDocument; 307 startElementSAXFunc startElement; 308 endElementSAXFunc endElement; 309 referenceSAXFunc reference; 310 charactersSAXFunc characters; 311 ignorableWhitespaceSAXFunc ignorableWhitespace; 312 processingInstructionSAXFunc processingInstruction; 313 commentSAXFunc comment; 314 warningSAXFunc warning; 315 errorSAXFunc error; 316 fatalErrorSAXFunc fatalError; 317 getParameterEntitySAXFunc getParameterEntity; 318 cdataBlockSAXFunc cdataBlock; 319 externalSubsetSAXFunc externalSubset; 320}; 321 322/** 323 * xmlExternalEntityLoader: 324 * @URL: The System ID of the resource requested 325 * @ID: The Public ID of the resource requested 326 * @xmlParserCtxtPtr: the XML parser context 327 * 328 * External entity loaders types 329 */ 330typedef xmlParserInputPtr (*xmlExternalEntityLoader)(const char *URL, 331 const char *ID, 332 xmlParserCtxtPtr context); 333 334/* 335 * Global variables: just the default SAX interface tables and XML 336 * version infos. 337 */ 338LIBXML_DLL_IMPORT extern const char *xmlParserVersion; 339 340LIBXML_DLL_IMPORT extern xmlSAXLocator xmlDefaultSAXLocator; 341LIBXML_DLL_IMPORT extern xmlSAXHandler xmlDefaultSAXHandler; 342LIBXML_DLL_IMPORT extern xmlSAXHandler htmlDefaultSAXHandler; 343LIBXML_DLL_IMPORT extern xmlSAXHandler docbDefaultSAXHandler; 344 345/* 346 * entity substitution default behaviour. 347 */ 348 349#ifdef VMS 350/** 351 * xmlSubstituteEntitiesDefaultValue: 352 * 353 * global variable controlling the entity substitution default behaviour 354 */ 355LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultVal; 356#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal 357#else 358LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue; 359#endif 360LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue; 361 362 363/* 364 * Init/Cleanup 365 */ 366void xmlInitParser (void); 367void xmlCleanupParser (void); 368 369/* 370 * Input functions 371 */ 372int xmlParserInputRead (xmlParserInputPtr in, 373 int len); 374int xmlParserInputGrow (xmlParserInputPtr in, 375 int len); 376 377/* 378 * xmlChar handling 379 */ 380xmlChar * xmlStrdup (const xmlChar *cur); 381xmlChar * xmlStrndup (const xmlChar *cur, 382 int len); 383xmlChar * xmlCharStrndup (const char *cur, 384 int len); 385xmlChar * xmlCharStrdup (const char *cur); 386xmlChar * xmlStrsub (const xmlChar *str, 387 int start, 388 int len); 389const xmlChar * xmlStrchr (const xmlChar *str, 390 xmlChar val); 391const xmlChar * xmlStrstr (const xmlChar *str, 392 const xmlChar *val); 393const xmlChar * xmlStrcasestr (const xmlChar *str, 394 xmlChar *val); 395int xmlStrcmp (const xmlChar *str1, 396 const xmlChar *str2); 397int xmlStrncmp (const xmlChar *str1, 398 const xmlChar *str2, 399 int len); 400int xmlStrcasecmp (const xmlChar *str1, 401 const xmlChar *str2); 402int xmlStrncasecmp (const xmlChar *str1, 403 const xmlChar *str2, 404 int len); 405int xmlStrEqual (const xmlChar *str1, 406 const xmlChar *str2); 407int xmlStrlen (const xmlChar *str); 408xmlChar * xmlStrcat (xmlChar *cur, 409 const xmlChar *add); 410xmlChar * xmlStrncat (xmlChar *cur, 411 const xmlChar *add, 412 int len); 413 414/* 415 * Basic parsing Interfaces 416 */ 417xmlDocPtr xmlParseDoc (xmlChar *cur); 418xmlDocPtr xmlParseMemory (char *buffer, 419 int size); 420xmlDocPtr xmlParseFile (const char *filename); 421int xmlSubstituteEntitiesDefault(int val); 422int xmlKeepBlanksDefault (int val); 423void xmlStopParser (xmlParserCtxtPtr ctxt); 424int xmlPedanticParserDefault(int val); 425 426/* 427 * Recovery mode 428 */ 429xmlDocPtr xmlRecoverDoc (xmlChar *cur); 430xmlDocPtr xmlRecoverMemory (char *buffer, 431 int size); 432xmlDocPtr xmlRecoverFile (const char *filename); 433 434/* 435 * Less common routines and SAX interfaces 436 */ 437int xmlParseDocument (xmlParserCtxtPtr ctxt); 438int xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); 439xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax, 440 xmlChar *cur, 441 int recovery); 442int xmlSAXUserParseFile (xmlSAXHandlerPtr sax, 443 void *user_data, 444 const char *filename); 445int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, 446 void *user_data, 447 const char *buffer, 448 int size); 449xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax, 450 char *buffer, 451 int size, 452 int recovery); 453xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax, 454 const char *filename, 455 int recovery); 456xmlDocPtr xmlSAXParseEntity (xmlSAXHandlerPtr sax, 457 const char *filename); 458xmlDocPtr xmlParseEntity (const char *filename); 459xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID, 460 const xmlChar *SystemID); 461xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, 462 const xmlChar *ExternalID, 463 const xmlChar *SystemID); 464xmlDtdPtr xmlIOParseDTD (xmlSAXHandlerPtr sax, 465 xmlParserInputBufferPtr input, 466 xmlCharEncoding enc); 467int xmlParseBalancedChunkMemory(xmlDocPtr doc, 468 xmlSAXHandlerPtr sax, 469 void *user_data, 470 int depth, 471 const xmlChar *string, 472 xmlNodePtr *list); 473int xmlParseExternalEntity (xmlDocPtr doc, 474 xmlSAXHandlerPtr sax, 475 void *user_data, 476 int depth, 477 const xmlChar *URL, 478 const xmlChar *ID, 479 xmlNodePtr *list); 480int xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, 481 const xmlChar *URL, 482 const xmlChar *ID, 483 xmlNodePtr *list); 484 485/* 486 * SAX initialization routines 487 */ 488void xmlDefaultSAXHandlerInit(void); 489void htmlDefaultSAXHandlerInit(void); 490 491/* 492 * Parser contexts handling. 493 */ 494void xmlInitParserCtxt (xmlParserCtxtPtr ctxt); 495void xmlClearParserCtxt (xmlParserCtxtPtr ctxt); 496void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); 497void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, 498 const xmlChar* buffer, 499 const char* filename); 500xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); 501 502/* 503 * Reading/setting optional parsing features. 504 */ 505 506int xmlGetFeaturesList (int *len, 507 const char **result); 508int xmlGetFeature (xmlParserCtxtPtr ctxt, 509 const char *name, 510 void *result); 511int xmlSetFeature (xmlParserCtxtPtr ctxt, 512 const char *name, 513 void *value); 514 515/* 516 * Interfaces for the Push mode 517 */ 518xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, 519 void *user_data, 520 const char *chunk, 521 int size, 522 const char *filename); 523int xmlParseChunk (xmlParserCtxtPtr ctxt, 524 const char *chunk, 525 int size, 526 int terminate); 527 528/* 529 * Special I/O mode 530 */ 531 532xmlParserCtxtPtr xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, 533 void *user_data, 534 xmlInputReadCallback ioread, 535 xmlInputCloseCallback ioclose, 536 void *ioctx, 537 xmlCharEncoding enc); 538 539xmlParserInputPtr xmlNewIOInputStream (xmlParserCtxtPtr ctxt, 540 xmlParserInputBufferPtr input, 541 xmlCharEncoding enc); 542 543/* 544 * Node infos 545 */ 546const xmlParserNodeInfo* 547 xmlParserFindNodeInfo (const xmlParserCtxt* ctxt, 548 const xmlNode* node); 549void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 550void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 551unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq, 552 const xmlNode* node); 553void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, 554 const xmlParserNodeInfo* info); 555 556/* 557 * External entities handling actually implemented in xmlIO 558 */ 559 560void xmlSetExternalEntityLoader(xmlExternalEntityLoader f); 561xmlExternalEntityLoader 562 xmlGetExternalEntityLoader(void); 563xmlParserInputPtr 564 xmlLoadExternalEntity (const char *URL, 565 const char *ID, 566 xmlParserCtxtPtr context); 567 568#ifdef __cplusplus 569} 570#endif 571 572#endif /* __XML_PARSER_H__ */ 573 574