1/*
2 * Summary: the core parser module
3 * Description: Interfaces, constants and types related to the XML parser
4 *
5 * Copy: See Copyright for the status of this software.
6 *
7 * Author: Daniel Veillard
8 */
9
10#ifndef __XML_PARSER_H__
11#define __XML_PARSER_H__
12
13#include <stdarg.h>
14
15#include <libxml/xmlversion.h>
16#include <libxml/tree.h>
17#include <libxml/dict.h>
18#include <libxml/hash.h>
19#include <libxml/valid.h>
20#include <libxml/entities.h>
21#include <libxml/xmlerror.h>
22#include <libxml/xmlstring.h>
23
24#ifdef __cplusplus
25extern "C" {
26#endif
27
28/**
29 * XML_DEFAULT_VERSION:
30 *
31 * The default version of XML used: 1.0
32 */
33#define XML_DEFAULT_VERSION	"1.0"
34
35/**
36 * xmlParserInput:
37 *
38 * An xmlParserInput is an input flow for the XML processor.
39 * Each entity parsed is associated with an xmlParserInput (except the
40 * few predefined ones). This is the case both for internal entities
41 * - in which case the flow is already completely in memory - or
42 * external entities - in which case we use the buf structure for
43 * progressive reading and I18N conversions to the internal UTF-8 format.
44 */
45
46/**
47 * xmlParserInputDeallocate:
48 * @str:  the string to deallocate
49 *
50 * Callback for freeing some parser input allocations.
51 */
52typedef void (* xmlParserInputDeallocate)(xmlChar *str);
53
/*
 * Per-input parsing state: the backing buffer, the base/cur/end pointers
 * delimiting the data being parsed, position counters (line, col,
 * consumed) and the encoding/version/standalone data kept for the entity.
 */
54struct _xmlParserInput {
55    /* Input buffer */
56    xmlParserInputBufferPtr buf;      /* UTF-8 encoded buffer */
57
58    const char *filename;             /* The file analyzed, if any */
59    const char *directory;            /* the directory/base of the file */
60    const xmlChar *base;              /* Base of the array to parse */
61    const xmlChar *cur;               /* Current char being parsed */
62    const xmlChar *end;               /* end of the array to parse */
63    int length;                       /* length if known */
64    int line;                         /* Current line */
65    int col;                          /* Current column */
66    /*
67     * NOTE: consumed is only tested for equality in the parser code,
68     *       so even if it overflows this should not cause trouble
69     *       when parsing very large instances.
70     */
71    unsigned long consumed;           /* How many xmlChars already consumed */
72    xmlParserInputDeallocate free;    /* function to deallocate the base */
73    const xmlChar *encoding;          /* the encoding string for entity */
74    const xmlChar *version;           /* the version string for entity */
75    int standalone;                   /* Was that entity marked standalone */
76    int id;                           /* a unique identifier for the entity */
77};
78
79/**
80 * xmlParserNodeInfo:
81 *
82 * The parser can be asked to collect node information, i.e. at what
83 * place in the file each node was detected.
84 * NOTE: This is off by default and not very well tested.
85 */
86typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
87typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;
88
89struct _xmlParserNodeInfo {
90  const struct _xmlNode* node;   /* the node this record describes */
91  /* Position and line number at which the text that created the node begins and ends */
92  unsigned long begin_pos;
93  unsigned long begin_line;
94  unsigned long end_pos;
95  unsigned long end_line;
96};
97
/*
 * A sequence of xmlParserNodeInfo records.
 * NOTE(review): presumably `maximum` is the allocated capacity of `buffer`
 * and `length` the number of records in use -- confirm in the implementation.
 */
98typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
99typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
100struct _xmlParserNodeInfoSeq {
101  unsigned long maximum;
102  unsigned long length;
103  xmlParserNodeInfo* buffer;
104};
105
106/**
107 * xmlParserInputState:
108 *
109 * The parser now also works as a state based parser.
110 * The recursive one uses the state info for entities processing.
111 *
 * XML_PARSER_EOF is explicitly -1 and XML_PARSER_START explicitly 0; the
 * remaining states take consecutive values in declaration order.
 */
112typedef enum {
113    XML_PARSER_EOF = -1,	/* nothing is to be parsed */
114    XML_PARSER_START = 0,	/* nothing has been parsed */
115    XML_PARSER_MISC,		/* Misc* before internal subset */
116    XML_PARSER_PI,		/* Within a processing instruction */
117    XML_PARSER_DTD,		/* within some DTD content */
118    XML_PARSER_PROLOG,		/* Misc* after internal subset */
119    XML_PARSER_COMMENT,		/* within a comment */
120    XML_PARSER_START_TAG,	/* within a start tag */
121    XML_PARSER_CONTENT,		/* within the content */
122    XML_PARSER_CDATA_SECTION,	/* within a CDATA section */
123    XML_PARSER_END_TAG,		/* within a closing tag */
124    XML_PARSER_ENTITY_DECL,	/* within an entity declaration */
125    XML_PARSER_ENTITY_VALUE,	/* within an entity value in a decl */
126    XML_PARSER_ATTRIBUTE_VALUE,	/* within an attribute value */
127    XML_PARSER_SYSTEM_LITERAL,	/* within a SYSTEM value */
128    XML_PARSER_EPILOG,		/* the Misc* after the last end tag */
129    XML_PARSER_IGNORE,		/* within an IGNORED section */
130    XML_PARSER_PUBLIC_LITERAL	/* within a PUBLIC value */
131} xmlParserInputState;
132
133/**
134 * XML_DETECT_IDS:
135 *
136 * Bit in the loadsubset context field requesting ID/REFs lookups.
137 * Use it to initialize xmlLoadExtDtdDefaultValue.
138 */
139#define XML_DETECT_IDS		2
140
141/**
142 * XML_COMPLETE_ATTRS:
143 *
144 * Bit in the loadsubset context field telling the parser to complete the
145 * elements' attribute lists with the ones defaulted from the DTDs.
146 * Use it to initialize xmlLoadExtDtdDefaultValue.
147 */
148#define XML_COMPLETE_ATTRS	4
149
150/**
151 * XML_SKIP_IDS:
152 *
153 * Bit in the loadsubset context field telling the parser not to do ID/REFs registration.
154 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
155 */
156#define XML_SKIP_IDS		8
157
158/**
159 * xmlParserMode:
160 *
161 * A parser can operate in various modes.
162 *
 * All enumerator values are assigned explicitly (0 through 5).
 * NOTE(review): the names suggest DOM vs SAX output combined with
 * pull vs push input, plus the reader interface -- confirm against the
 * code that sets the parseMode field of the parser context.
 */
163typedef enum {
164    XML_PARSE_UNKNOWN = 0,
165    XML_PARSE_DOM = 1,
166    XML_PARSE_SAX = 2,
167    XML_PARSE_PUSH_DOM = 3,
168    XML_PARSE_PUSH_SAX = 4,
169    XML_PARSE_READER = 5
170} xmlParserMode;
171
172/**
173 * xmlParserCtxt:
174 *
175 * The parser context.
176 * NOTE This doesn't completely define the parser state, the (current ?)
177 *      design of the parser uses recursive function calls since this allows
178 *      an easy mapping from the production rules of the specification
179 *      to the actual code. The drawback is that the actual function call
180 *      stack also reflects the parser state. However most of the parsing
181 *      routines take the parser context pointer as their only argument, so
182 *      migrating to a state based parser for progressive parsing shouldn't
 *      be too hard.
183 */
184struct _xmlParserCtxt {
185    struct _xmlSAXHandler *sax;       /* The SAX handler */
186    void            *userData;        /* For SAX interface only, used by DOM build */
187    xmlDocPtr           myDoc;        /* the document being built */
188    int            wellFormed;        /* is the document well formed */
189    int       replaceEntities;        /* shall we replace entities ? */
190    const xmlChar    *version;        /* the XML version string */
191    const xmlChar   *encoding;        /* the declared encoding, if any */
192    int            standalone;        /* standalone document */
193    int                  html;        /* an HTML(1)/Docbook(2) document
194                                       * 3 is HTML after <head>
195                                       * 10 is HTML after <body>
196                                       */
197
198    /* Input stream stack */
199    xmlParserInputPtr  input;         /* Current input stream */
200    int                inputNr;       /* Number of current input streams */
201    int                inputMax;      /* Max number of input streams */
202    xmlParserInputPtr *inputTab;      /* stack of inputs */
203
204    /* Node analysis stack only used for DOM building */
205    xmlNodePtr         node;          /* Current parsed Node */
206    int                nodeNr;        /* Depth of the parsing stack */
207    int                nodeMax;       /* Max depth of the parsing stack */
208    xmlNodePtr        *nodeTab;       /* array of nodes */
209
210    int record_info;                  /* Whether node info should be kept */
211    xmlParserNodeInfoSeq node_seq;    /* info about each node parsed */
212
213    int errNo;                        /* error code */
214
215    int     hasExternalSubset;        /* references an external subset */
216    int             hasPErefs;        /* the internal subset has PE refs */
217    int              external;        /* are we parsing an external entity */
218
219    int                 valid;        /* is the document valid */
220    int              validate;        /* shall we try to validate ? */
221    xmlValidCtxt        vctxt;        /* The validity context */
222
223    xmlParserInputState instate;      /* current type of input */
224    int                 token;        /* next char look-ahead */
225
226    char           *directory;        /* the data directory */
227
228    /* Node name stack */
229    const xmlChar     *name;          /* Name of the current parsed node */
230    int                nameNr;        /* Depth of the parsing stack */
231    int                nameMax;       /* Max depth of the parsing stack */
232    const xmlChar *   *nameTab;       /* array of node names */
233
234    long               nbChars;       /* number of xmlChar processed */
235    long            checkIndex;       /* used by progressive parsing lookup */
236    int             keepBlanks;       /* ugly but ... */
237    int             disableSAX;       /* SAX callbacks are disabled */
238    int               inSubset;       /* Parsing is in int 1/ext 2 subset */
239    const xmlChar *    intSubName;    /* name of subset */
240    xmlChar *          extSubURI;     /* URI of external subset */
241    xmlChar *          extSubSystem;  /* SYSTEM ID of external subset */
242
243    /* xml:space values */
244    int *              space;         /* Should the parser preserve spaces */
245    int                spaceNr;       /* Depth of the parsing stack */
246    int                spaceMax;      /* Max depth of the parsing stack */
247    int *              spaceTab;      /* array of space infos */
248
249    int                depth;         /* to prevent entity substitution loops */
250    xmlParserInputPtr  entity;        /* used to check entities boundaries */
251    int                charset;       /* encoding of the in-memory content
252				         actually an xmlCharEncoding */
253    int                nodelen;       /* Those two fields are there to */
254    int                nodemem;       /* Speed up large node parsing */
255    int                pedantic;      /* signal pedantic warnings */
256    void              *_private;      /* For user data, libxml won't touch it */
257
258    int                loadsubset;    /* should the external subset be loaded */
259    int                linenumbers;   /* set line number in element content */
260    void              *catalogs;      /* document's own catalog */
261    int                recovery;      /* run in recovery mode */
262    int                progressive;   /* is this a progressive parsing */
263    xmlDictPtr         dict;          /* dictionary for the parser */
264    const xmlChar *   *atts;          /* array for the attributes callbacks */
265    int                maxatts;       /* the size of the array */
266    int                docdict;       /* use strings from dict to build tree */
267
268    /*
269     * pre-interned strings
270     */
271    const xmlChar *str_xml;
272    const xmlChar *str_xmlns;
273    const xmlChar *str_xml_ns;
274
275    /*
276     * Everything below is used only by the new SAX mode
277     */
278    int                sax2;          /* operating in the new SAX mode */
279    int                nsNr;          /* the number of inherited namespaces */
280    int                nsMax;         /* the size of the arrays */
281    const xmlChar *   *nsTab;         /* the array of prefix/namespace name */
282    int               *attallocs;     /* which attributes were allocated */
283    void *            *pushTab;       /* array of data for push */
284    xmlHashTablePtr    attsDefault;   /* defaulted attributes if any */
285    xmlHashTablePtr    attsSpecial;   /* non-CDATA attributes if any */
286    int                nsWellFormed;  /* is the document XML Namespace okay */
287    int                options;       /* Extra options */
288
289    /*
290     * Those fields are needed only for streaming parsing so far
291     */
292    int               dictNames;    /* Use dictionary names for the tree */
293    int               freeElemsNr;  /* number of freed element nodes */
294    xmlNodePtr        freeElems;    /* List of freed element nodes */
295    int               freeAttrsNr;  /* number of freed attribute nodes */
296    xmlAttrPtr        freeAttrs;    /* List of freed attribute nodes */
297
298    /*
299     * the complete error information for the last error.
300     */
301    xmlError          lastError;
302    xmlParserMode     parseMode;    /* the parser mode */
303    unsigned long    nbentities;    /* number of entity references */
304    unsigned long  sizeentities;    /* size of parsed entities */
305
306    /* for use by HTML non-recursive parser */
307    xmlParserNodeInfo *nodeInfo;      /* Current NodeInfo */
308    int                nodeInfoNr;    /* Depth of the parsing stack */
309    int                nodeInfoMax;   /* Max depth of the parsing stack */
310    xmlParserNodeInfo *nodeInfoTab;   /* array of nodeInfos */
311
312    int                input_id;      /* we need to label inputs */
313    unsigned long      sizeentcopy;   /* volume of entity copy */
314};
315
316/**
317 * xmlSAXLocator:
318 *
319 * A SAX Locator: a table of four accessor callbacks, each taking the
 * opaque context pointer as its only argument.
320 */
321struct _xmlSAXLocator {
322    const xmlChar *(*getPublicId)(void *ctx);   /* returns a string; the public ID, per its name */
323    const xmlChar *(*getSystemId)(void *ctx);   /* returns a string; the system ID, per its name */
324    int (*getLineNumber)(void *ctx);            /* returns an int; the line number, per its name */
325    int (*getColumnNumber)(void *ctx);          /* returns an int; the column number, per its name */
326};
327
328/**
329 * xmlSAXHandler:
330 *
331 * A SAX handler is a bunch of callbacks called by the parser when processing
332 * of the input generates data or structure information.
333 */
334
335/**
336 * resolveEntitySAXFunc:
337 * @ctx:  the user data (XML parser context)
338 * @publicId: The public ID of the entity
339 * @systemId: The system ID of the entity
340 *
341 * Callback:
342 * The entity loader. To control the loading of external entities,
343 * the application can either:
344 *    - override this resolveEntity() callback in the SAX block
345 *    - or better use the xmlSetExternalEntityLoader() function to
346 *      set up its own entity resolution routine
347 *
348 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
349 */
350typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
351				const xmlChar *publicId,
352				const xmlChar *systemId);
353/**
354 * internalSubsetSAXFunc:
355 * @ctx:  the user data (XML parser context)
356 * @name:  the root element name
357 * @ExternalID:  the external ID
358 * @SystemID:  the SYSTEM ID (e.g. filename or URL)
359 *
360 * Callback on internal subset declaration.
361 */
362typedef void (*internalSubsetSAXFunc) (void *ctx,
363				const xmlChar *name,
364				const xmlChar *ExternalID,
365				const xmlChar *SystemID);
366/**
367 * externalSubsetSAXFunc:
368 * @ctx:  the user data (XML parser context)
369 * @name:  the root element name
370 * @ExternalID:  the external ID
371 * @SystemID:  the SYSTEM ID (e.g. filename or URL)
372 *
373 * Callback on external subset declaration.
374 */
375typedef void (*externalSubsetSAXFunc) (void *ctx,
376				const xmlChar *name,
377				const xmlChar *ExternalID,
378				const xmlChar *SystemID);
379/**
380 * getEntitySAXFunc:
381 * @ctx:  the user data (XML parser context)
382 * @name: The entity name
383 *
384 * Get an entity by name.
385 *
386 * Returns the xmlEntityPtr if found.
387 */
388typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
389				const xmlChar *name);
390/**
391 * getParameterEntitySAXFunc:
392 * @ctx:  the user data (XML parser context)
393 * @name: The entity name
394 *
395 * Get a parameter entity by name.
396 *
397 * Returns the xmlEntityPtr if found.
398 */
399typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
400				const xmlChar *name);
401/**
402 * entityDeclSAXFunc:
403 * @ctx:  the user data (XML parser context)
404 * @name:  the entity name
405 * @type:  the entity type
406 * @publicId: The public ID of the entity
407 * @systemId: The system ID of the entity
408 * @content: the entity value (without processing).
409 *
410 * An entity definition has been parsed.
411 */
412typedef void (*entityDeclSAXFunc) (void *ctx,
413				const xmlChar *name,
414				int type,
415				const xmlChar *publicId,
416				const xmlChar *systemId,
417				xmlChar *content);
418/**
419 * notationDeclSAXFunc:
420 * @ctx:  the user data (XML parser context)
421 * @name: The name of the notation
422 * @publicId: The public ID of the entity
423 * @systemId: The system ID of the entity
424 *
425 * What to do when a notation declaration has been parsed.
426 */
427typedef void (*notationDeclSAXFunc)(void *ctx,
428				const xmlChar *name,
429				const xmlChar *publicId,
430				const xmlChar *systemId);
431/**
432 * attributeDeclSAXFunc:
433 * @ctx:  the user data (XML parser context)
434 * @elem:  the name of the element
435 * @fullname:  the attribute name
436 * @type:  the attribute type
437 * @def:  the type of default value
438 * @defaultValue: the attribute default value
439 * @tree:  the tree of enumerated value set
440 *
441 * An attribute definition has been parsed.
442 */
443typedef void (*attributeDeclSAXFunc)(void *ctx,
444				const xmlChar *elem,
445				const xmlChar *fullname,
446				int type,
447				int def,
448				const xmlChar *defaultValue,
449				xmlEnumerationPtr tree);
450/**
451 * elementDeclSAXFunc:
452 * @ctx:  the user data (XML parser context)
453 * @name:  the element name
454 * @type:  the element type
455 * @content: the element value tree
456 *
457 * An element definition has been parsed.
458 */
459typedef void (*elementDeclSAXFunc)(void *ctx,
460				const xmlChar *name,
461				int type,
462				xmlElementContentPtr content);
463/**
464 * unparsedEntityDeclSAXFunc:
465 * @ctx:  the user data (XML parser context)
466 * @name: The name of the entity
467 * @publicId: The public ID of the entity
468 * @systemId: The system ID of the entity
469 * @notationName: the name of the notation
470 *
471 * What to do when an unparsed entity declaration is parsed.
472 */
473typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
474				const xmlChar *name,
475				const xmlChar *publicId,
476				const xmlChar *systemId,
477				const xmlChar *notationName);
478/**
479 * setDocumentLocatorSAXFunc:
480 * @ctx:  the user data (XML parser context)
481 * @loc: A SAX Locator
482 *
483 * Receive the document locator at startup, actually xmlDefaultSAXLocator.
484 * Everything is available on the context, so this is useless in our case.
485 */
486typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
487				xmlSAXLocatorPtr loc);
488/**
489 * startDocumentSAXFunc:
490 * @ctx:  the user data (XML parser context)
491 *
492 * Called when the document starts being processed.
493 */
494typedef void (*startDocumentSAXFunc) (void *ctx);
495/**
496 * endDocumentSAXFunc:
497 * @ctx:  the user data (XML parser context)
498 *
499 * Called when the document end has been detected.
500 */
501typedef void (*endDocumentSAXFunc) (void *ctx);
502/**
503 * startElementSAXFunc:
504 * @ctx:  the user data (XML parser context)
505 * @name:  The element name, including namespace prefix
506 * @atts:  An array of name/value attributes pairs, NULL terminated
507 *
508 * Called when an opening tag has been processed.
509 */
510typedef void (*startElementSAXFunc) (void *ctx,
511				const xmlChar *name,
512				const xmlChar **atts);
513/**
514 * endElementSAXFunc:
515 * @ctx:  the user data (XML parser context)
516 * @name:  The element name
517 *
518 * Called when the end of an element has been detected.
519 */
520typedef void (*endElementSAXFunc) (void *ctx,
521				const xmlChar *name);
522/**
523 * attributeSAXFunc:
524 * @ctx:  the user data (XML parser context)
525 * @name:  The attribute name, including namespace prefix
526 * @value:  The attribute value
527 *
528 * Handle an attribute that has been read by the parser.
529 * The default handling is to convert the attribute into a
530 * DOM subtree and paste it in a new xmlAttr element added to
531 * the element.
532 */
533typedef void (*attributeSAXFunc) (void *ctx,
534				const xmlChar *name,
535				const xmlChar *value);
536/**
537 * referenceSAXFunc:
538 * @ctx:  the user data (XML parser context)
539 * @name:  The entity name
540 *
541 * Called when an entity reference is detected.
542 */
543typedef void (*referenceSAXFunc) (void *ctx,
544				const xmlChar *name);
545/**
546 * charactersSAXFunc:
547 * @ctx:  the user data (XML parser context)
548 * @ch:  a xmlChar string
549 * @len: the number of xmlChar
550 *
551 * Receiving some chars from the parser.
552 */
553typedef void (*charactersSAXFunc) (void *ctx,
554				const xmlChar *ch,
555				int len);
556/**
557 * ignorableWhitespaceSAXFunc:
558 * @ctx:  the user data (XML parser context)
559 * @ch:  a xmlChar string
560 * @len: the number of xmlChar
561 *
562 * Receiving some ignorable whitespaces from the parser.
563 * UNUSED: by default the DOM building will use characters.
564 */
565typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
566				const xmlChar *ch,
567				int len);
568/**
569 * processingInstructionSAXFunc:
570 * @ctx:  the user data (XML parser context)
571 * @target:  the target name
572 * @data: the PI data
573 *
574 * A processing instruction has been parsed.
575 */
576typedef void (*processingInstructionSAXFunc) (void *ctx,
577				const xmlChar *target,
578				const xmlChar *data);
579/**
580 * commentSAXFunc:
581 * @ctx:  the user data (XML parser context)
582 * @value:  the comment content
583 *
584 * A comment has been parsed.
585 */
586typedef void (*commentSAXFunc) (void *ctx,
587				const xmlChar *value);
588/**
589 * cdataBlockSAXFunc:
590 * @ctx:  the user data (XML parser context)
591 * @value:  The pcdata content
592 * @len:  the block length
593 *
594 * Called when a pcdata block has been parsed.
595 */
596typedef void (*cdataBlockSAXFunc) (
597	                        void *ctx,
598				const xmlChar *value,
599				int len);
600/**
601 * warningSAXFunc:
602 * @ctx:  an XML parser context
603 * @msg:  the message to display/transmit
604 * @...:  extra parameters for the message display
605 *
606 * Display and format a warning message, callback.
 *
 * NOTE(review): LIBXML_ATTR_FORMAT(2,3) presumably marks @msg (argument 2)
 * as a printf-style format checked against the variadic arguments starting
 * at position 3 -- confirm against the macro definition.
607 */
608typedef void (XMLCDECL *warningSAXFunc) (void *ctx,
609				const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
610/**
611 * errorSAXFunc:
612 * @ctx:  an XML parser context
613 * @msg:  the message to display/transmit
614 * @...:  extra parameters for the message display
615 *
616 * Display and format an error message, callback.
 *
 * NOTE(review): LIBXML_ATTR_FORMAT(2,3) presumably marks @msg (argument 2)
 * as a printf-style format checked against the variadic arguments starting
 * at position 3 -- confirm against the macro definition.
617 */
618typedef void (XMLCDECL *errorSAXFunc) (void *ctx,
619				const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
620/**
621 * fatalErrorSAXFunc:
622 * @ctx:  an XML parser context
623 * @msg:  the message to display/transmit
624 * @...:  extra parameters for the message display
625 *
626 * Display and format fatal error messages, callback.
627 * Note: so far fatalError() SAX callbacks are not used, error()
628 *       get all the callbacks for errors.
629 */
630typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx,
631				const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
632/**
633 * isStandaloneSAXFunc:
634 * @ctx:  the user data (XML parser context)
635 *
636 * Is this document tagged standalone?
637 *
638 * Returns 1 if true
639 */
640typedef int (*isStandaloneSAXFunc) (void *ctx);
641/**
642 * hasInternalSubsetSAXFunc:
643 * @ctx:  the user data (XML parser context)
644 *
645 * Does this document have an internal subset?
646 *
647 * Returns 1 if true
648 */
649typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
650
651/**
652 * hasExternalSubsetSAXFunc:
653 * @ctx:  the user data (XML parser context)
654 *
655 * Does this document have an external subset?
656 *
657 * Returns 1 if true
658 */
659typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
660
661/************************************************************************
662 *									*
663 *			The SAX version 2 API extensions		*
664 *									*
665 ************************************************************************/
666/**
667 * XML_SAX2_MAGIC:
668 *
669 * Special constant found in the initialized field of SAX2 blocks.
670 */
671#define XML_SAX2_MAGIC 0xDEEDBEAF
672
673/**
674 * startElementNsSAX2Func:
675 * @ctx:  the user data (XML parser context)
676 * @localname:  the local name of the element
677 * @prefix:  the element namespace prefix if available
678 * @URI:  the element namespace name if available
679 * @nb_namespaces:  number of namespace definitions on that node
680 * @namespaces:  pointer to the array of prefix/URI pairs namespace definitions
681 * @nb_attributes:  the number of attributes on that node
682 * @nb_defaulted:  the number of defaulted attributes. The defaulted
683 *                  ones are at the end of the array
684 * @attributes:  pointer to the array of (localname/prefix/URI/value/end)
685 *               attribute values.
686 *
687 * SAX2 callback when an element start has been detected by the parser.
688 * It provides the namespace information for the element, as well as
689 * the new namespace declarations on the element.
690 */
691
692typedef void (*startElementNsSAX2Func) (void *ctx,
693					const xmlChar *localname,
694					const xmlChar *prefix,
695					const xmlChar *URI,
696					int nb_namespaces,
697					const xmlChar **namespaces,
698					int nb_attributes,
699					int nb_defaulted,
700					const xmlChar **attributes);
701
702/**
703 * endElementNsSAX2Func:
704 * @ctx:  the user data (XML parser context)
705 * @localname:  the local name of the element
706 * @prefix:  the element namespace prefix if available
707 * @URI:  the element namespace name if available
708 *
709 * SAX2 callback when an element end has been detected by the parser.
710 * It provides the namespace information for the element.
711 */
712
713typedef void (*endElementNsSAX2Func)   (void *ctx,
714					const xmlChar *localname,
715					const xmlChar *prefix,
716					const xmlChar *URI);
717
718
/*
 * The SAX callback table. The fields up to and including `initialized`
 * appear in the same order as in struct _xmlSAXHandlerV1; the fields after
 * it are the version 2 extensions. Do not reorder: the layout is part of
 * the public interface.
 */
719struct _xmlSAXHandler {
720    internalSubsetSAXFunc internalSubset;
721    isStandaloneSAXFunc isStandalone;
722    hasInternalSubsetSAXFunc hasInternalSubset;
723    hasExternalSubsetSAXFunc hasExternalSubset;
724    resolveEntitySAXFunc resolveEntity;
725    getEntitySAXFunc getEntity;
726    entityDeclSAXFunc entityDecl;
727    notationDeclSAXFunc notationDecl;
728    attributeDeclSAXFunc attributeDecl;
729    elementDeclSAXFunc elementDecl;
730    unparsedEntityDeclSAXFunc unparsedEntityDecl;
731    setDocumentLocatorSAXFunc setDocumentLocator;
732    startDocumentSAXFunc startDocument;
733    endDocumentSAXFunc endDocument;
734    startElementSAXFunc startElement;
735    endElementSAXFunc endElement;
736    referenceSAXFunc reference;
737    charactersSAXFunc characters;
738    ignorableWhitespaceSAXFunc ignorableWhitespace;
739    processingInstructionSAXFunc processingInstruction;
740    commentSAXFunc comment;
741    warningSAXFunc warning;
742    errorSAXFunc error;
743    fatalErrorSAXFunc fatalError; /* unused: error() gets all the errors */
744    getParameterEntitySAXFunc getParameterEntity;
745    cdataBlockSAXFunc cdataBlock;
746    externalSubsetSAXFunc externalSubset;
747    unsigned int initialized;
748    /* The following fields are extensions available only on version 2 */
749    void *_private;
750    startElementNsSAX2Func startElementNs;
751    endElementNsSAX2Func endElementNs;
752    xmlStructuredErrorFunc serror;
753};
754
755/*
756 * SAX Version 1
 *
 * The version 1 callback table. Its fields are the same, in the same
 * order, as the leading fields of struct _xmlSAXHandler up to and
 * including `initialized`; it lacks the version 2 extension fields.
757 */
758typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1;
759typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr;
760struct _xmlSAXHandlerV1 {
761    internalSubsetSAXFunc internalSubset;
762    isStandaloneSAXFunc isStandalone;
763    hasInternalSubsetSAXFunc hasInternalSubset;
764    hasExternalSubsetSAXFunc hasExternalSubset;
765    resolveEntitySAXFunc resolveEntity;
766    getEntitySAXFunc getEntity;
767    entityDeclSAXFunc entityDecl;
768    notationDeclSAXFunc notationDecl;
769    attributeDeclSAXFunc attributeDecl;
770    elementDeclSAXFunc elementDecl;
771    unparsedEntityDeclSAXFunc unparsedEntityDecl;
772    setDocumentLocatorSAXFunc setDocumentLocator;
773    startDocumentSAXFunc startDocument;
774    endDocumentSAXFunc endDocument;
775    startElementSAXFunc startElement;
776    endElementSAXFunc endElement;
777    referenceSAXFunc reference;
778    charactersSAXFunc characters;
779    ignorableWhitespaceSAXFunc ignorableWhitespace;
780    processingInstructionSAXFunc processingInstruction;
781    commentSAXFunc comment;
782    warningSAXFunc warning;
783    errorSAXFunc error;
784    fatalErrorSAXFunc fatalError; /* unused: error() gets all the errors */
785    getParameterEntitySAXFunc getParameterEntity;
786    cdataBlockSAXFunc cdataBlock;
787    externalSubsetSAXFunc externalSubset;
788    unsigned int initialized;
789};
790
791
792/**
793 * xmlExternalEntityLoader:
794 * @URL: The System ID of the resource requested
795 * @ID: The Public ID of the resource requested
796 * @context: the XML parser context
797 *
798 * External entity loaders types.
799 *
800 * Returns the entity input parser.
801 */
802typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
803					 const char *ID,
804					 xmlParserCtxtPtr context);
805
806#ifdef __cplusplus
807}
808#endif
809
810#include <libxml/encoding.h>
811#include <libxml/xmlIO.h>
812#include <libxml/globals.h>
813
814#ifdef __cplusplus
815extern "C" {
816#endif
817
818
819/*
820 * Init/Cleanup
 *
 * Both entry points take no arguments and return nothing.
 * NOTE(review): presumably they set up and tear down library-wide parser
 * state -- confirm the call-ordering requirements in the implementation.
821 */
822XMLPUBFUN void XMLCALL
823		xmlInitParser		(void);
824XMLPUBFUN void XMLCALL
825		xmlCleanupParser	(void);
826
827/*
828 * Input functions
 *
 * Both operate on an xmlParserInputPtr with an int length argument and
 * return an int.
 * NOTE(review): the meaning of the return value is not visible from this
 * header -- confirm in the implementation.
829 */
830XMLPUBFUN int XMLCALL
831		xmlParserInputRead	(xmlParserInputPtr in,
832					 int len);
833XMLPUBFUN int XMLCALL
834		xmlParserInputGrow	(xmlParserInputPtr in,
835					 int len);
836
837/*
838 * Basic parsing Interfaces
 *
 * The three xmlParse* entry points take their input as a string, a file
 * name, or a buffer plus size, and each returns an xmlDocPtr.
 * The *Default functions take an int value and return an int.
 * NOTE(review): presumably the *Default functions return the previous
 * setting -- confirm in the implementation.
839 */
/* The document-returning entry points are declared only with SAX1 support. */
840#ifdef LIBXML_SAX1_ENABLED
841XMLPUBFUN xmlDocPtr XMLCALL
842		xmlParseDoc		(const xmlChar *cur);
843XMLPUBFUN xmlDocPtr XMLCALL
844		xmlParseFile		(const char *filename);
845XMLPUBFUN xmlDocPtr XMLCALL
846		xmlParseMemory		(const char *buffer,
847					 int size);
848#endif /* LIBXML_SAX1_ENABLED */
849XMLPUBFUN int XMLCALL
850		xmlSubstituteEntitiesDefault(int val);
851XMLPUBFUN int XMLCALL
852		xmlKeepBlanksDefault	(int val);
853XMLPUBFUN void XMLCALL
854		xmlStopParser		(xmlParserCtxtPtr ctxt);
855XMLPUBFUN int XMLCALL
856		xmlPedanticParserDefault(int val);
857XMLPUBFUN int XMLCALL
858		xmlLineNumbersDefault	(int val);
859
860#ifdef LIBXML_SAX1_ENABLED
861/*
862 * Recovery mode
 *
 * These mirror the signatures of xmlParseDoc, xmlParseMemory and
 * xmlParseFile, and are likewise declared only with SAX1 support.
863 */
864XMLPUBFUN xmlDocPtr XMLCALL
865		xmlRecoverDoc		(const xmlChar *cur);
866XMLPUBFUN xmlDocPtr XMLCALL
867		xmlRecoverMemory	(const char *buffer,
868					 int size);
869XMLPUBFUN xmlDocPtr XMLCALL
870		xmlRecoverFile		(const char *filename);
871#endif /* LIBXML_SAX1_ENABLED */
872
873/*
874 * Less common routines and SAX interfaces
 *
 * xmlParseDocument and xmlParseExtParsedEnt work on an existing parser
 * context and return an int.
875 */
876XMLPUBFUN int XMLCALL
877		xmlParseDocument	(xmlParserCtxtPtr ctxt);
878XMLPUBFUN int XMLCALL
879		xmlParseExtParsedEnt	(xmlParserCtxtPtr ctxt);
/*
 * The entry points below take a caller-supplied SAX handler (except
 * xmlParseEntity) and are declared only with SAX1 support. The `recovery`
 * argument is an int flag.
 * NOTE(review): presumably non-zero `recovery` selects recovery mode and
 * the `data` argument of the *WithData variants is opaque user data --
 * confirm in the implementation.
 */
880#ifdef LIBXML_SAX1_ENABLED
881XMLPUBFUN int XMLCALL
882		xmlSAXUserParseFile	(xmlSAXHandlerPtr sax,
883					 void *user_data,
884					 const char *filename);
885XMLPUBFUN int XMLCALL
886		xmlSAXUserParseMemory	(xmlSAXHandlerPtr sax,
887					 void *user_data,
888					 const char *buffer,
889					 int size);
890XMLPUBFUN xmlDocPtr XMLCALL
891		xmlSAXParseDoc		(xmlSAXHandlerPtr sax,
892					 const xmlChar *cur,
893					 int recovery);
894XMLPUBFUN xmlDocPtr XMLCALL
895		xmlSAXParseMemory	(xmlSAXHandlerPtr sax,
896					 const char *buffer,
897					 int size,
898					 int recovery);
899XMLPUBFUN xmlDocPtr XMLCALL
900		xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax,
901					 const char *buffer,
902					 int size,
903					 int recovery,
904					 void *data);
905XMLPUBFUN xmlDocPtr XMLCALL
906		xmlSAXParseFile		(xmlSAXHandlerPtr sax,
907					 const char *filename,
908					 int recovery);
909XMLPUBFUN xmlDocPtr XMLCALL
910		xmlSAXParseFileWithData	(xmlSAXHandlerPtr sax,
911					 const char *filename,
912					 int recovery,
913					 void *data);
914XMLPUBFUN xmlDocPtr XMLCALL
915		xmlSAXParseEntity	(xmlSAXHandlerPtr sax,
916					 const char *filename);
917XMLPUBFUN xmlDocPtr XMLCALL
918		xmlParseEntity		(const char *filename);
919#endif /* LIBXML_SAX1_ENABLED */
920
/*
 * DTD loading entry points, declared only when LIBXML_VALID_ENABLED is
 * defined. All three return an xmlDtdPtr.
 *
 * - xmlSAXParseDTD / xmlParseDTD identify the DTD by an ExternalID and a
 *   SystemID; the SAX variant additionally takes a SAX handler.
 * - xmlIOParseDTD reads from an xmlParserInputBufferPtr with an explicit
 *   xmlCharEncoding.
 *
 * NOTE(review): who owns "input" after xmlIOParseDTD returns is not
 * visible here - confirm in the implementation before freeing it.
 */
921#ifdef LIBXML_VALID_ENABLED
922XMLPUBFUN xmlDtdPtr XMLCALL
923		xmlSAXParseDTD		(xmlSAXHandlerPtr sax,
924					 const xmlChar *ExternalID,
925					 const xmlChar *SystemID);
926XMLPUBFUN xmlDtdPtr XMLCALL
927		xmlParseDTD		(const xmlChar *ExternalID,
928					 const xmlChar *SystemID);
929XMLPUBFUN xmlDtdPtr XMLCALL
930		xmlIOParseDTD		(xmlSAXHandlerPtr sax,
931					 xmlParserInputBufferPtr input,
932					 xmlCharEncoding enc);
933#endif /* LIBXML_VALID_ENABLED */
/*
 * Fragment and external-entity parsing.
 *
 * xmlParseBalancedChunkMemory, xmlParseBalancedChunkMemoryRecover and
 * xmlParseExternalEntity are declared only when LIBXML_SAX1_ENABLED is
 * defined; xmlParseInNodeContext and xmlParseCtxtExternalEntity are
 * declared unconditionally.
 *
 * Every function here takes an xmlNodePtr *lst.
 * NOTE(review): lst is presumably an output parameter receiving the
 * resulting node list, to be freed by the caller - confirm in the
 * implementation.
 */
934#ifdef LIBXML_SAX1_ENABLED
935XMLPUBFUN int XMLCALL
936		xmlParseBalancedChunkMemory(xmlDocPtr doc,
937					 xmlSAXHandlerPtr sax,
938					 void *user_data,
939					 int depth,
940					 const xmlChar *string,
941					 xmlNodePtr *lst);
942#endif /* LIBXML_SAX1_ENABLED */
/*
 * Unlike its neighbours, which return int, this one returns an
 * xmlParserErrors value. The input is a char buffer with an explicit
 * length (datalen) and an int options argument.
 */
943XMLPUBFUN xmlParserErrors XMLCALL
944		xmlParseInNodeContext	(xmlNodePtr node,
945					 const char *data,
946					 int datalen,
947					 int options,
948					 xmlNodePtr *lst);
949#ifdef LIBXML_SAX1_ENABLED
/*
 * Same parameter list as xmlParseBalancedChunkMemory with one extra
 * trailing int, "recover".
 */
950XMLPUBFUN int XMLCALL
951		xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,
952                     xmlSAXHandlerPtr sax,
953                     void *user_data,
954                     int depth,
955                     const xmlChar *string,
956                     xmlNodePtr *lst,
957                     int recover);
958XMLPUBFUN int XMLCALL
959		xmlParseExternalEntity	(xmlDocPtr doc,
960					 xmlSAXHandlerPtr sax,
961					 void *user_data,
962					 int depth,
963					 const xmlChar *URL,
964					 const xmlChar *ID,
965					 xmlNodePtr *lst);
966#endif /* LIBXML_SAX1_ENABLED */
/*
 * Context-based counterpart of xmlParseExternalEntity: it takes an
 * existing parser context in place of the doc/sax/user_data/depth
 * arguments.
 */
967XMLPUBFUN int XMLCALL
968		xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
969					 const xmlChar *URL,
970					 const xmlChar *ID,
971					 xmlNodePtr *lst);
972
973/*
974 * Parser contexts handling.
975 */
/*
 * xmlNewParserCtxt takes no arguments and returns an xmlParserCtxtPtr;
 * xmlInitParserCtxt returns an int; xmlClearParserCtxt and
 * xmlFreeParserCtxt return nothing.
 * NOTE(review): presumably a context obtained from xmlNewParserCtxt or
 * xmlCreateDocParserCtxt is released with xmlFreeParserCtxt, and a NULL
 * return signals allocation failure - confirm in the implementation.
 */
976XMLPUBFUN xmlParserCtxtPtr XMLCALL
977		xmlNewParserCtxt	(void);
978XMLPUBFUN int XMLCALL
979		xmlInitParserCtxt	(xmlParserCtxtPtr ctxt);
980XMLPUBFUN void XMLCALL
981		xmlClearParserCtxt	(xmlParserCtxtPtr ctxt);
982XMLPUBFUN void XMLCALL
983		xmlFreeParserCtxt	(xmlParserCtxtPtr ctxt);
/* xmlSetupParserForBuffer is declared only when LIBXML_SAX1_ENABLED is defined. */
984#ifdef LIBXML_SAX1_ENABLED
985XMLPUBFUN void XMLCALL
986		xmlSetupParserForBuffer	(xmlParserCtxtPtr ctxt,
987					 const xmlChar* buffer,
988					 const char *filename);
989#endif /* LIBXML_SAX1_ENABLED */
/* Builds a parser context from a const xmlChar string. */
990XMLPUBFUN xmlParserCtxtPtr XMLCALL
991		xmlCreateDocParserCtxt	(const xmlChar *cur);
992
993#ifdef LIBXML_LEGACY_ENABLED
994/*
995 * Reading/setting optional parsing features.
996 *
 * This group is declared only when LIBXML_LEGACY_ENABLED is defined.
 * Features are addressed by name (const char *) and their values travel
 * through untyped void * arguments, so the compiler cannot check that the
 * pointed-to type matches the feature.
 * NOTE(review): the expected pointee type per feature name, and the
 * in/out role of *len in xmlGetFeaturesList, are not visible in this
 * header - confirm in the implementation.
997 */
998XMLPUBFUN int XMLCALL
999		xmlGetFeaturesList	(int *len,
1000					 const char **result);
1001XMLPUBFUN int XMLCALL
1002		xmlGetFeature		(xmlParserCtxtPtr ctxt,
1003					 const char *name,
1004					 void *result);
1005XMLPUBFUN int XMLCALL
1006		xmlSetFeature		(xmlParserCtxtPtr ctxt,
1007					 const char *name,
1008					 void *value);
1009#endif /* LIBXML_LEGACY_ENABLED */
1009
1010#ifdef LIBXML_PUSH_ENABLED
1011/*
1012 * Interfaces for the Push mode.
1013 */
/*
 * Declared only when LIBXML_PUSH_ENABLED is defined.
 *
 * xmlCreatePushParserCtxt returns a parser context built from a SAX
 * handler, a user_data pointer, an initial chunk with its size, and a
 * file name. xmlParseChunk takes that context plus a further chunk with
 * its size and an int "terminate", and returns an int.
 * NOTE(review): presumably a non-zero "terminate" marks the last chunk
 * and the int result is 0 on success - confirm in the implementation.
 */
1014XMLPUBFUN xmlParserCtxtPtr XMLCALL
1015		xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
1016					 void *user_data,
1017					 const char *chunk,
1018					 int size,
1019					 const char *filename);
1020XMLPUBFUN int XMLCALL
1021		xmlParseChunk		(xmlParserCtxtPtr ctxt,
1022					 const char *chunk,
1023					 int size,
1024					 int terminate);
1025#endif /* LIBXML_PUSH_ENABLED */
1026
1027/*
1028 * Special I/O mode.
1029 */
1030
/*
 * xmlCreateIOParserCtxt builds a parser context from caller-supplied
 * read and close callbacks; ioctx is an untyped pointer supplied next to
 * them, and enc is an xmlCharEncoding value.
 * NOTE(review): ioctx is presumably handed back unchanged to ioread and
 * ioclose - confirm against the callback typedefs.
 */
1031XMLPUBFUN xmlParserCtxtPtr XMLCALL
1032		xmlCreateIOParserCtxt	(xmlSAXHandlerPtr sax,
1033					 void *user_data,
1034					 xmlInputReadCallback   ioread,
1035					 xmlInputCloseCallback  ioclose,
1036					 void *ioctx,
1037					 xmlCharEncoding enc);
1038
/*
 * Returns an xmlParserInputPtr built from an existing
 * xmlParserInputBufferPtr, for use with the given parser context.
 */
1039XMLPUBFUN xmlParserInputPtr XMLCALL
1040		xmlNewIOInputStream	(xmlParserCtxtPtr ctxt,
1041					 xmlParserInputBufferPtr input,
1042					 xmlCharEncoding enc);
1043
1044/*
1045 * Node infos.
1046 *
 * xmlParserFindNodeInfo returns a pointer to a const xmlParserNodeInfo;
 * xmlParserFindNodeInfoIndex returns an unsigned long; the remaining
 * functions return nothing.
 * NOTE(review): if the *Ptr types are pointer typedefs, the "const" in
 * "const xmlNodePtr" and similar parameters qualifies the pointer itself,
 * not the object it points to - confirm against the typedefs before
 * treating these arguments as read-only.
1047 */
1048XMLPUBFUN const xmlParserNodeInfo* XMLCALL
1049		xmlParserFindNodeInfo	(const xmlParserCtxtPtr ctxt,
1050				         const xmlNodePtr node);
1051XMLPUBFUN void XMLCALL
1052		xmlInitNodeInfoSeq	(xmlParserNodeInfoSeqPtr seq);
1053XMLPUBFUN void XMLCALL
1054		xmlClearNodeInfoSeq	(xmlParserNodeInfoSeqPtr seq);
1055XMLPUBFUN unsigned long XMLCALL
1056		xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1057                                         const xmlNodePtr node);
1058XMLPUBFUN void XMLCALL
1059		xmlParserAddNodeInfo	(xmlParserCtxtPtr ctxt,
1060					 const xmlParserNodeInfoPtr info);
1060
1061/*
1062 * External entities handling actually implemented in xmlIO.
1063 */
1064
/*
 * Setter/getter pair for a single xmlExternalEntityLoader value: the
 * setter takes one and returns nothing, the getter takes no arguments and
 * returns one.
 * NOTE(review): presumably the loader is process-global state; thread
 * safety of replacing it is not visible here - confirm in xmlIO.
 */
1065XMLPUBFUN void XMLCALL
1066		xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
1067XMLPUBFUN xmlExternalEntityLoader XMLCALL
1068		xmlGetExternalEntityLoader(void);
/*
 * Returns an xmlParserInputPtr for the entity identified by URL and ID,
 * in the given parser context.
 */
1069XMLPUBFUN xmlParserInputPtr XMLCALL
1070		xmlLoadExternalEntity	(const char *URL,
1071					 const char *ID,
1072					 xmlParserCtxtPtr ctxt);
1073
1074/*
1075 * Index lookup, actually implemented in the encoding module
1076 *
 * xmlByteConsumed takes a parser context and returns a long.
 * NOTE(review): the width of long is platform-dependent, so very large
 * inputs may not be representable where long is 32 bits; how errors are
 * reported (presumably a negative value) is not visible here - confirm in
 * the encoding module.
1077 */
1078XMLPUBFUN long XMLCALL
1079		xmlByteConsumed		(xmlParserCtxtPtr ctxt);
1079
1080/*
1081 * New set of simpler/more flexible APIs
1082 */
/**
 * xmlParserOption:
 *
 * This is the set of XML parser options that can be passed down
 * to the xmlReadDoc() and similar calls.
 *
 * Every enumerator is a distinct single bit (1<<0 through 1<<22), so
 * several options can be combined with bitwise OR into the int "options"
 * argument those calls take.
 */
typedef enum {
    XML_PARSE_RECOVER    = 1<<0,  /* recover on errors */
    XML_PARSE_NOENT      = 1<<1,  /* substitute entities */
    XML_PARSE_DTDLOAD    = 1<<2,  /* load the external subset */
    XML_PARSE_DTDATTR    = 1<<3,  /* default DTD attributes */
    XML_PARSE_DTDVALID   = 1<<4,  /* validate with the DTD */
    XML_PARSE_NOERROR    = 1<<5,  /* suppress error reports */
    XML_PARSE_NOWARNING  = 1<<6,  /* suppress warning reports */
    XML_PARSE_PEDANTIC   = 1<<7,  /* pedantic error reporting */
    XML_PARSE_NOBLANKS   = 1<<8,  /* remove blank nodes */
    XML_PARSE_SAX1       = 1<<9,  /* use the SAX1 interface internally */
    XML_PARSE_XINCLUDE   = 1<<10, /* Implement XInclude substitution */
    XML_PARSE_NONET      = 1<<11, /* Forbid network access */
    XML_PARSE_NODICT     = 1<<12, /* Do not reuse the context dictionary */
    XML_PARSE_NSCLEAN    = 1<<13, /* remove redundant namespace declarations */
    XML_PARSE_NOCDATA    = 1<<14, /* merge CDATA as text nodes */
    XML_PARSE_NOXINCNODE = 1<<15, /* do not generate XINCLUDE START/END nodes */
    XML_PARSE_COMPACT    = 1<<16, /* compact small text nodes; no modification of
                                     the tree allowed afterwards (will possibly
                                     crash if you try to modify the tree) */
    XML_PARSE_OLD10      = 1<<17, /* parse using XML-1.0 before update 5 */
    XML_PARSE_NOBASEFIX  = 1<<18, /* do not fixup XINCLUDE xml:base uris */
    XML_PARSE_HUGE       = 1<<19, /* relax any hardcoded limit from the parser */
    XML_PARSE_OLDSAX     = 1<<20, /* parse using SAX2 interface before 2.7.0 */
    XML_PARSE_IGNORE_ENC = 1<<21, /* ignore internal document encoding hint */
    XML_PARSE_BIG_LINES  = 1<<22  /* Store big line numbers in text PSVI field */
} xmlParserOption;
1116
/*
 * Operations on an existing parser context.
 *
 * xmlCtxtReset returns nothing. xmlCtxtResetPush takes the context plus a
 * chunk with its size, a file name and an encoding name, and returns an
 * int. xmlCtxtUseOptions takes the context plus an int options value and
 * returns an int.
 * NOTE(review): "options" is presumably an OR of xmlParserOption bits,
 * and the int results presumably use 0 for success - confirm in the
 * implementation.
 */
1117XMLPUBFUN void XMLCALL
1118		xmlCtxtReset		(xmlParserCtxtPtr ctxt);
1119XMLPUBFUN int XMLCALL
1120		xmlCtxtResetPush	(xmlParserCtxtPtr ctxt,
1121					 const char *chunk,
1122					 int size,
1123					 const char *filename,
1124					 const char *encoding);
1125XMLPUBFUN int XMLCALL
1126		xmlCtxtUseOptions	(xmlParserCtxtPtr ctxt,
1127					 int options);
/*
 * xmlRead* family: each function returns an xmlDocPtr and ends with the
 * same three parameters - a URL, an encoding name, and an int options
 * value (the xmlParserOption comment in this header names xmlReadDoc()
 * and similar calls as the consumers of those option bits). The functions
 * differ in the input source: an xmlChar string, a URL/file, a memory
 * buffer with its size, a file descriptor, or read/close callbacks with
 * an ioctx pointer.
 * NOTE(review): whether URL and encoding may be NULL, and whether
 * xmlReadFd closes fd, is not visible in this header - confirm in the
 * implementation.
 */
1128XMLPUBFUN xmlDocPtr XMLCALL
1129		xmlReadDoc		(const xmlChar *cur,
1130					 const char *URL,
1131					 const char *encoding,
1132					 int options);
1133XMLPUBFUN xmlDocPtr XMLCALL
1134		xmlReadFile		(const char *URL,
1135					 const char *encoding,
1136					 int options);
1137XMLPUBFUN xmlDocPtr XMLCALL
1138		xmlReadMemory		(const char *buffer,
1139					 int size,
1140					 const char *URL,
1141					 const char *encoding,
1142					 int options);
1143XMLPUBFUN xmlDocPtr XMLCALL
1144		xmlReadFd		(int fd,
1145					 const char *URL,
1146					 const char *encoding,
1147					 int options);
1148XMLPUBFUN xmlDocPtr XMLCALL
1149		xmlReadIO		(xmlInputReadCallback ioread,
1150					 xmlInputCloseCallback ioclose,
1151					 void *ioctx,
1152					 const char *URL,
1153					 const char *encoding,
1154					 int options);
/*
 * xmlCtxtRead* family: same parameter lists as the corresponding xmlRead*
 * functions, with an xmlParserCtxtPtr prepended as first argument. Each
 * returns an xmlDocPtr. Note that xmlCtxtReadFile names its second
 * parameter "filename" where xmlReadFile uses "URL".
 * NOTE(review): presumably the supplied context is reused rather than
 * freed by these calls - confirm in the implementation.
 */
1155XMLPUBFUN xmlDocPtr XMLCALL
1156		xmlCtxtReadDoc		(xmlParserCtxtPtr ctxt,
1157					 const xmlChar *cur,
1158					 const char *URL,
1159					 const char *encoding,
1160					 int options);
1161XMLPUBFUN xmlDocPtr XMLCALL
1162		xmlCtxtReadFile		(xmlParserCtxtPtr ctxt,
1163					 const char *filename,
1164					 const char *encoding,
1165					 int options);
1166XMLPUBFUN xmlDocPtr XMLCALL
1167		xmlCtxtReadMemory		(xmlParserCtxtPtr ctxt,
1168					 const char *buffer,
1169					 int size,
1170					 const char *URL,
1171					 const char *encoding,
1172					 int options);
1173XMLPUBFUN xmlDocPtr XMLCALL
1174		xmlCtxtReadFd		(xmlParserCtxtPtr ctxt,
1175					 int fd,
1176					 const char *URL,
1177					 const char *encoding,
1178					 int options);
1179XMLPUBFUN xmlDocPtr XMLCALL
1180		xmlCtxtReadIO		(xmlParserCtxtPtr ctxt,
1181					 xmlInputReadCallback ioread,
1182					 xmlInputCloseCallback ioclose,
1183					 void *ioctx,
1184					 const char *URL,
1185					 const char *encoding,
1186					 int options);
1187
1188/*
1189 * Library wide options
1190 */
/**
 * xmlFeature:
 *
 * Used to examine the existence of features that can be enabled
 * or disabled at compile-time.
 * They used to be called XML_FEATURE_xxx but this clashed with Expat
 *
 * The enumerators are consecutive identifiers starting at 1 (not bit
 * flags); XML_WITH_NONE is a large sentinel value.
 */
typedef enum {
    XML_WITH_THREAD = 1,
    XML_WITH_TREE = 2,
    XML_WITH_OUTPUT = 3,
    XML_WITH_PUSH = 4,
    XML_WITH_READER = 5,
    XML_WITH_PATTERN = 6,
    XML_WITH_WRITER = 7,
    XML_WITH_SAX1 = 8,
    XML_WITH_FTP = 9,
    XML_WITH_HTTP = 10,
    XML_WITH_VALID = 11,
    XML_WITH_HTML = 12,
    XML_WITH_LEGACY = 13,
    XML_WITH_C14N = 14,
    XML_WITH_CATALOG = 15,
    XML_WITH_XPATH = 16,
    XML_WITH_XPTR = 17,
    XML_WITH_XINCLUDE = 18,
    XML_WITH_ICONV = 19,
    XML_WITH_ISO8859X = 20,
    XML_WITH_UNICODE = 21,
    XML_WITH_REGEXP = 22,
    XML_WITH_AUTOMATA = 23,
    XML_WITH_EXPR = 24,
    XML_WITH_SCHEMAS = 25,
    XML_WITH_SCHEMATRON = 26,
    XML_WITH_MODULES = 27,
    XML_WITH_DEBUG = 28,
    XML_WITH_DEBUG_MEM = 29,
    XML_WITH_DEBUG_RUN = 30,
    XML_WITH_ZLIB = 31,
    XML_WITH_ICU = 32,
    XML_WITH_LZMA = 33,
    XML_WITH_NONE = 99999 /* just to be sure of allocation size */
} xmlFeature;
1234
/*
 * xmlHasFeature takes one xmlFeature value and returns an int.
 * NOTE(review): presumably non-zero means the feature was compiled in and
 * zero means it was not - confirm in the implementation.
 */
1235XMLPUBFUN int XMLCALL
1236		xmlHasFeature		(xmlFeature feature);
1237
1238#ifdef __cplusplus
1239}
1240#endif
1241#endif /* __XML_PARSER_H__ */
1242