Gnome XML Library Reference Manual |
---|
parser —
#define XML_DEFAULT_VERSION void (*xmlParserInputDeallocate) (xmlChar *str); struct xmlParserInput; struct xmlParserNodeInfo; typedef xmlParserNodeInfoPtr; struct xmlParserNodeInfoSeq; typedef xmlParserNodeInfoSeqPtr; enum xmlParserInputState; #define XML_DETECT_IDS #define XML_COMPLETE_ATTRS #define XML_SKIP_IDS struct xmlParserCtxt; struct xmlSAXLocator; xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, const xmlChar *publicId, const xmlChar *systemId); void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID); void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID); xmlEntityPtr (*getEntitySAXFunc) (void *ctx, const xmlChar *name); xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, const xmlChar *name); void (*entityDeclSAXFunc) (void *ctx, const xmlChar *name, int type, const xmlChar *publicId, const xmlChar *systemId, xmlChar *content); void (*notationDeclSAXFunc) (void *ctx, const xmlChar *name, const xmlChar *publicId, const xmlChar *systemId); void (*attributeDeclSAXFunc) (void *ctx, const xmlChar *elem, const xmlChar *fullname, int type, int def, const xmlChar *defaultValue, xmlEnumerationPtr tree); void (*elementDeclSAXFunc) (void *ctx, const xmlChar *name, int type, xmlElementContentPtr content); void (*unparsedEntityDeclSAXFunc) (void *ctx, const xmlChar *name, const xmlChar *publicId, const xmlChar *systemId, const xmlChar *notationName); void (*setDocumentLocatorSAXFunc) (void *ctx, xmlSAXLocatorPtr loc); void (*startDocumentSAXFunc) (void *ctx); void (*endDocumentSAXFunc) (void *ctx); void (*startElementSAXFunc) (void *ctx, const xmlChar *name, const xmlChar **atts); void (*endElementSAXFunc) (void *ctx, const xmlChar *name); void (*attributeSAXFunc) (void *ctx, const xmlChar *name, const xmlChar *value); void (*referenceSAXFunc) (void *ctx, const xmlChar *name); void (*charactersSAXFunc) (void *ctx, const xmlChar *ch, int len); 
void (*ignorableWhitespaceSAXFunc) (void *ctx, const xmlChar *ch, int len); void (*processingInstructionSAXFunc) (void *ctx, const xmlChar *target, const xmlChar *data); void (*commentSAXFunc) (void *ctx, const xmlChar *value); void (*cdataBlockSAXFunc) (void *ctx, const xmlChar *value, int len); void (*warningSAXFunc) (void *ctx, const char *msg, ...); void (*errorSAXFunc) (void *ctx, const char *msg, ...); void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...); int (*isStandaloneSAXFunc) (void *ctx); int (*hasInternalSubsetSAXFunc) (void *ctx); int (*hasExternalSubsetSAXFunc) (void *ctx); struct xmlSAXHandler; xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, const char *ID, xmlParserCtxtPtr context); void xmlInitParser (void); void xmlCleanupParser (void); int xmlParserInputRead (xmlParserInputPtr in, int len); int xmlParserInputGrow (xmlParserInputPtr in, int len); xmlChar* xmlStrdup (const xmlChar *cur); xmlChar* xmlStrndup (const xmlChar *cur, int len); xmlChar* xmlCharStrndup (const char *cur, int len); xmlChar* xmlCharStrdup (const char *cur); xmlChar* xmlStrsub (const xmlChar *str, int start, int len); const xmlChar* xmlStrchr (const xmlChar *str, xmlChar val); const xmlChar* xmlStrstr (const xmlChar *str, const xmlChar *val); const xmlChar* xmlStrcasestr (const xmlChar *str, xmlChar *val); int xmlStrcmp (const xmlChar *str1, const xmlChar *str2); int xmlStrncmp (const xmlChar *str1, const xmlChar *str2, int len); int xmlStrcasecmp (const xmlChar *str1, const xmlChar *str2); int xmlStrncasecmp (const xmlChar *str1, const xmlChar *str2, int len); int xmlStrEqual (const xmlChar *str1, const xmlChar *str2); int xmlStrlen (const xmlChar *str); xmlChar* xmlStrcat (xmlChar *cur, const xmlChar *add); xmlChar* xmlStrncat (xmlChar *cur, const xmlChar *add, int len); xmlDocPtr xmlParseDoc (xmlChar *cur); xmlDocPtr xmlParseMemory (const char *buffer, int size); xmlDocPtr xmlParseFile (const char *filename); int xmlSubstituteEntitiesDefault (int 
val); int xmlKeepBlanksDefault (int val); void xmlStopParser (xmlParserCtxtPtr ctxt); int xmlPedanticParserDefault (int val); int xmlLineNumbersDefault (int val); xmlDocPtr xmlRecoverDoc (xmlChar *cur); xmlDocPtr xmlRecoverMemory (const char *buffer, int size); xmlDocPtr xmlRecoverFile (const char *filename); int xmlParseDocument (xmlParserCtxtPtr ctxt); int xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax, xmlChar *cur, int recovery); int xmlSAXUserParseFile (xmlSAXHandlerPtr sax, void *user_data, const char *filename); int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, void *user_data, const char *buffer, int size); xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax, const char *buffer, int size, int recovery); xmlDocPtr xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, const char *buffer, int size, int recovery, void *data); xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax, const char *filename, int recovery); xmlDocPtr xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, const char *filename, int recovery, void *data); xmlDocPtr xmlSAXParseEntity (xmlSAXHandlerPtr sax, const char *filename); xmlDocPtr xmlParseEntity (const char *filename); xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID, const xmlChar *SystemID); xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, const xmlChar *ExternalID, const xmlChar *SystemID); xmlDtdPtr xmlIOParseDTD (xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, xmlCharEncoding enc); int xmlParseBalancedChunkMemory (xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst); int xmlParseBalancedChunkMemoryRecover (xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, int recover); int xmlParseExternalEntity (xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst); int xmlParseCtxtExternalEntity (xmlParserCtxtPtr ctx, const xmlChar *URL, const 
xmlChar *ID, xmlNodePtr *lst); int xmlInitParserCtxt (xmlParserCtxtPtr ctxt); void xmlClearParserCtxt (xmlParserCtxtPtr ctxt); void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, const xmlChar *buffer, const char *filename); xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); int xmlGetFeaturesList (int *len, const char **result); int xmlGetFeature (xmlParserCtxtPtr ctxt, const char *name, void *result); int xmlSetFeature (xmlParserCtxtPtr ctxt, const char *name, void *value); xmlParserCtxtPtr xmlCreatePushParserCtxt (xmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename); int xmlParseChunk (xmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate); xmlParserCtxtPtr xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, void *user_data, xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, void *ioctx, xmlCharEncoding enc); xmlParserInputPtr xmlNewIOInputStream (xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, xmlCharEncoding enc); const xmlParserNodeInfo* xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, const xmlNodePtr node); void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); unsigned long xmlParserFindNodeInfoIndex (const xmlParserNodeInfoSeqPtr seq, const xmlNodePtr node); void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, const xmlParserNodeInfoPtr info); void xmlSetExternalEntityLoader (xmlExternalEntityLoader f); xmlExternalEntityLoader xmlGetExternalEntityLoader (void); xmlParserInputPtr xmlLoadExternalEntity (const char *URL, const char *ID, xmlParserCtxtPtr ctxt);
void (*xmlParserInputDeallocate) (xmlChar *str);
Callback for freeing some parser input allocations.
str : | the string to deallocate |
struct xmlParserInput { /* Input buffer */ xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ const char *filename; /* The file analyzed, if any */ const char *directory; /* the directory/base of the file */ const xmlChar *base; /* Base of the array to parse */ const xmlChar *cur; /* Current char being parsed */ const xmlChar *end; /* end of the array to parse */ int length; /* length if known */ int line; /* Current line */ int col; /* Current column */ /* * NOTE: consumed is only tested for equality in the parser code, * so even if there is an overflow this should not give troubles * for parsing very large instances. */ unsigned long consumed; /* How many xmlChars already consumed */ xmlParserInputDeallocate free; /* function to deallocate the base */ const xmlChar *encoding; /* the encoding string for entity */ const xmlChar *version; /* the version string for entity */ int standalone; /* Was that entity marked standalone */ };
An xmlParserInput is an input flow for the XML processor. Each entity parsed is associated with an xmlParserInput (except the few predefined ones). This is the case both for internal entities - in which case the flow is already completely in memory - and for external entities - in which case we use the buf structure for progressive reading and I18N conversions to the internal UTF-8 format.
struct xmlParserNodeInfo { const struct _xmlNode* node; /* Position & line # that text that created the node begins & ends on */ unsigned long begin_pos; unsigned long begin_line; unsigned long end_pos; unsigned long end_line; };
The parser can be asked to collect node information, i.e. at what place in the file each node was detected. NOTE: This is off by default and not very well tested.
struct xmlParserNodeInfoSeq { unsigned long maximum; unsigned long length; xmlParserNodeInfo* buffer; };
typedef enum { XML_PARSER_EOF = -1, /* nothing is to be parsed */ XML_PARSER_START = 0, /* nothing has been parsed */ XML_PARSER_MISC, /* Misc* before int subset */ XML_PARSER_PI, /* Within a processing instruction */ XML_PARSER_DTD, /* within some DTD content */ XML_PARSER_PROLOG, /* Misc* after internal subset */ XML_PARSER_COMMENT, /* within a comment */ XML_PARSER_START_TAG, /* within a start tag */ XML_PARSER_CONTENT, /* within the content */ XML_PARSER_CDATA_SECTION, /* within a CDATA section */ XML_PARSER_END_TAG, /* within a closing tag */ XML_PARSER_ENTITY_DECL, /* within an entity declaration */ XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ XML_PARSER_EPILOG, /* the Misc* after the last end tag */ XML_PARSER_IGNORE, /* within an IGNORED section */ XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */ } xmlParserInputState;
The parser now also works as a state based parser. The recursive one uses the state info for entity processing.
#define XML_DETECT_IDS 2
Bit in the loadsubset context field to tell the parser to do ID/REFs lookups. Use it to initialize xmlLoadExtDtdDefaultValue.
#define XML_COMPLETE_ATTRS 4
Bit in the loadsubset context field to tell the parser to complete the elements' attribute lists with the ones defaulted from the DTDs. Use it to initialize xmlLoadExtDtdDefaultValue.
#define XML_SKIP_IDS 8
Bit in the loadsubset context field to tell the parser not to do ID/REFs registration. Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
struct xmlParserCtxt { struct _xmlSAXHandler *sax; /* The SAX handler */ void *userData; /* For SAX interface only, used by DOM build */ xmlDocPtr myDoc; /* the document being built */ int wellFormed; /* is the document well formed */ int replaceEntities; /* shall we replace entities ? */ const xmlChar *version; /* the XML version string */ const xmlChar *encoding; /* the declared encoding, if any */ int standalone; /* standalone document */ int html; /* an HTML(1)/Docbook(2) document */ /* Input stream stack */ xmlParserInputPtr input; /* Current input stream */ int inputNr; /* Number of current input streams */ int inputMax; /* Max number of input streams */ xmlParserInputPtr *inputTab; /* stack of inputs */ /* Node analysis stack only used for DOM building */ xmlNodePtr node; /* Current parsed Node */ int nodeNr; /* Depth of the parsing stack */ int nodeMax; /* Max depth of the parsing stack */ xmlNodePtr *nodeTab; /* array of nodes */ int record_info; /* Whether node info should be kept */ xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ int errNo; /* error code */ int hasExternalSubset; /* reference and external subset */ int hasPErefs; /* the internal subset has PE refs */ int external; /* are we parsing an external entity */ int valid; /* is the document valid */ int validate; /* shall we try to validate ? */ xmlValidCtxt vctxt; /* The validity context */ xmlParserInputState instate; /* current type of input */ int token; /* next char look-ahead */ char *directory; /* the data directory */ /* Node name stack */ xmlChar *name; /* Current parsed Node */ int nameNr; /* Depth of the parsing stack */ int nameMax; /* Max depth of the parsing stack */ xmlChar * *nameTab; /* array of nodes */ long nbChars; /* number of xmlChar processed */ long checkIndex; /* used by progressive parsing lookup */ int keepBlanks; /* ugly but ... 
*/ int disableSAX; /* SAX callbacks are disabled */ int inSubset; /* Parsing is in int 1/ext 2 subset */ xmlChar * intSubName; /* name of subset */ xmlChar * extSubURI; /* URI of external subset */ xmlChar * extSubSystem; /* SYSTEM ID of external subset */ /* xml:space values */ int * space; /* Should the parser preserve spaces */ int spaceNr; /* Depth of the parsing stack */ int spaceMax; /* Max depth of the parsing stack */ int * spaceTab; /* array of space infos */ int depth; /* to prevent entity substitution loops */ xmlParserInputPtr entity; /* used to check entities boundaries */ int charset; /* encoding of the in-memory content actually an xmlCharEncoding */ int nodelen; /* Those two fields are there to */ int nodemem; /* Speed up large node parsing */ int pedantic; /* signal pedantic warnings */ void *_private; /* For user data, libxml won't touch it */ int loadsubset; /* should the external subset be loaded */ int linenumbers; /* set line number in element content */ void *catalogs; /* document's own catalog */ int recovery; /* run in recovery mode */ int progressive; /* is this a progressive parsing */ };
The parser context. NOTE: This doesn't completely define the parser state; the (current ?) design of the parser uses recursive function calls since this allows an easy mapping from the production rules of the specification to the actual code. The drawback is that the actual function calls also reflect the parser state. However, most of the parsing routines take the parser context pointer as their only argument, so migrating to a state based parser for progressive parsing shouldn't be too hard.
struct xmlSAXLocator { const xmlChar *(*getPublicId)(void *ctx); const xmlChar *(*getSystemId)(void *ctx); int (*getLineNumber)(void *ctx); int (*getColumnNumber)(void *ctx); };
A SAX Locator.
xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, const xmlChar *publicId, const xmlChar *systemId);
Callback: The entity loader. To control the loading of external entities, the application can either: - override this resolveEntity() callback in the SAX block - or better, use the xmlSetExternalEntityLoader() function to set up its own entity resolution routine.
ctx : | the user data (XML parser context) |
publicId : | The public ID of the entity |
systemId : | The system ID of the entity |
Returns : | the xmlParserInputPtr if inlined or NULL for DOM behaviour. |
void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID);
Callback on internal subset declaration.
ctx : | the user data (XML parser context) |
name : | the root element name |
ExternalID : | the external ID |
SystemID : | the SYSTEM ID (e.g. filename or URL) |
void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID);
Callback on external subset declaration.
ctx : | the user data (XML parser context) |
name : | the root element name |
ExternalID : | the external ID |
SystemID : | the SYSTEM ID (e.g. filename or URL) |
xmlEntityPtr (*getEntitySAXFunc) (void *ctx, const xmlChar *name);
Get an entity by name.
ctx : | the user data (XML parser context) |
name : | The entity name |
Returns : | the xmlEntityPtr if found. |
xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, const xmlChar *name);
Get a parameter entity by name.
ctx : | the user data (XML parser context) |
name : | The entity name |
Returns : | the xmlEntityPtr if found. |
void (*entityDeclSAXFunc) (void *ctx, const xmlChar *name, int type, const xmlChar *publicId, const xmlChar *systemId, xmlChar *content);
An entity definition has been parsed.
ctx : | the user data (XML parser context) |
name : | the entity name |
type : | the entity type |
publicId : | The public ID of the entity |
systemId : | The system ID of the entity |
content : | the entity value (without processing). |
void (*notationDeclSAXFunc) (void *ctx, const xmlChar *name, const xmlChar *publicId, const xmlChar *systemId);
What to do when a notation declaration has been parsed.
ctx : | the user data (XML parser context) |
name : | The name of the notation |
publicId : | The public ID of the entity |
systemId : | The system ID of the entity |
void (*attributeDeclSAXFunc) (void *ctx, const xmlChar *elem, const xmlChar *fullname, int type, int def, const xmlChar *defaultValue, xmlEnumerationPtr tree);
An attribute definition has been parsed.
ctx : | the user data (XML parser context) |
elem : | the name of the element |
fullname : | the attribute name |
type : | the attribute type |
def : | the type of default value |
defaultValue : | the attribute default value |
tree : | the tree of enumerated value set |
void (*elementDeclSAXFunc) (void *ctx, const xmlChar *name, int type, xmlElementContentPtr content);
An element definition has been parsed.
ctx : | the user data (XML parser context) |
name : | the element name |
type : | the element type |
content : | the element value tree |
void (*unparsedEntityDeclSAXFunc) (void *ctx, const xmlChar *name, const xmlChar *publicId, const xmlChar *systemId, const xmlChar *notationName);
What to do when an unparsed entity declaration is parsed.
ctx : | the user data (XML parser context) |
name : | The name of the entity |
publicId : | The public ID of the entity |
systemId : | The system ID of the entity |
notationName : | the name of the notation |
void (*setDocumentLocatorSAXFunc) (void *ctx, xmlSAXLocatorPtr loc);
Receive the document locator at startup, actually xmlDefaultSAXLocator. Everything is available on the context, so this is useless in our case.
ctx : | the user data (XML parser context) |
loc : | A SAX Locator |
void (*startDocumentSAXFunc) (void *ctx);
Called when the document starts being processed.
ctx : | the user data (XML parser context) |
void (*endDocumentSAXFunc) (void *ctx);
Called when the document end has been detected.
ctx : | the user data (XML parser context) |
void (*startElementSAXFunc) (void *ctx, const xmlChar *name, const xmlChar **atts);
Called when an opening tag has been processed.
ctx : | the user data (XML parser context) |
name : | The element name, including namespace prefix |
atts : | An array of name/value attributes pairs, NULL terminated |
void (*endElementSAXFunc) (void *ctx, const xmlChar *name);
Called when the end of an element has been detected.
ctx : | the user data (XML parser context) |
name : | The element name |
void (*attributeSAXFunc) (void *ctx, const xmlChar *name, const xmlChar *value);
Handle an attribute that has been read by the parser. The default handling is to convert the attribute into a DOM subtree and paste it into a new xmlAttr element added to the element.
ctx : | the user data (XML parser context) |
name : | The attribute name, including namespace prefix |
value : | The attribute value |
void (*referenceSAXFunc) (void *ctx, const xmlChar *name);
Called when an entity reference is detected.
ctx : | the user data (XML parser context) |
name : | The entity name |
void (*charactersSAXFunc) (void *ctx, const xmlChar *ch, int len);
Receiving some chars from the parser.
ctx : | the user data (XML parser context) |
ch : | a xmlChar string |
len : | the number of xmlChar |
void (*ignorableWhitespaceSAXFunc) (void *ctx, const xmlChar *ch, int len);
Receiving some ignorable whitespaces from the parser. UNUSED: by default the DOM building will use characters.
ctx : | the user data (XML parser context) |
ch : | a xmlChar string |
len : | the number of xmlChar |
void (*processingInstructionSAXFunc) (void *ctx, const xmlChar *target, const xmlChar *data);
A processing instruction has been parsed.
ctx : | the user data (XML parser context) |
target : | the target name |
data : | the PI data's |
void (*commentSAXFunc) (void *ctx, const xmlChar *value);
A comment has been parsed.
ctx : | the user data (XML parser context) |
value : | the comment content |
void (*cdataBlockSAXFunc) (void *ctx, const xmlChar *value, int len);
Called when a pcdata block has been parsed.
ctx : | the user data (XML parser context) |
value : | The pcdata content |
len : | the block length |
void (*warningSAXFunc) (void *ctx, const char *msg, ...);
Display and format a warning message, callback.
ctx : | an XML parser context |
msg : | the message to display/transmit |
... : | extra parameters for the message display |
void (*errorSAXFunc) (void *ctx, const char *msg, ...);
Display and format an error message, callback.
ctx : | an XML parser context |
msg : | the message to display/transmit |
... : | extra parameters for the message display |
void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...);
Display and format fatal error messages, callback.
Note: so far fatalError() SAX callbacks are not used; error() gets all the callbacks for errors.
ctx : | an XML parser context |
msg : | the message to display/transmit |
... : | extra parameters for the message display |
int (*isStandaloneSAXFunc) (void *ctx);
Is this document tagged standalone?
ctx : | the user data (XML parser context) |
Returns : | 1 if true |
int (*hasInternalSubsetSAXFunc) (void *ctx);
Does this document have an internal subset?
ctx : | the user data (XML parser context) |
Returns : | 1 if true |
int (*hasExternalSubsetSAXFunc) (void *ctx);
Does this document have an external subset?
ctx : | the user data (XML parser context) |
Returns : | 1 if true |
struct xmlSAXHandler { internalSubsetSAXFunc internalSubset; isStandaloneSAXFunc isStandalone; hasInternalSubsetSAXFunc hasInternalSubset; hasExternalSubsetSAXFunc hasExternalSubset; resolveEntitySAXFunc resolveEntity; getEntitySAXFunc getEntity; entityDeclSAXFunc entityDecl; notationDeclSAXFunc notationDecl; attributeDeclSAXFunc attributeDecl; elementDeclSAXFunc elementDecl; unparsedEntityDeclSAXFunc unparsedEntityDecl; setDocumentLocatorSAXFunc setDocumentLocator; startDocumentSAXFunc startDocument; endDocumentSAXFunc endDocument; startElementSAXFunc startElement; endElementSAXFunc endElement; referenceSAXFunc reference; charactersSAXFunc characters; ignorableWhitespaceSAXFunc ignorableWhitespace; processingInstructionSAXFunc processingInstruction; commentSAXFunc comment; warningSAXFunc warning; errorSAXFunc error; fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ getParameterEntitySAXFunc getParameterEntity; cdataBlockSAXFunc cdataBlock; externalSubsetSAXFunc externalSubset; int initialized; };
A SAX handler is a bunch of callbacks called by the parser when processing of the input generates data or structure information.
xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, const char *ID, xmlParserCtxtPtr context);
External entity loaders types.
URL : | The System ID of the resource requested |
ID : | The Public ID of the resource requested |
context : | the XML parser context |
Returns : | the entity input parser. |
void xmlInitParser (void);
Initialization function for the XML parser. This is not reentrant. Call once before processing in case of use in multithreaded programs.
void xmlCleanupParser (void);
Cleanup function for the XML parser. It tries to reclaim all parsing related global memory allocated for the parser processing. It doesn't deallocate any document related memory. Calling this function should not prevent reusing the parser. One should call xmlCleanupParser() only when the process has finished using the library or XML document built with it.
int xmlParserInputRead (xmlParserInputPtr in, int len);
This function refreshes the input for the parser. It doesn't try to preserve pointers to the input buffer, and discards already read data.
in : | an XML parser input |
len : | an indicative size for the lookahead |
Returns : | the number of xmlChars read, or -1 in case of error, 0 indicate the end of this entity |
int xmlParserInputGrow (xmlParserInputPtr in, int len);
This function increases the input for the parser. It tries to preserve pointers to the input buffer, and keeps already read data.
in : | an XML parser input |
len : | an indicative size for the lookahead |
Returns : | the number of xmlChars read, or -1 in case of error, 0 indicate the end of this entity |
xmlChar* xmlStrdup (const xmlChar *cur);
a strdup for array of xmlChar's. Since they are supposed to be encoded in UTF-8 or an encoding with 8bit based chars, we assume a termination mark of '0'.
cur : | the input xmlChar * |
Returns : | a new xmlChar * or NULL |
xmlChar* xmlStrndup (const xmlChar *cur, int len);
a strndup for array of xmlChar's
cur : | the input xmlChar * |
len : | the len of cur |
Returns : | a new xmlChar * or NULL |
xmlChar* xmlCharStrndup (const char *cur, int len);
a strndup for char's to xmlChar's
cur : | the input char * |
len : | the len of cur |
Returns : | a new xmlChar * or NULL |
xmlChar* xmlCharStrdup (const char *cur);
a strdup for char's to xmlChar's
cur : | the input char * |
Returns : | a new xmlChar * or NULL |
xmlChar* xmlStrsub (const xmlChar *str, int start, int len);
Extract a substring of a given string
str : | the xmlChar * array (haystack) |
start : | the index of the first char (zero based) |
len : | the length of the substring |
Returns : | the xmlChar * for the extracted substring or NULL. |
const xmlChar* xmlStrchr (const xmlChar *str, xmlChar val);
a strchr for xmlChar's
str : | the xmlChar * array |
val : | the xmlChar to search |
Returns : | the xmlChar * for the first occurrence or NULL. |
const xmlChar* xmlStrstr (const xmlChar *str, const xmlChar *val);
a strstr for xmlChar's
str : | the xmlChar * array (haystack) |
val : | the xmlChar to search (needle) |
Returns : | the xmlChar * for the first occurrence or NULL. |
const xmlChar* xmlStrcasestr (const xmlChar *str, xmlChar *val);
a case-ignoring strstr for xmlChar's
str : | the xmlChar * array (haystack) |
val : | the xmlChar to search (needle) |
Returns : | the xmlChar * for the first occurrence or NULL. |
int xmlStrcmp (const xmlChar *str1, const xmlChar *str2);
a strcmp for xmlChar's
str1 : | the first xmlChar * |
str2 : | the second xmlChar * |
Returns : | the integer result of the comparison |
int xmlStrncmp (const xmlChar *str1, const xmlChar *str2, int len);
a strncmp for xmlChar's
str1 : | the first xmlChar * |
str2 : | the second xmlChar * |
len : | the max comparison length |
Returns : | the integer result of the comparison |
int xmlStrcasecmp (const xmlChar *str1, const xmlChar *str2);
a strcasecmp for xmlChar's
str1 : | the first xmlChar * |
str2 : | the second xmlChar * |
Returns : | the integer result of the comparison |
int xmlStrncasecmp (const xmlChar *str1, const xmlChar *str2, int len);
a strncasecmp for xmlChar's
str1 : | the first xmlChar * |
str2 : | the second xmlChar * |
len : | the max comparison length |
Returns : | the integer result of the comparison |
int xmlStrEqual (const xmlChar *str1, const xmlChar *str2);
Check if both strings are equal or have the same content. Should be a bit more readable and faster than xmlStrcmp().
str1 : | the first xmlChar * |
str2 : | the second xmlChar * |
Returns : | 1 if they are equal, 0 if they are different |
int xmlStrlen (const xmlChar *str);
length of a xmlChar's string
str : | the xmlChar * array |
Returns : | the number of xmlChar contained in the ARRAY. |
xmlChar* xmlStrcat (xmlChar *cur, const xmlChar *add);
a strcat for array of xmlChar's. Since they are supposed to be encoded in UTF-8 or an encoding with 8bit based chars, we assume a termination mark of '0'.
cur : | the original xmlChar * array |
add : | the xmlChar * array added |
Returns : | a new xmlChar * containing the concatenated string. |
xmlChar* xmlStrncat (xmlChar *cur, const xmlChar *add, int len);
a strncat for array of xmlChar's, it will extend cur with the len first bytes of add.
cur : | the original xmlChar * array |
add : | the xmlChar * array added |
len : | the length of add |
Returns : | a new xmlChar *, the original cur is reallocated if needed and should not be freed |
xmlDocPtr xmlParseDoc (xmlChar *cur);
parse an XML in-memory document and build a tree.
cur : | a pointer to an array of xmlChar |
Returns : | the resulting document tree |
xmlDocPtr xmlParseMemory (const char *buffer, int size);
parse an XML in-memory block and build a tree.
buffer : | a pointer to a char array |
size : | the size of the array |
Returns : | the resulting document tree |
xmlDocPtr xmlParseFile (const char *filename);
parse an XML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.
filename : | the filename |
Returns : | the resulting document tree if the file was wellformed, NULL otherwise. |
int xmlSubstituteEntitiesDefault (int val);
Set and return the previous value for default entity support.
Initially the parser always keeps entity references instead of substituting
entity values in the output. This function has to be used to change the
default parser behavior.
SAX::substituteEntities() has to be used for changing that on a file by file basis.
val : | int 0 or 1 |
Returns : | the previous value: 0 for no substitution, 1 for substitution. |
int xmlKeepBlanksDefault (int val);
Set and return the previous value for default blanks text nodes support. The 1.x version of the parser used a heuristic to try to detect ignorable white spaces. As a result the SAX callback was generating ignorableWhitespace() callbacks instead of characters() ones, and when using the DOM output, text nodes containing those blanks were not generated. The 2.x and later versions will switch to the XML standard way, and ignorableWhitespace() callbacks are only generated when running the parser in validating mode and when the current element doesn't allow CDATA or mixed content. This function is provided as a way to force the standard behavior on 1.X libs and to switch back to the old mode for compatibility when running 1.X client code on 2.X. Upgrade of 1.X code should be done by using the xmlIsBlankNode() commodity function to detect the "empty" nodes generated. This value also affects autogeneration of indentation when saving code: if blank sections are kept, indentation is not generated.
val : | int 0 or 1 |
Returns : | the previous value of the setting (0 or 1). |
void xmlStopParser (xmlParserCtxtPtr ctxt);
Blocks further parser processing
ctxt : | an XML parser context |
int xmlPedanticParserDefault (int val);
Set and return the previous value for enabling pedantic warnings.
val : | int 0 or 1 |
Returns : | the previous value of the setting (0 or 1). |
int xmlLineNumbersDefault (int val);
Set and return the previous value for enabling line numbers in elements contents. This may break old applications and is turned off by default.
val : | int 0 or 1 |
Returns : | the previous value of the setting (0 or 1). |
xmlDocPtr xmlRecoverDoc (xmlChar *cur);
parse an XML in-memory document and build a tree. In the case the document is not Well Formed, a tree is built anyway
cur : | a pointer to an array of xmlChar |
Returns : | the resulting document tree |
xmlDocPtr xmlRecoverMemory (const char *buffer, int size);
parse an XML in-memory block and build a tree. In the case the document is not Well Formed, a tree is built anyway
buffer : | a pointer to a char array |
size : | the size of the array |
Returns : | the resulting document tree |
xmlDocPtr xmlRecoverFile (const char *filename);
parse an XML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. In the case the document is not Well Formed, a tree is built anyway
filename : | the filename |
Returns : | the resulting document tree |
int xmlParseDocument (xmlParserCtxtPtr ctxt);
parse an XML document (and build a tree if using the standard SAX interface).
[1] document ::= prolog element Misc*
[22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
ctxt : | an XML parser context |
Returns : | 0, or -1 in case of error. The parser context is augmented as a result of the parsing. |
int xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt);
Parse a general parsed entity. An external general parsed entity is well-formed if it matches the production labeled extParsedEnt.
[78] extParsedEnt ::= TextDecl? content
ctxt : | an XML parser context |
Returns : | 0, or -1 in case of error. The parser context is augmented as a result of the parsing. |
xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax, xmlChar *cur, int recovery);
parse an XML in-memory document and build a tree. It uses the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.
sax : | the SAX handler block |
cur : | a pointer to an array of xmlChar |
recovery : | work in recovery mode, i.e. tries to read not Well Formed documents |
Returns : | the resulting document tree |
int xmlSAXUserParseFile (xmlSAXHandlerPtr sax, void *user_data, const char *filename);
parse an XML file and call the given SAX handler routines. Automatic support for ZLIB/Compress compressed document is provided
sax : | a SAX handler |
user_data : | The user data returned on SAX callbacks |
filename : | a file name |
Returns : | 0 in case of success or an error number otherwise |
int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, void *user_data, const char *buffer, int size);
A better SAX parsing routine. parse an XML in-memory buffer and call the given SAX handler routines.
sax : | a SAX handler |
user_data : | The user data returned on SAX callbacks |
buffer : | an in-memory XML document input |
size : | the length of the XML document in bytes |
Returns : | 0 in case of success or an error number otherwise |
xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax, const char *buffer, int size, int recovery);
parse an XML in-memory block and use the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.
sax : | the SAX handler block |
buffer : | a pointer to a char array |
size : | the size of the array |
recovery : | work in recovery mode, i.e. tries to read not Well Formed documents |
Returns : | the resulting document tree |
xmlDocPtr xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, const char *buffer, int size, int recovery, void *data);
parse an XML in-memory block and use the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.
User data (void *) is stored within the parser context in the context's _private member, so it is available nearly everywhere in libxml
sax : | the SAX handler block |
buffer : | a pointer to a char array |
size : | the size of the array |
recovery : | work in recovery mode, i.e. tries to read not Well Formed documents |
data : | the userdata |
Returns : | the resulting document tree |
xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax, const char *filename, int recovery);
parse an XML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.
sax : | the SAX handler block |
filename : | the filename |
recovery : | work in recovery mode, i.e. tries to read not Well Formed documents |
Returns : | the resulting document tree |
xmlDocPtr xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, const char *filename, int recovery, void *data);
parse an XML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.
User data (void *) is stored within the parser context in the context's _private member, so it is available nearly everywhere in libxml
sax : | the SAX handler block |
filename : | the filename |
recovery : | work in recovery mode, i.e. tries to read not Well Formed documents |
data : | the userdata |
Returns : | the resulting document tree |
xmlDocPtr xmlSAXParseEntity (xmlSAXHandlerPtr sax, const char *filename);
parse an XML external entity out of context and build a tree. It uses the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.
[78] extParsedEnt ::= TextDecl? content
This corresponds to a "Well Balanced" chunk
sax : | the SAX handler block |
filename : | the filename |
Returns : | the resulting document tree |
xmlDocPtr xmlParseEntity (const char *filename);
parse an XML external entity out of context and build a tree.
[78] extParsedEnt ::= TextDecl? content
This corresponds to a "Well Balanced" chunk
filename : | the filename |
Returns : | the resulting document tree |
xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID, const xmlChar *SystemID);
Load and parse an external subset.
ExternalID : | a NAME* containing the External ID of the DTD |
SystemID : | a NAME* containing the URL to the DTD |
Returns : | the resulting xmlDtdPtr or NULL in case of error. |
xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, const xmlChar *ExternalID, const xmlChar *SystemID);
Load and parse an external subset.
sax : | the SAX handler block |
ExternalID : | a NAME* containing the External ID of the DTD |
SystemID : | a NAME* containing the URL to the DTD |
Returns : | the resulting xmlDtdPtr or NULL in case of error. |
xmlDtdPtr xmlIOParseDTD (xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, xmlCharEncoding enc);
Load and parse a DTD
sax : | the SAX handler block or NULL |
input : | an Input Buffer |
enc : | the charset encoding if known |
Returns : | the resulting xmlDtdPtr or NULL in case of error. input will be freed at parsing end. |
int xmlParseBalancedChunkMemory (xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst);
Parse a well-balanced chunk of an XML document called by the parser. The allowed sequence for the Well Balanced Chunk is the one defined by the content production in the XML grammar:
[43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
doc : | the document the chunk pertains to |
sax : | the SAX handler block (possibly NULL) |
user_data : | The user data returned on SAX callbacks (possibly NULL) |
depth : | Used for loop detection, use 0 |
string : | the input string in UTF8 or ISO-Latin (zero terminated) |
lst : | the return value for the set of parsed nodes |
Returns : | 0 if the chunk is well balanced, -1 in case of args problem and the parser error code otherwise |
int xmlParseBalancedChunkMemoryRecover (xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, int recover);
Parse a well-balanced chunk of an XML document called by the parser. The allowed sequence for the Well Balanced Chunk is the one defined by the content production in the XML grammar:
[43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
doc : | the document the chunk pertains to |
sax : | the SAX handler block (possibly NULL) |
user_data : | The user data returned on SAX callbacks (possibly NULL) |
depth : | Used for loop detection, use 0 |
string : | the input string in UTF8 or ISO-Latin (zero terminated) |
lst : | the return value for the set of parsed nodes |
recover : | return nodes even if the data is broken (use 0) |
Returns : | 0 if the chunk is well balanced, -1 in case of args problem and the parser error code otherwise. In case recover is set to 1, the nodelist will not be empty even if the parsed chunk is not well balanced. |
int xmlParseExternalEntity (xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst);
Parse an external general entity. An external general parsed entity is well-formed if it matches the production labeled extParsedEnt.
[78] extParsedEnt ::= TextDecl? content
doc : | the document the chunk pertains to |
sax : | the SAX handler block (possibly NULL) |
user_data : | The user data returned on SAX callbacks (possibly NULL) |
depth : | Used for loop detection, use 0 |
URL : | the URL for the entity to load |
ID : | the System ID for the entity to load |
lst : | the return value for the set of parsed nodes |
Returns : | 0 if the entity is well formed, -1 in case of args problem and the parser error code otherwise |
int xmlParseCtxtExternalEntity (xmlParserCtxtPtr ctx, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst);
Parse an external general entity within an existing parsing context. An external general parsed entity is well-formed if it matches the production labeled extParsedEnt.
[78] extParsedEnt ::= TextDecl? content
ctx : | the existing parsing context |
URL : | the URL for the entity to load |
ID : | the System ID for the entity to load |
lst : | the return value for the set of parsed nodes |
Returns : | 0 if the entity is well formed, -1 in case of args problem and the parser error code otherwise |
int xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
Initialize a parser context
ctxt : | an XML parser context |
Returns : | 0 in case of success and -1 in case of error |
void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
Clear (release owned resources) and reinitialize a parser context
ctxt : | an XML parser context |
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.
ctxt : | an XML parser context |
void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, const xmlChar *buffer, const char *filename);
Setup the parser context to parse a new buffer; clears any prior contents from the parser context. The buffer parameter must not be NULL, but the filename parameter can be NULL.
ctxt : | an XML parser context |
buffer : | a xmlChar * buffer |
filename : | a file name |
xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
Creates a parser context for an XML in-memory document.
cur : | a pointer to an array of xmlChar |
Returns : | the new parser context or NULL |
int xmlGetFeaturesList (int *len, const char **result);
Copy at most *len feature names into the result array
len : | the length of the features name array (input/output) |
result : | an array of string to be filled with the features name. |
Returns : | -1 in case of error, or the total number of features; len is updated with the number of strings copied. The strings must not be deallocated. |
int xmlGetFeature (xmlParserCtxtPtr ctxt, const char *name, void *result);
Read the current value of one feature of this parser instance
ctxt : | an XML/HTML parser context |
name : | the feature name |
result : | location to store the result |
Returns : | -1 in case of error, 0 otherwise |
int xmlSetFeature (xmlParserCtxtPtr ctxt, const char *name, void *value);
Change the current value of one feature of this parser instance
ctxt : | an XML/HTML parser context |
name : | the feature name |
value : | pointer to the location of the new value |
Returns : | -1 in case of error, 0 otherwise |
xmlParserCtxtPtr xmlCreatePushParserCtxt (xmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename);
Create a parser context for using the XML parser in push mode. If chunk is non-NULL and size is non-zero, the data is used to detect the encoding. The remaining characters will be parsed so they don't need to be fed in again through xmlParseChunk. To allow content encoding detection, size should be >= 4. The value of filename is used for fetching external entities and error/warning reports.
sax : | a SAX handler |
user_data : | The user data returned on SAX callbacks |
chunk : | a pointer to an array of chars |
size : | number of chars in the array |
filename : | an optional file name or URI |
Returns : | the new parser context or NULL |
int xmlParseChunk (xmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate);
Parse a Chunk of memory
ctxt : | an XML parser context |
chunk : | a char array |
size : | the size in bytes of the chunk |
terminate : | last chunk indicator |
Returns : | zero if no error, the xmlParserErrors otherwise. |
xmlParserCtxtPtr xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, void *user_data, xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, void *ioctx, xmlCharEncoding enc);
Create a parser context for using the XML parser with an existing I/O stream
sax : | a SAX handler |
user_data : | The user data returned on SAX callbacks |
ioread : | an I/O read function |
ioclose : | an I/O close function |
ioctx : | an I/O handler |
enc : | the charset encoding if known |
Returns : | the new parser context or NULL |
xmlParserInputPtr xmlNewIOInputStream (xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, xmlCharEncoding enc);
Create a new input stream structure encapsulating the input into a stream suitable for the parser.
ctxt : | an XML parser context |
input : | an I/O Input |
enc : | the charset encoding if known |
Returns : | the new input stream or NULL |
const xmlParserNodeInfo* xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, const xmlNodePtr node);
Find the parser node info struct for a given node
ctxt : | an XML parser context |
node : | an XML node within the tree |
Returns : | an xmlParserNodeInfo block pointer or NULL |
void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
Initialize (set to initial state) node info sequence
seq : | a node info sequence pointer |
void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
Clear (release memory and reinitialize) node info sequence
seq : | a node info sequence pointer |
unsigned long xmlParserFindNodeInfoIndex (const xmlParserNodeInfoSeqPtr seq, const xmlNodePtr node);
Find the index that the info record for the given node is or should be at in a sorted sequence
seq : | a node info sequence pointer |
node : | an XML node pointer |
Returns : | a long indicating the position of the record |
void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, const xmlParserNodeInfoPtr info);
Insert node info record into the sorted sequence
ctxt : | an XML parser context |
info : | a node info record pointer |
void xmlSetExternalEntityLoader (xmlExternalEntityLoader f);
Changes the default external entity resolver function for the application
f : | the new entity resolver function |
xmlExternalEntityLoader xmlGetExternalEntityLoader (void);
Get the default external entity resolver function for the application
Returns : | the xmlExternalEntityLoader function pointer |
xmlParserInputPtr xmlLoadExternalEntity (const char *URL, const char *ID, xmlParserCtxtPtr ctxt);
Load an external entity. Note that the use of this function for unparsed entities may generate problems. TODO: a more generic External entity API must be designed.
URL : | the URL for the entity to load |
ID : | the Public ID for the entity to load |
ctxt : | the context in which the entity is called or NULL |
Returns : | the xmlParserInputPtr or NULL |
<< Libxml Library Reference | xmlreader >> |