15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Summary: interface for an HTML 4.0 non-verifying parser 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Description: this module implements an HTML 4.0 non-verifying parser 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * with API compatible with the XML parser ones. It should 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * be able to parse "real world" HTML, even if severely 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * broken from a specification point of view. 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Copy: See Copyright for the status of this software. 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Author: Daniel Veillard 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef __HTML_PARSER_H__ 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define __HTML_PARSER_H__ 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/xmlversion.h> 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/parser.h> 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef LIBXML_HTML_ENABLED 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef __cplusplus 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)extern "C" { 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Most of the back-end structures from XML and HTML are shared. 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef xmlParserCtxt htmlParserCtxt; 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef xmlParserCtxtPtr htmlParserCtxtPtr; 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef xmlParserNodeInfo htmlParserNodeInfo; 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef xmlSAXHandler htmlSAXHandler; 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef xmlParserInput htmlParserInput; 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef xmlParserInputPtr htmlParserInputPtr; 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef xmlDocPtr htmlDocPtr; 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef xmlNodePtr htmlNodePtr; 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Internal description of an HTML element, representing HTML 4.01 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * and XHTML 1.0 (which share the same structure). 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef struct _htmlElemDesc htmlElemDesc; 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef htmlElemDesc *htmlElemDescPtr; 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct _htmlElemDesc { 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *name; /* The tag name */ 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char startTag; /* Whether the start tag can be implied */ 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char endTag; /* Whether the end tag can be implied */ 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char saveEndTag; /* Whether the end tag should be saved */ 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char empty; /* Is this an empty element ? */ 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char depr; /* Is this a deprecated element ? */ 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char isinline; /* is this a block 0 or inline 1 element */ 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *desc; /* the description */ 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* NRK Jan.2003 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * New fields encapsulating HTML structure 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Bugs: 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This is a very limited representation. It fails to tell us when 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * an element *requires* subelements (we only have whether they're 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * allowed or not), and it doesn't tell us where CDATA and PCDATA 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * are allowed. Some element relationships are not fully represented: 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * these are flagged with the word MODIFIER 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char** subelts; /* allowed sub-elements of this element */ 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* defaultsubelt; /* subelement for suggested auto-repair 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if necessary or NULL */ 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char** attrs_opt; /* Optional Attributes */ 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char** attrs_depr; /* Additional deprecated attributes */ 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char** attrs_req; /* Required attributes */ 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Internal description of an HTML entity. 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef struct _htmlEntityDesc htmlEntityDesc; 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef htmlEntityDesc *htmlEntityDescPtr; 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct _htmlEntityDesc { 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int value; /* the UNICODE value for the character */ 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *name; /* The entity name */ 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *desc; /* the description */ 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * There is only few public functions. 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN const htmlElemDesc * XMLCALL 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlTagLookup (const xmlChar *tag); 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN const htmlEntityDesc * XMLCALL 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlEntityLookup(const xmlChar *name); 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN const htmlEntityDesc * XMLCALL 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlEntityValueLookup(unsigned int value); 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlIsAutoClosed(htmlDocPtr doc, 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlNodePtr elem); 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoCloseTag(htmlDocPtr doc, 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name, 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlNodePtr elem); 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN const htmlEntityDesc * XMLCALL 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseEntityRef(htmlParserCtxtPtr ctxt, 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar **str); 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseCharRef(htmlParserCtxtPtr ctxt); 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN void XMLCALL 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseElement(htmlParserCtxtPtr ctxt); 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlParserCtxtPtr XMLCALL 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlNewParserCtxt(void); 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlParserCtxtPtr XMLCALL 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCreateMemoryParserCtxt(const char *buffer, 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int size); 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseDocument(htmlParserCtxtPtr ctxt); 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlSAXParseDoc (xmlChar *cur, 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlSAXHandlerPtr sax, 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *userData); 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseDoc (xmlChar *cur, 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding); 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlSAXParseFile(const char *filename, 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlSAXHandlerPtr sax, 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *userData); 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseFile (const char *filename, 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding); 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UTF8ToHtml (unsigned char *out, 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int *outlen, 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char *in, 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int *inlen); 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlEncodeEntities(unsigned char *out, 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int *outlen, 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char *in, 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int *inlen, int quoteChar); 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlIsScriptAttribute(const xmlChar *name); 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlHandleOmittedElem(int val); 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef LIBXML_PUSH_ENABLED 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Interfaces for the Push mode. 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlParserCtxtPtr XMLCALL 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *user_data, 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *chunk, 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int size, 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *filename, 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCharEncoding enc); 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseChunk (htmlParserCtxtPtr ctxt, 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *chunk, 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int size, 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int terminate); 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* LIBXML_PUSH_ENABLED */ 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN void XMLCALL 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * New set of simpler/more flexible APIs 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * xmlParserOption: 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This is the set of XML parser options that can be passed down 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to the xmlReadDoc() and similar calls. 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef enum { 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */ 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */ 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */ 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_PARSE_NONET = 1<<11,/* Forbid network access */ 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */ 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */ 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} htmlParserOption; 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN void XMLCALL 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReset (htmlParserCtxtPtr ctxt); 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtUseOptions (htmlParserCtxtPtr ctxt, 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlReadDoc (const xmlChar *cur, 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlReadFile (const char *URL, 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlReadMemory (const char *buffer, 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int size, 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlReadFd (int fd, 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlReadIO (xmlInputReadCallback ioread, 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInputCloseCallback ioclose, 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *ioctx, 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReadDoc (xmlParserCtxtPtr ctxt, 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *cur, 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReadFile (xmlParserCtxtPtr ctxt, 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *filename, 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReadMemory (xmlParserCtxtPtr ctxt, 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *buffer, 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int size, 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReadFd (xmlParserCtxtPtr ctxt, 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int fd, 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlDocPtr XMLCALL 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReadIO (xmlParserCtxtPtr ctxt, 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInputReadCallback ioread, 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInputCloseCallback ioclose, 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *ioctx, 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options); 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* NRK/Jan2003: further knowledge of HTML structure 2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef enum { 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_NA = 0 , /* something we don't check at all */ 2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_INVALID = 0x1 , 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_DEPRECATED = 0x2 , 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_VALID = 0x4 , 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */ 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} htmlStatus ; 2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* Using htmlElemDesc rather than name here, to emphasise the fact 2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) that otherwise there's a lookup overhead 2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/ 2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ; 2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ; 2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ; 2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ; 2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlDefaultSubelement: 2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elt: HTML element 2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the default subelement for this element 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define htmlDefaultSubelement(elt) elt->defaultsubelt 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlElementAllowedHereDesc: 2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @parent: HTML parent element 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elt: HTML element 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether an HTML element description may be a 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * direct child of the specified element. 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if allowed; 0 otherwise. 2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define htmlElementAllowedHereDesc(parent,elt) \ 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlElementAllowedHere((parent), (elt)->name) 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlRequiredAttrs: 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elt: HTML element 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the attributes required for the specified element. 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define htmlRequiredAttrs(elt) (elt)->attrs_req 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef __cplusplus 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* LIBXML_HTML_ENABLED */ 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* __HTML_PARSER_H__ */ 305