libxml-HTMLparser.html revision d7cec926292b83b46af271285b0a305e99f66cd0
1<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>HTMLparser</title><meta name="generator" content="DocBook XSL Stylesheets V1.58.1"><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css">
2        .synopsis, .classsynopsis {
3            background: #eeeeee;
4            border: solid 1px #aaaaaa;
5            padding: 0.5em;
6        }
7        .programlisting {
8            background: #eeeeff;
9            border: solid 1px #aaaaff;
10            padding: 0.5em;
11        }
12        .variablelist {
13            padding: 4px;
14            margin-left: 3em;
15        }
16        .navigation {
17            background: #ffeeee;
18            border: solid 1px #ffaaaa;
19            margin-top: 0.5em;
20            margin-bottom: 0.5em;
21        }
22        .navigation a {
23            color: #770000;
24        }
25        .navigation a:visited {
26            color: #550000;
27        }
28        .navigation .title {
29            font-size: 200%;
30        }
31      </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="libxml-lib.html" title="Libxml Library Reference"><link rel="previous" href="libxml-xmlerror.html" title="xmlerror"><link rel="next" href="libxml-HTMLtree.html" title="HTMLtree"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-xmlerror.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="libxml-lib.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-HTMLtree.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-HTMLparser"></a><div class="titlepage"></div><div class="refnamediv"><h2>HTMLparser</h2><p>HTMLparser &#8212; </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis">
32
33
34
35typedef     <a href="libxml-HTMLparser.html#htmlParserCtxt">htmlParserCtxt</a>;
36typedef     <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a>;
37typedef     <a href="libxml-HTMLparser.html#htmlParserNodeInfo">htmlParserNodeInfo</a>;
38typedef     <a href="libxml-HTMLparser.html#htmlSAXHandler">htmlSAXHandler</a>;
39typedef     <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a>;
40typedef     <a href="libxml-HTMLparser.html#htmlParserInput">htmlParserInput</a>;
41typedef     <a href="libxml-HTMLparser.html#htmlParserInputPtr">htmlParserInputPtr</a>;
42typedef     <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>;
43typedef     <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a>;
44struct      <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>;
45typedef     <a href="libxml-HTMLparser.html#htmlElemDescPtr">htmlElemDescPtr</a>;
46struct      <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>;
47typedef     <a href="libxml-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a>;
48const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* <a href="libxml-HTMLparser.html#htmlTagLookup">htmlTagLookup</a>           (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);
49const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityLookup">htmlEntityLookup</a>      (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);
50const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityValueLookup">htmlEntityValueLookup</a> (unsigned int value);
51int         <a href="libxml-HTMLparser.html#htmlIsAutoClosed">htmlIsAutoClosed</a>                (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
52                                             <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);
53int         <a href="libxml-HTMLparser.html#htmlAutoCloseTag">htmlAutoCloseTag</a>                (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
54                                             const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name,
55                                             <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);
56const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlParseEntityRef">htmlParseEntityRef</a>    (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
57                                             <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);
58int         <a href="libxml-HTMLparser.html#htmlParseCharRef">htmlParseCharRef</a>                (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
59void        <a href="libxml-HTMLparser.html#htmlParseElement">htmlParseElement</a>                (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
60<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt</a>
61                                            (const char *buffer,
62                                             int size);
63int         <a href="libxml-HTMLparser.html#htmlParseDocument">htmlParseDocument</a>               (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
64<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>  <a href="libxml-HTMLparser.html#htmlSAXParseDoc">htmlSAXParseDoc</a>                 (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
65                                             const char *encoding,
66                                             <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
67                                             void *userData);
68<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>  <a href="libxml-HTMLparser.html#htmlParseDoc">htmlParseDoc</a>                    (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
69                                             const char *encoding);
70<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>  <a href="libxml-HTMLparser.html#htmlSAXParseFile">htmlSAXParseFile</a>                (const char *filename,
71                                             const char *encoding,
72                                             <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
73                                             void *userData);
74<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>  <a href="libxml-HTMLparser.html#htmlParseFile">htmlParseFile</a>                   (const char *filename,
75                                             const char *encoding);
76int         <a href="libxml-HTMLparser.html#UTF8ToHtml">UTF8ToHtml</a>                      (unsigned char *out,
77                                             int *outlen,
78                                             unsigned char *in,
79                                             int *inlen);
80int         <a href="libxml-HTMLparser.html#htmlEncodeEntities">htmlEncodeEntities</a>              (unsigned char *out,
81                                             int *outlen,
82                                             unsigned char *in,
83                                             int *inlen,
84                                             int quoteChar);
85int         <a href="libxml-HTMLparser.html#htmlIsScriptAttribute">htmlIsScriptAttribute</a>           (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);
86int         <a href="libxml-HTMLparser.html#htmlHandleOmittedElem">htmlHandleOmittedElem</a>           (int val);
87void        <a href="libxml-HTMLparser.html#htmlFreeParserCtxt">htmlFreeParserCtxt</a>              (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
88<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt</a>  (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
89                                             void *user_data,
90                                             const char *chunk,
91                                             int size,
92                                             const char *filename,
93                                             <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);
94int         <a href="libxml-HTMLparser.html#htmlParseChunk">htmlParseChunk</a>                  (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
95                                             const char *chunk,
96                                             int size,
97                                             int terminate);
98</pre></div><div class="refsect1" lang="en"><h2>Description</h2><p>
99
100</p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="htmlParserCtxt"></a>htmlParserCtxt</h3><pre class="programlisting">typedef xmlParserCtxt htmlParserCtxt;
101</pre><p>
102
103</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserCtxtPtr"></a>htmlParserCtxtPtr</h3><pre class="programlisting">typedef xmlParserCtxtPtr htmlParserCtxtPtr;
104</pre><p>
105
106</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserNodeInfo"></a>htmlParserNodeInfo</h3><pre class="programlisting">typedef xmlParserNodeInfo htmlParserNodeInfo;
107</pre><p>
108
109</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXHandler"></a>htmlSAXHandler</h3><pre class="programlisting">typedef xmlSAXHandler htmlSAXHandler;
110</pre><p>
111
112</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXHandlerPtr"></a>htmlSAXHandlerPtr</h3><pre class="programlisting">typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
113</pre><p>
114
115</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserInput"></a>htmlParserInput</h3><pre class="programlisting">typedef xmlParserInput htmlParserInput;
116</pre><p>
117
118</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserInputPtr"></a>htmlParserInputPtr</h3><pre class="programlisting">typedef xmlParserInputPtr htmlParserInputPtr;
119</pre><p>
120
121</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlDocPtr"></a>htmlDocPtr</h3><pre class="programlisting">typedef xmlDocPtr htmlDocPtr;
122</pre><p>
123
124</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlNodePtr"></a>htmlNodePtr</h3><pre class="programlisting">typedef xmlNodePtr htmlNodePtr;
125</pre><p>
126
127</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlElemDesc"></a>struct htmlElemDesc</h3><pre class="programlisting">struct htmlElemDesc {
128    const char *name;	/* The tag name */
129    char startTag;      /* Whether the start tag can be implied */
130    char endTag;        /* Whether the end tag can be implied */
131    char saveEndTag;    /* Whether the end tag should be saved */
132    char empty;         /* Is this an empty element ? */
133    char depr;          /* Is this a deprecated element ? */
134    char dtd;           /* 1: only in Loose DTD, 2: only Frameset one */
135    char isinline;      /* is this a block 0 or inline 1 element */
136    const char *desc;   /* the description */
137
138/* NRK Jan.2003
139 * New fields encapsulating HTML structure
140 *
141 * Bugs:
142 *	This is a very limited representation.  It fails to tell us when
143 *	an element *requires* subelements (we only have whether they're
144 *	allowed or not), and it doesn't tell us where CDATA and PCDATA
145 *	are allowed.  Some element relationships are not fully represented:
146 *	these are flagged with the word MODIFIER
147 */
148    const char** subelts;		/* allowed sub-elements of this element */
149    const char* defaultsubelt;	/* subelement for suggested auto-repair
150					   if necessary or NULL */
151    const char** attrs_opt;		/* Optional Attributes */
152    const char** attrs_depr;		/* Additional deprecated attributes */
153    const char** attrs_req;		/* Required attributes */
154};
155</pre><p>
156
157</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlElemDescPtr"></a>htmlElemDescPtr</h3><pre class="programlisting">typedef htmlElemDesc *htmlElemDescPtr;
158</pre><p>
159
160</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityDesc"></a>struct htmlEntityDesc</h3><pre class="programlisting">struct htmlEntityDesc {
161    unsigned int value;	/* the UNICODE value for the character */
162    const char *name;	/* The entity name */
163    const char *desc;   /* the description */
164};
165</pre><p>
166
167</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityDescPtr"></a>htmlEntityDescPtr</h3><pre class="programlisting">typedef htmlEntityDesc *htmlEntityDescPtr;
168</pre><p>
169
170</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlTagLookup"></a>htmlTagLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* htmlTagLookup           (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);</pre><p>
171Lookup the HTML tag in the ElementTable</p><p>
172
173</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>tag</tt></i>&nbsp;:</span></td><td>  The tag name in lowercase
174</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the related htmlElemDescPtr or NULL if not found.
175</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityLookup"></a>htmlEntityLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityLookup      (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p>
176Lookup the given entity in EntitiesTable
177</p><p>
178TODO: the linear scan is really ugly, a hash table is really needed.</p><p>
179
180</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td> the entity name
181</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the associated htmlEntityDescPtr if found, NULL otherwise.
182</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityValueLookup"></a>htmlEntityValueLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityValueLookup (unsigned int value);</pre><p>
183Lookup the given entity in EntitiesTable
184</p><p>
185TODO: the linear scan is really ugly, a hash table is really needed.</p><p>
186
187</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>value</tt></i>&nbsp;:</span></td><td> the entity's unicode value
188</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the associated htmlEntityDescPtr if found, NULL otherwise.
189</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlIsAutoClosed"></a>htmlIsAutoClosed ()</h3><pre class="programlisting">int         htmlIsAutoClosed                (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
190                                             <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p>
191The HTML DTD allows a tag to implicitly close other tags.
192The list is kept in htmlStartClose array. This function checks
193if a tag is autoclosed by one of its children</p><p>
194
195</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>doc</tt></i>&nbsp;:</span></td><td>  the HTML document
196</td></tr><tr><td><span class="term"><i><tt>elem</tt></i>&nbsp;:</span></td><td>  the HTML element
197</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>1 if autoclosed, 0 otherwise
198</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlAutoCloseTag"></a>htmlAutoCloseTag ()</h3><pre class="programlisting">int         htmlAutoCloseTag                (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
199                                             const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name,
200                                             <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p>
201The HTML DTD allows a tag to implicitly close other tags.
202The list is kept in htmlStartClose array. This function checks
203if the element or one of its children would autoclose the
204given tag.</p><p>
205
206</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>doc</tt></i>&nbsp;:</span></td><td>  the HTML document
207</td></tr><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td>  The tag name
208</td></tr><tr><td><span class="term"><i><tt>elem</tt></i>&nbsp;:</span></td><td>  the HTML element
209</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>1 if autoclose, 0 otherwise
210</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseEntityRef"></a>htmlParseEntityRef ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlParseEntityRef    (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
211                                             <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);</pre><p>
212parse an HTML ENTITY reference
213</p><p>
214[68] EntityRef ::= '&amp;' Name ';'</p><p>
215
216</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>&nbsp;:</span></td><td>  an HTML parser context
217</td></tr><tr><td><span class="term"><i><tt>str</tt></i>&nbsp;:</span></td><td>  location to store the entity name
218</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the associated htmlEntityDescPtr if found, or NULL otherwise,
219        if non-NULL *str will have to be freed by the caller.
220</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseCharRef"></a>htmlParseCharRef ()</h3><pre class="programlisting">int         htmlParseCharRef                (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
221parse Reference declarations
222</p><p>
223[66] CharRef ::= '&amp;#' [0-9]+ ';' |
224                 '&amp;#x' [0-9a-fA-F]+ ';'</p><p>
225
226</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>&nbsp;:</span></td><td>  an HTML parser context
227</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the value parsed (as an int)
228</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseElement"></a>htmlParseElement ()</h3><pre class="programlisting">void        htmlParseElement                (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
229parse an HTML element, this is highly recursive
230</p><p>
231[39] element ::= EmptyElemTag | STag content ETag
232</p><p>
233[41] Attribute ::= Name Eq AttValue</p><p>
234
235</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>&nbsp;:</span></td><td>  an HTML parser context
236</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlCreateMemoryParserCtxt"></a>htmlCreateMemoryParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreateMemoryParserCtxt
237                                            (const char *buffer,
238                                             int size);</pre><p>
239Create a parser context for an HTML in-memory document.</p><p>
240
241</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>buffer</tt></i>&nbsp;:</span></td><td>  a pointer to a char array
242</td></tr><tr><td><span class="term"><i><tt>size</tt></i>&nbsp;:</span></td><td>  the size of the array
243</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the new parser context or NULL
244</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseDocument"></a>htmlParseDocument ()</h3><pre class="programlisting">int         htmlParseDocument               (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
245parse an HTML document (and build a tree if using the standard SAX
246interface).</p><p>
247
248</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>&nbsp;:</span></td><td>  an HTML parser context
249</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 on success, -1 in case of error. The parser context is augmented
250               as a result of the parsing.
251</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXParseDoc"></a>htmlSAXParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>  htmlSAXParseDoc                 (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
252                                             const char *encoding,
253                                             <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
254                                             void *userData);</pre><p>
255Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
256to handle parse events. If sax is NULL, fallback to the default DOM
257behavior and return a tree.</p><p>
258
259</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>cur</tt></i>&nbsp;:</span></td><td>  a pointer to an array of xmlChar
260</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>&nbsp;:</span></td><td>  a free form C string describing the HTML document encoding, or NULL
261</td></tr><tr><td><span class="term"><i><tt>sax</tt></i>&nbsp;:</span></td><td>  the SAX handler block
262</td></tr><tr><td><span class="term"><i><tt>userData</tt></i>&nbsp;:</span></td><td> if using SAX, this pointer will be provided on callbacks. 
263</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree when sax is NULL (default DOM building), or NULL if the document is
264    not well formed.
265</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseDoc"></a>htmlParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>  htmlParseDoc                    (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
266                                             const char *encoding);</pre><p>
267parse an HTML in-memory document and build a tree.</p><p>
268
269</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>cur</tt></i>&nbsp;:</span></td><td>  a pointer to an array of xmlChar
270</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>&nbsp;:</span></td><td>  a free form C string describing the HTML document encoding, or NULL
271</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree
272</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXParseFile"></a>htmlSAXParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>  htmlSAXParseFile                (const char *filename,
273                                             const char *encoding,
274                                             <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
275                                             void *userData);</pre><p>
276parse an HTML file and build a tree. Automatic support for ZLIB/Compress
277compressed document is provided by default if found at compile-time.
278It uses the given SAX function block to handle the parsing callbacks.
279If sax is NULL, fallback to the default DOM tree building routines.</p><p>
280
281</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>filename</tt></i>&nbsp;:</span></td><td>  the filename
282</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>&nbsp;:</span></td><td>  a free form C string describing the HTML document encoding, or NULL
283</td></tr><tr><td><span class="term"><i><tt>sax</tt></i>&nbsp;:</span></td><td>  the SAX handler block
284</td></tr><tr><td><span class="term"><i><tt>userData</tt></i>&nbsp;:</span></td><td> if using SAX, this pointer will be provided on callbacks. 
285</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree when sax is NULL (default DOM building), or NULL if the document is
286    not well formed.
287</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseFile"></a>htmlParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>  htmlParseFile                   (const char *filename,
288                                             const char *encoding);</pre><p>
289parse an HTML file and build a tree. Automatic support for ZLIB/Compress
290compressed document is provided by default if found at compile-time.</p><p>
291
292</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>filename</tt></i>&nbsp;:</span></td><td>  the filename
293</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>&nbsp;:</span></td><td>  a free form C string describing the HTML document encoding, or NULL
294</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree
295</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8ToHtml"></a>UTF8ToHtml ()</h3><pre class="programlisting">int         UTF8ToHtml                      (unsigned char *out,
296                                             int *outlen,
297                                             unsigned char *in,
298                                             int *inlen);</pre><p>
299Take a block of UTF-8 chars in and try to convert it to an ASCII
300plus HTML entities block of chars out.</p><p>
301
302</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td>  a pointer to an array of bytes to store the result
303</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td>  the length of <i><tt>out</tt></i>
304</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td>  a pointer to an array of UTF-8 chars
305</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td>  the length of <i><tt>in</tt></i>
306</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise
307The value of <i><tt>inlen</tt></i> after return is the number of octets consumed
308    if the return value is not negative, else unpredictable.
309The value of <i><tt>outlen</tt></i> after return is the number of octets written to <i><tt>out</tt></i>.
310</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEncodeEntities"></a>htmlEncodeEntities ()</h3><pre class="programlisting">int         htmlEncodeEntities              (unsigned char *out,
311                                             int *outlen,
312                                             unsigned char *in,
313                                             int *inlen,
314                                             int quoteChar);</pre><p>
315Take a block of UTF-8 chars in and try to convert it to an ASCII
316plus HTML entities block of chars out.</p><p>
317
318</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td>  a pointer to an array of bytes to store the result
319</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td>  the length of <i><tt>out</tt></i>
320</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td>  a pointer to an array of UTF-8 chars
321</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td>  the length of <i><tt>in</tt></i>
322</td></tr><tr><td><span class="term"><i><tt>quoteChar</tt></i>&nbsp;:</span></td><td> the quote character to escape (' or &quot;) or zero.
323</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise
324The value of <i><tt>inlen</tt></i> after return is the number of octets consumed
325    if the return value is not negative, else unpredictable.
326The value of <i><tt>outlen</tt></i> after return is the number of octets written to <i><tt>out</tt></i>.
327</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlIsScriptAttribute"></a>htmlIsScriptAttribute ()</h3><pre class="programlisting">int         htmlIsScriptAttribute           (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p>
328Check if an attribute is of content type Script</p><p>
329
330</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td>  an attribute name
331</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>1 if the attribute is a script, 0 otherwise
332</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlHandleOmittedElem"></a>htmlHandleOmittedElem ()</h3><pre class="programlisting">int         htmlHandleOmittedElem           (int val);</pre><p>
333Set and return the previous value for handling HTML omitted tags.</p><p>
334
335</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>val</tt></i>&nbsp;:</span></td><td>  int 0 or 1
336</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the previous value: 0 for no handling, 1 for auto insertion.
337</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlFreeParserCtxt"></a>htmlFreeParserCtxt ()</h3><pre class="programlisting">void        htmlFreeParserCtxt              (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
338Free all the memory used by a parser context. However the parsed
339document in ctxt-&gt;myDoc is not freed.</p><p>
340
341</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>&nbsp;:</span></td><td>  an HTML parser context
342</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlCreatePushParserCtxt"></a>htmlCreatePushParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreatePushParserCtxt  (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
343                                             void *user_data,
344                                             const char *chunk,
345                                             int size,
346                                             const char *filename,
347                                             <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p>
348Create a parser context for using the HTML parser in push mode.
349The value of <i><tt>filename</tt></i> is used for fetching external entities
350and error/warning reports.</p><p>
351
352</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>sax</tt></i>&nbsp;:</span></td><td>  a SAX handler
353</td></tr><tr><td><span class="term"><i><tt>user_data</tt></i>&nbsp;:</span></td><td>  The user data returned on SAX callbacks
354</td></tr><tr><td><span class="term"><i><tt>chunk</tt></i>&nbsp;:</span></td><td>  a pointer to an array of chars
355</td></tr><tr><td><span class="term"><i><tt>size</tt></i>&nbsp;:</span></td><td>  number of chars in the array
356</td></tr><tr><td><span class="term"><i><tt>filename</tt></i>&nbsp;:</span></td><td>  an optional file name or URI
357</td></tr><tr><td><span class="term"><i><tt>enc</tt></i>&nbsp;:</span></td><td>  an optional encoding
358</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the new parser context or NULL
359</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseChunk"></a>htmlParseChunk ()</h3><pre class="programlisting">int         htmlParseChunk                  (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
360                                             const char *chunk,
361                                             int size,
362                                             int terminate);</pre><p>
363Parse a Chunk of memory</p><p>
364
365</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>&nbsp;:</span></td><td>  an HTML parser context
366</td></tr><tr><td><span class="term"><i><tt>chunk</tt></i>&nbsp;:</span></td><td>  a char array
367</td></tr><tr><td><span class="term"><i><tt>size</tt></i>&nbsp;:</span></td><td>  the size in bytes of the chunk
368</td></tr><tr><td><span class="term"><i><tt>terminate</tt></i>&nbsp;:</span></td><td>  last chunk indicator
369</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>zero if no error, the xmlParserErrors otherwise.
370</td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-xmlerror.html"><b>&lt;&lt;&nbsp;xmlerror</b></a></td><td align="right"><a accesskey="n" href="libxml-HTMLtree.html"><b>HTMLtree&nbsp;&gt;&gt;</b></a></td></tr></table></body></html>
371