libxml-HTMLparser.html revision d7cec926292b83b46af271285b0a305e99f66cd0
1<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>HTMLparser</title><meta name="generator" content="DocBook XSL Stylesheets V1.58.1"><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css"> 2 .synopsis, .classsynopsis { 3 background: #eeeeee; 4 border: solid 1px #aaaaaa; 5 padding: 0.5em; 6 } 7 .programlisting { 8 background: #eeeeff; 9 border: solid 1px #aaaaff; 10 padding: 0.5em; 11 } 12 .variablelist { 13 padding: 4px; 14 margin-left: 3em; 15 } 16 .navigation { 17 background: #ffeeee; 18 border: solid 1px #ffaaaa; 19 margin-top: 0.5em; 20 margin-bottom: 0.5em; 21 } 22 .navigation a { 23 color: #770000; 24 } 25 .navigation a:visited { 26 color: #550000; 27 } 28 .navigation .title { 29 font-size: 200%; 30 } 31 </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="libxml-lib.html" title="Libxml Library Reference"><link rel="previous" href="libxml-xmlerror.html" title="xmlerror"><link rel="next" href="libxml-HTMLtree.html" title="HTMLtree"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-xmlerror.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="libxml-lib.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-HTMLtree.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-HTMLparser"></a><div 
class="titlepage"></div><div class="refnamediv"><h2>HTMLparser</h2><p>HTMLparser — </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis"> 32 33 34 35typedef <a href="libxml-HTMLparser.html#htmlParserCtxt">htmlParserCtxt</a>; 36typedef <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a>; 37typedef <a href="libxml-HTMLparser.html#htmlParserNodeInfo">htmlParserNodeInfo</a>; 38typedef <a href="libxml-HTMLparser.html#htmlSAXHandler">htmlSAXHandler</a>; 39typedef <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a>; 40typedef <a href="libxml-HTMLparser.html#htmlParserInput">htmlParserInput</a>; 41typedef <a href="libxml-HTMLparser.html#htmlParserInputPtr">htmlParserInputPtr</a>; 42typedef <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>; 43typedef <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a>; 44struct <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>; 45typedef <a href="libxml-HTMLparser.html#htmlElemDescPtr">htmlElemDescPtr</a>; 46struct <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>; 47typedef <a href="libxml-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a>; 48const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* <a href="libxml-HTMLparser.html#htmlTagLookup">htmlTagLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag); 49const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityLookup">htmlEntityLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name); 50const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityValueLookup">htmlEntityValueLookup</a> (unsigned int value); 51int <a href="libxml-HTMLparser.html#htmlIsAutoClosed">htmlIsAutoClosed</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, 52 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> 
elem); 53int <a href="libxml-HTMLparser.html#htmlAutoCloseTag">htmlAutoCloseTag</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, 54 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name, 55 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem); 56const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlParseEntityRef">htmlParseEntityRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, 57 <a href="libxml-tree.html#xmlChar">xmlChar</a> **str); 58int <a href="libxml-HTMLparser.html#htmlParseCharRef">htmlParseCharRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); 59void <a href="libxml-HTMLparser.html#htmlParseElement">htmlParseElement</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); 60<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt</a> 61 (const char *buffer, 62 int size); 63int <a href="libxml-HTMLparser.html#htmlParseDocument">htmlParseDocument</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); 64<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseDoc">htmlSAXParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, 65 const char *encoding, 66 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, 67 void *userData); 68<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseDoc">htmlParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, 69 const char *encoding); 70<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseFile">htmlSAXParseFile</a> (const char *filename, 71 const char *encoding, 72 <a 
href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, 73 void *userData); 74<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseFile">htmlParseFile</a> (const char *filename, 75 const char *encoding); 76int <a href="libxml-HTMLparser.html#UTF8ToHtml">UTF8ToHtml</a> (unsigned char *out, 77 int *outlen, 78 unsigned char *in, 79 int *inlen); 80int <a href="libxml-HTMLparser.html#htmlEncodeEntities">htmlEncodeEntities</a> (unsigned char *out, 81 int *outlen, 82 unsigned char *in, 83 int *inlen, 84 int quoteChar); 85int <a href="libxml-HTMLparser.html#htmlIsScriptAttribute">htmlIsScriptAttribute</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name); 86int <a href="libxml-HTMLparser.html#htmlHandleOmittedElem">htmlHandleOmittedElem</a> (int val); 87void <a href="libxml-HTMLparser.html#htmlFreeParserCtxt">htmlFreeParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); 88<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, 89 void *user_data, 90 const char *chunk, 91 int size, 92 const char *filename, 93 <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); 94int <a href="libxml-HTMLparser.html#htmlParseChunk">htmlParseChunk</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, 95 const char *chunk, 96 int size, 97 int terminate); 98</pre></div><div class="refsect1" lang="en"><h2>Description</h2><p> 99 100</p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="htmlParserCtxt"></a>htmlParserCtxt</h3><pre class="programlisting">typedef xmlParserCtxt htmlParserCtxt; 101</pre><p> 102 103</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" 
lang="en"><h3><a name="htmlParserCtxtPtr"></a>htmlParserCtxtPtr</h3><pre class="programlisting">typedef xmlParserCtxtPtr htmlParserCtxtPtr; 104</pre><p> 105 106</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserNodeInfo"></a>htmlParserNodeInfo</h3><pre class="programlisting">typedef xmlParserNodeInfo htmlParserNodeInfo; 107</pre><p> 108 109</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXHandler"></a>htmlSAXHandler</h3><pre class="programlisting">typedef xmlSAXHandler htmlSAXHandler; 110</pre><p> 111 112</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXHandlerPtr"></a>htmlSAXHandlerPtr</h3><pre class="programlisting">typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; 113</pre><p> 114 115</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserInput"></a>htmlParserInput</h3><pre class="programlisting">typedef xmlParserInput htmlParserInput; 116</pre><p> 117 118</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserInputPtr"></a>htmlParserInputPtr</h3><pre class="programlisting">typedef xmlParserInputPtr htmlParserInputPtr; 119</pre><p> 120 121</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlDocPtr"></a>htmlDocPtr</h3><pre class="programlisting">typedef xmlDocPtr htmlDocPtr; 122</pre><p> 123 124</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlNodePtr"></a>htmlNodePtr</h3><pre class="programlisting">typedef xmlNodePtr htmlNodePtr; 125</pre><p> 126 127</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlElemDesc"></a>struct 
htmlElemDesc</h3><pre class="programlisting">struct htmlElemDesc { 128 const char *name; /* The tag name */ 129 char startTag; /* Whether the start tag can be implied */ 130 char endTag; /* Whether the end tag can be implied */ 131 char saveEndTag; /* Whether the end tag should be saved */ 132 char empty; /* Is this an empty element ? */ 133 char depr; /* Is this a deprecated element ? */ 134 char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ 135 char isinline; /* is this a block 0 or inline 1 element */ 136 const char *desc; /* the description */ 137 138/* NRK Jan.2003 139 * New fields encapsulating HTML structure 140 * 141 * Bugs: 142 * This is a very limited representation. It fails to tell us when 143 * an element *requires* subelements (we only have whether they're 144 * allowed or not), and it doesn't tell us where CDATA and PCDATA 145 * are allowed. Some element relationships are not fully represented: 146 * these are flagged with the word MODIFIER 147 */ 148 const char** subelts; /* allowed sub-elements of this element */ 149 const char* defaultsubelt; /* subelement for suggested auto-repair 150 if necessary or NULL */ 151 const char** attrs_opt; /* Optional Attributes */ 152 const char** attrs_depr; /* Additional deprecated attributes */ 153 const char** attrs_req; /* Required attributes */ 154}; 155</pre><p> 156 157</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlElemDescPtr"></a>htmlElemDescPtr</h3><pre class="programlisting">typedef htmlElemDesc *htmlElemDescPtr; 158</pre><p> 159 160</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityDesc"></a>struct htmlEntityDesc</h3><pre class="programlisting">struct htmlEntityDesc { 161 unsigned int value; /* the UNICODE value for the character */ 162 const char *name; /* The entity name */ 163 const char *desc; /* the description */ 164}; 165</pre><p> 166 
167</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityDescPtr"></a>htmlEntityDescPtr</h3><pre class="programlisting">typedef htmlEntityDesc *htmlEntityDescPtr; 168</pre><p> 169 170</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlTagLookup"></a>htmlTagLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* htmlTagLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);</pre><p> 171Look up the HTML tag in the ElementTable.</p><p> 172 173</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>tag</tt></i>&nbsp;:</span></td><td> The tag name in lowercase 174</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the related htmlElemDescPtr or NULL if not found. 175</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityLookup"></a>htmlEntityLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p> 176Look up the given entity in EntitiesTable. 177</p><p> 178TODO: the linear scan is really ugly; a hash table is really needed.</p><p> 179 180</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td> the entity name 181</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the associated htmlEntityDescPtr if found, NULL otherwise. 
182</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityValueLookup"></a>htmlEntityValueLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityValueLookup (unsigned int value);</pre><p> 183Look up the entity with the given value in EntitiesTable. 184</p><p> 185TODO: the linear scan is really ugly; a hash table is really needed.</p><p> 186 187</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>value</tt></i>&nbsp;:</span></td><td> the entity's Unicode value 188</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the associated htmlEntityDescPtr if found, NULL otherwise. 189</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlIsAutoClosed"></a>htmlIsAutoClosed ()</h3><pre class="programlisting">int htmlIsAutoClosed (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, 190 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p> 191The HTML DTD allows a tag to implicitly close other tags. 192The list is kept in htmlStartClose array. 
This function checks 193if a tag is autoclosed by one of its children</p><p> 194 195</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>doc</tt></i>&nbsp;:</span></td><td> the HTML document 196</td></tr><tr><td><span class="term"><i><tt>elem</tt></i>&nbsp;:</span></td><td> the HTML element 197</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>1 if autoclosed, 0 otherwise 198</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlAutoCloseTag"></a>htmlAutoCloseTag ()</h3><pre class="programlisting">int htmlAutoCloseTag (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, 199 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name, 200 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p> 201The HTML DTD allows a tag to implicitly close other tags. 202The list is kept in htmlStartClose array. 
This function checks 203if the element or one of its children would autoclose the 204given tag.</p><p> 205 206</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>doc</tt></i>&nbsp;:</span></td><td> the HTML document 207</td></tr><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td> The tag name 208</td></tr><tr><td><span class="term"><i><tt>elem</tt></i>&nbsp;:</span></td><td> the HTML element 209</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>1 if autoclose, 0 otherwise 210</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseEntityRef"></a>htmlParseEntityRef ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlParseEntityRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, 211 <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);</pre><p> 212parse an HTML entity reference 213</p><p> 214[68] EntityRef ::= '&amp;' Name ';'</p><p> 215 216</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>&nbsp;:</span></td><td> an HTML parser context 217</td></tr><tr><td><span class="term"><i><tt>str</tt></i>&nbsp;:</span></td><td> location to store the entity name 218</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the associated htmlEntityDescPtr if found, or NULL otherwise; 219 if non-NULL, *str will have to be freed by the caller. 
220</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseCharRef"></a>htmlParseCharRef ()</h3><pre class="programlisting">int htmlParseCharRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> 221parse Reference declarations 222</p><p> 223[66] CharRef ::= '&amp;#' [0-9]+ ';' | 224 '&amp;#x' [0-9a-fA-F]+ ';'</p><p> 225 226</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>&nbsp;:</span></td><td> an HTML parser context 227</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the value parsed (as an int) 228</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseElement"></a>htmlParseElement ()</h3><pre class="programlisting">void htmlParseElement (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> 229parse an HTML element, this is highly recursive 230</p><p> 231[39] element ::= EmptyElemTag | STag content ETag 232</p><p> 233[41] Attribute ::= Name Eq AttValue</p><p> 234 235</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>&nbsp;:</span></td><td> an HTML parser context 236</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlCreateMemoryParserCtxt"></a>htmlCreateMemoryParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreateMemoryParserCtxt 237 (const char *buffer, 238 int size);</pre><p> 239Create a parser context for an HTML in-memory document.</p><p> 240 241</p><div 
class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>buffer</tt></i>�:</span></td><td> a pointer to a char array 242</td></tr><tr><td><span class="term"><i><tt>size</tt></i>�:</span></td><td> the size of the array 243</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the new parser context or NULL 244</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseDocument"></a>htmlParseDocument ()</h3><pre class="programlisting">int htmlParseDocument (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> 245parse an HTML document (and build a tree if using the standard SAX 246interface).</p><p> 247 248</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>�:</span></td><td> an HTML parser context 249</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0, -1 in case of error. the parser context is augmented 250 as a result of the parsing. 251</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXParseDoc"></a>htmlSAXParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, 252 const char *encoding, 253 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, 254 void *userData);</pre><p> 255Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks 256to handle parse events. 
If sax is NULL, fall back to the default DOM 257behavior and return a tree.</p><p> 258 259</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>cur</tt></i>&nbsp;:</span></td><td> a pointer to an array of xmlChar 260</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>&nbsp;:</span></td><td> a free form C string describing the HTML document encoding, or NULL 261</td></tr><tr><td><span class="term"><i><tt>sax</tt></i>&nbsp;:</span></td><td> the SAX handler block 262</td></tr><tr><td><span class="term"><i><tt>userData</tt></i>&nbsp;:</span></td><td> if using SAX, this pointer will be provided on callbacks. 263</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree, unless sax is not NULL or the document is 264 not well formed. 265</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseDoc"></a>htmlParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, 266 const char *encoding);</pre><p> 267parse an HTML in-memory document and build a tree.</p><p> 268 269</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>cur</tt></i>&nbsp;:</span></td><td> a pointer to an array of xmlChar 270</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>&nbsp;:</span></td><td> a free form C string describing the HTML document encoding, or NULL 271</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree 272</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXParseFile"></a>htmlSAXParseFile ()</h3><pre class="programlisting"><a 
href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseFile (const char *filename, 273 const char *encoding, 274 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, 275 void *userData);</pre><p> 276parse an HTML file and build a tree. Automatic support for ZLIB/Compress 277compressed documents is provided by default if found at compile-time. 278It uses the given SAX function block to handle the parsing callbacks. 279If sax is NULL, fall back to the default DOM tree building routines.</p><p> 280 281</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>filename</tt></i>&nbsp;:</span></td><td> the filename 282</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>&nbsp;:</span></td><td> a free form C string describing the HTML document encoding, or NULL 283</td></tr><tr><td><span class="term"><i><tt>sax</tt></i>&nbsp;:</span></td><td> the SAX handler block 284</td></tr><tr><td><span class="term"><i><tt>userData</tt></i>&nbsp;:</span></td><td> if using SAX, this pointer will be provided on callbacks. 285</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree, unless sax is not NULL or the document is 286 not well formed. 287</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseFile"></a>htmlParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseFile (const char *filename, 288 const char *encoding);</pre><p> 289parse an HTML file and build a tree. 
Automatic support for ZLIB/Compress 290compressed documents is provided by default if found at compile-time.</p><p> 291 292</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>filename</tt></i>&nbsp;:</span></td><td> the filename 293</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>&nbsp;:</span></td><td> a free form C string describing the HTML document encoding, or NULL 294</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree 295</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8ToHtml"></a>UTF8ToHtml ()</h3><pre class="programlisting">int UTF8ToHtml (unsigned char *out, 296 int *outlen, 297 unsigned char *in, 298 int *inlen);</pre><p> 299Take a block of UTF-8 chars in and try to convert it to an ASCII 300plus HTML entities block of chars out.</p><p> 301 302</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td> a pointer to an array of bytes to store the result 303</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>out</tt></i> 304</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> a pointer to an array of UTF-8 chars 305</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>in</tt></i> 306</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise. 307The value of <i><tt>inlen</tt></i> after return is the number of octets consumed 308 if the return value is non-negative (success), else unpredictable. 309The value of <i><tt>outlen</tt></i> after return is the number of octets produced. 
310</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEncodeEntities"></a>htmlEncodeEntities ()</h3><pre class="programlisting">int htmlEncodeEntities (unsigned char *out, 311 int *outlen, 312 unsigned char *in, 313 int *inlen, 314 int quoteChar);</pre><p> 315Take a block of UTF-8 chars in and try to convert it to an ASCII 316plus HTML entities block of chars out.</p><p> 317 318</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td> a pointer to an array of bytes to store the result 319</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>out</tt></i> 320</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> a pointer to an array of UTF-8 chars 321</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>in</tt></i> 322</td></tr><tr><td><span class="term"><i><tt>quoteChar</tt></i>&nbsp;:</span></td><td> the quote character to escape (' or ") or zero. 323</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise. 324The value of <i><tt>inlen</tt></i> after return is the number of octets consumed 325 if the return value is non-negative (success), else unpredictable. 326The value of <i><tt>outlen</tt></i> after return is the number of octets produced. 
327</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlIsScriptAttribute"></a>htmlIsScriptAttribute ()</h3><pre class="programlisting">int htmlIsScriptAttribute (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p> 328Check if an attribute is of content type Script</p><p> 329 330</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td> an attribute name 331</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>1 if the attribute is a script, 0 otherwise 332</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlHandleOmittedElem"></a>htmlHandleOmittedElem ()</h3><pre class="programlisting">int htmlHandleOmittedElem (int val);</pre><p> 333Set the value for handling omitted HTML tags and return the previous value.</p><p> 334 335</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>val</tt></i>&nbsp;:</span></td><td> int 0 or 1 336</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the previous value: 0 for no handling, 1 for auto insertion. 337</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlFreeParserCtxt"></a>htmlFreeParserCtxt ()</h3><pre class="programlisting">void htmlFreeParserCtxt (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> 338Free all the memory used by a parser context. 
However the parsed 339document in ctxt->myDoc is not freed.</p><p> 340 341</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>�:</span></td><td> an HTML parser context 342</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlCreatePushParserCtxt"></a>htmlCreatePushParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreatePushParserCtxt (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, 343 void *user_data, 344 const char *chunk, 345 int size, 346 const char *filename, 347 <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> 348Create a parser context for using the HTML parser in push mode 349The value of <i><tt>filename</tt></i> is used for fetching external entities 350and error/warning reports.</p><p> 351 352</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>sax</tt></i>�:</span></td><td> a SAX handler 353</td></tr><tr><td><span class="term"><i><tt>user_data</tt></i>�:</span></td><td> The user data returned on SAX callbacks 354</td></tr><tr><td><span class="term"><i><tt>chunk</tt></i>�:</span></td><td> a pointer to an array of chars 355</td></tr><tr><td><span class="term"><i><tt>size</tt></i>�:</span></td><td> number of chars in the array 356</td></tr><tr><td><span class="term"><i><tt>filename</tt></i>�:</span></td><td> an optional file name or URI 357</td></tr><tr><td><span class="term"><i><tt>enc</tt></i>�:</span></td><td> an optional encoding 358</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the new parser context or NULL 359</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" 
lang="en"><h3><a name="htmlParseChunk"></a>htmlParseChunk ()</h3><pre class="programlisting">int htmlParseChunk (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, 360 const char *chunk, 361 int size, 362 int terminate);</pre><p> 363Parse a chunk of memory</p><p> 364 365</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>&nbsp;:</span></td><td> an HTML parser context 366</td></tr><tr><td><span class="term"><i><tt>chunk</tt></i>&nbsp;:</span></td><td> a char array 367</td></tr><tr><td><span class="term"><i><tt>size</tt></i>&nbsp;:</span></td><td> the size in bytes of the chunk 368</td></tr><tr><td><span class="term"><i><tt>terminate</tt></i>&nbsp;:</span></td><td> last chunk indicator 369</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>zero if no error, the xmlParserErrors otherwise. 370</td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-xmlerror.html"><b>&lt;&lt;&nbsp;xmlerror</b></a></td><td align="right"><a accesskey="n" href="libxml-HTMLtree.html"><b>HTMLtree&nbsp;&gt;&gt;</b></a></td></tr></table></body></html> 371