libxml-encoding.html revision 93d95255e58d727fb69683646ec085d424bc34e7
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> 2<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>encoding</title><meta name="generator" content="DocBook XSL Stylesheets V1.58.1"><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css"> 3 .synopsis, .classsynopsis { 4 background: #eeeeee; 5 border: solid 1px #aaaaaa; 6 padding: 0.5em; 7 } 8 .programlisting { 9 background: #eeeeff; 10 border: solid 1px #aaaaff; 11 padding: 0.5em; 12 } 13 .variablelist { 14 padding: 4px; 15 margin-left: 3em; 16 } 17 .navigation { 18 background: #ffeeee; 19 border: solid 1px #ffaaaa; 20 margin-top: 0.5em; 21 margin-bottom: 0.5em; 22 } 23 .navigation a { 24 color: #770000; 25 } 26 .navigation a:visited { 27 color: #550000; 28 } 29 .navigation .title { 30 font-size: 200%; 31 } 32 </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="libxml-lib.html" title="Libxml Library Reference"><link rel="previous" href="libxml-parserInternals.html" title="parserInternals"><link rel="next" href="libxml-hash.html" title="hash"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-parserInternals.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="libxml-lib.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-hash.html"><img src="right.png" width="24" height="24" border="0" 
alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-encoding"></a><div class="titlepage"></div><div class="refnamediv"><h2>encoding</h2><p>encoding — </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis"> 33 34 35 36enum <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a>; 37int (<a href="libxml-encoding.html#xmlCharEncodingInputFunc">*xmlCharEncodingInputFunc</a>) (unsigned char *out, 38 int *outlen, 39 unsigned char *in, 40 int *inlen); 41int (<a href="libxml-encoding.html#xmlCharEncodingOutputFunc">*xmlCharEncodingOutputFunc</a>) (unsigned char *out, 42 int *outlen, 43 unsigned char *in, 44 int *inlen); 45struct <a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a>; 46typedef <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a>; 47void <a href="libxml-encoding.html#xmlInitCharEncodingHandlers">xmlInitCharEncodingHandlers</a> (void); 48void <a href="libxml-encoding.html#xmlCleanupCharEncodingHandlers">xmlCleanupCharEncodingHandlers</a> (void); 49void <a href="libxml-encoding.html#xmlRegisterCharEncodingHandler">xmlRegisterCharEncodingHandler</a> (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler); 50<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlGetCharEncodingHandler">xmlGetCharEncodingHandler</a> 51 (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); 52<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlFindCharEncodingHandler">xmlFindCharEncodingHandler</a> 53 (const char *name); 54<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlNewCharEncodingHandler">xmlNewCharEncodingHandler</a> 55 (const char *name, 56 <a 
href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input, 57 <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output); 58int <a href="libxml-encoding.html#xmlAddEncodingAlias">xmlAddEncodingAlias</a> (const char *name, 59 const char *alias); 60int <a href="libxml-encoding.html#xmlDelEncodingAlias">xmlDelEncodingAlias</a> (const char *alias); 61const char* <a href="libxml-encoding.html#xmlGetEncodingAlias">xmlGetEncodingAlias</a> (const char *alias); 62void <a href="libxml-encoding.html#xmlCleanupEncodingAliases">xmlCleanupEncodingAliases</a> (void); 63<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlParseCharEncoding">xmlParseCharEncoding</a> (const char *name); 64const char* <a href="libxml-encoding.html#xmlGetCharEncodingName">xmlGetCharEncodingName</a> (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); 65<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlDetectCharEncoding">xmlDetectCharEncoding</a> (unsigned char *in, 66 int len); 67int <a href="libxml-encoding.html#xmlCharEncOutFunc">xmlCharEncOutFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 68 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 69 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); 70int <a href="libxml-encoding.html#xmlCharEncInFunc">xmlCharEncInFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 71 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 72 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); 73int <a href="libxml-encoding.html#xmlCharEncFirstLine">xmlCharEncFirstLine</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 74 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 75 <a 
href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); 76int <a href="libxml-encoding.html#xmlCharEncCloseFunc">xmlCharEncCloseFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler); 77int <a href="libxml-encoding.html#UTF8Toisolat1">UTF8Toisolat1</a> (unsigned char *out, 78 int *outlen, 79 unsigned char *in, 80 int *inlen); 81int <a href="libxml-encoding.html#isolat1ToUTF8">isolat1ToUTF8</a> (unsigned char *out, 82 int *outlen, 83 unsigned char *in, 84 int *inlen); 85int <a href="libxml-encoding.html#xmlGetUTF8Char">xmlGetUTF8Char</a> (unsigned char *utf, 86 int *len); 87int <a href="libxml-encoding.html#xmlCheckUTF8">xmlCheckUTF8</a> (unsigned char *utf); 88int <a href="libxml-encoding.html#xmlUTF8Strsize">xmlUTF8Strsize</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 89 int len); 90<a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strndup">xmlUTF8Strndup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 91 int len); 92<a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strpos">xmlUTF8Strpos</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 93 int pos); 94int <a href="libxml-encoding.html#xmlUTF8Strloc">xmlUTF8Strloc</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 95 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar); 96<a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strsub">xmlUTF8Strsub</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 97 int start, 98 int len); 99int <a href="libxml-encoding.html#xmlUTF8Strlen">xmlUTF8Strlen</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf); 100</pre></div><div class="refsect1" lang="en"><h2>Description</h2><p> 101 102</p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="xmlCharEncoding"></a>enum xmlCharEncoding</h3><pre 
class="programlisting">typedef enum { 103 XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ 104 XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ 105 XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ 106 XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ 107 XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ 108 XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ 109 XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ 110 XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ 111 XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ 112 XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ 113 XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ 114 XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ 115 XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ 116 XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ 117 XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ 118 XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ 119 XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ 120 XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ 121 XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ 122 XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ 123 XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ 124 XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ 125 XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ 126 XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ 127} xmlCharEncoding; 128</pre><p> 129Predefined values for some standard encodings. 130Libxml doesn't do beforehand translation on UTF8, ISOLatinX. 131It also supports UTF16 (LE and BE) by default. 132</p><p> 133Anything else would have to be translated to UTF8 before being 134given to the parser itself. The BOM for UTF16 and the encoding 135declaration are looked at and a converter is looked for at that 136point. If not found the parser stops here as asked by the XML REC. 137Converters can be registered by the user using xmlRegisterCharEncodingHandler 138but the current form doesn't allow stateful transcoding (a serious 139problem, agreed!). 
If iconv has been found it will be used 140automatically and allow stateful transcoding; the simplest is then 141to be sure to enable iconv and to provide iconv libs for the encoding 142support needed.</p><p> 143 144</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingInputFunc"></a>xmlCharEncodingInputFunc ()</h3><pre class="programlisting">int (*xmlCharEncodingInputFunc) (unsigned char *out, 145 int *outlen, 146 unsigned char *in, 147 int *inlen);</pre><p> 148Take a block of chars in the original encoding and try to convert 149it to a UTF-8 block of chars out.</p><p> 150 151</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td> 152</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td> 153</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> 154</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td> 155</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 156 157 158</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingOutputFunc"></a>xmlCharEncodingOutputFunc ()</h3><pre class="programlisting">int (*xmlCharEncodingOutputFunc) (unsigned char *out, 159 int *outlen, 160 unsigned char *in, 161 int *inlen);</pre><p> 162Take a block of UTF-8 chars in and try to convert it to another 163encoding. 164Note: a first call designed to produce heading info is called with 165in = NULL. 
If stateful this should also initialize the encoder state.</p><p> 166 167</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>�:</span></td><td> 168</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>�:</span></td><td> 169</td></tr><tr><td><span class="term"><i><tt>in</tt></i>�:</span></td><td> 170</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>�:</span></td><td> 171</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 172 173 174</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandler"></a>struct xmlCharEncodingHandler</h3><pre class="programlisting">struct xmlCharEncodingHandler { 175 char *name; 176 xmlCharEncodingInputFunc input; 177 xmlCharEncodingOutputFunc output; 178#ifdef LIBXML_ICONV_ENABLED 179 iconv_t iconv_in; 180 iconv_t iconv_out; 181#endif /* LIBXML_ICONV_ENABLED */ 182}; 183</pre><p> 184 185</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandlerPtr"></a>xmlCharEncodingHandlerPtr</h3><p> 186 187</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlInitCharEncodingHandlers"></a>xmlInitCharEncodingHandlers ()</h3><pre class="programlisting">void xmlInitCharEncodingHandlers (void);</pre><p> 188Initialize the char encoding support, it registers the default 189encoding supported. 
190NOTE: while public, this function usually doesn't need to be called 191 in normal processing.</p><p> 192 193</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupCharEncodingHandlers"></a>xmlCleanupCharEncodingHandlers ()</h3><pre class="programlisting">void xmlCleanupCharEncodingHandlers (void);</pre><p> 194Cleanup the memory allocated for the char encoding support, it 195unregisters all the encoding handlers and the aliases.</p><p> 196 197</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlRegisterCharEncodingHandler"></a>xmlRegisterCharEncodingHandler ()</h3><pre class="programlisting">void xmlRegisterCharEncodingHandler (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler);</pre><p> 198Register the char encoding handler, surprising, isn't it ?</p><p> 199 200</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>�:</span></td><td> 201 202 203</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingHandler"></a>xmlGetCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlGetCharEncodingHandler 204 (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> 205Search in the registered set the handler able to read/write that encoding.</p><p> 206 207</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>enc</tt></i>�:</span></td><td> 208</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 209 210 211</td></tr></tbody></table></div></div><hr 
xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlFindCharEncodingHandler"></a>xmlFindCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlFindCharEncodingHandler 212 (const char *name);</pre><p> 213Search in the registered set the handler able to read/write that encoding.</p><p> 214 215</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>�:</span></td><td> 216</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 217 218 219</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlNewCharEncodingHandler"></a>xmlNewCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlNewCharEncodingHandler 220 (const char *name, 221 <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input, 222 <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output);</pre><p> 223Create and registers an xmlCharEncodingHandler.</p><p> 224 225</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>�:</span></td><td> 226</td></tr><tr><td><span class="term"><i><tt>input</tt></i>�:</span></td><td> 227</td></tr><tr><td><span class="term"><i><tt>output</tt></i>�:</span></td><td> 228</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 229 230 231</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlAddEncodingAlias"></a>xmlAddEncodingAlias ()</h3><pre class="programlisting">int 
xmlAddEncodingAlias (const char *name, 232 const char *alias);</pre><p> 233Registers an alias <i><tt>alias</tt></i> for an encoding named <i><tt>name</tt></i>. An existing alias 234will be overwritten.</p><p> 235 236</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td> 237</td></tr><tr><td><span class="term"><i><tt>alias</tt></i>&nbsp;:</span></td><td> 238</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 239 240 241</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDelEncodingAlias"></a>xmlDelEncodingAlias ()</h3><pre class="programlisting">int xmlDelEncodingAlias (const char *alias);</pre><p> 242Unregisters an encoding alias <i><tt>alias</tt></i>.</p><p> 243 244</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>alias</tt></i>&nbsp;:</span></td><td> 245</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 246 247 248</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetEncodingAlias"></a>xmlGetEncodingAlias ()</h3><pre class="programlisting">const char* xmlGetEncodingAlias (const char *alias);</pre><p> 249Look up an encoding name for the given alias.</p><p> 250 251</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>alias</tt></i>&nbsp;:</span></td><td> 252</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 253 254 255</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupEncodingAliases"></a>xmlCleanupEncodingAliases ()</h3><pre 
class="programlisting">void xmlCleanupEncodingAliases (void);</pre><p> 256Unregisters all aliases.</p><p> 257 258</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlParseCharEncoding"></a>xmlParseCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlParseCharEncoding (const char *name);</pre><p> 259Compare the string to the encoding schemes already known. Note 260that the comparison is case insensitive according to section 261[XML] 4.3.3 Character Encoding in Entities.</p><p> 262 263</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td> 264</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 265 266 267</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingName"></a>xmlGetCharEncodingName ()</h3><pre class="programlisting">const char* xmlGetCharEncodingName (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> 268The "canonical" name for XML encoding. 269Cf. 
http://www.w3.org/TR/REC-xml#charencoding 270Section 4.3.3 Character Encoding in Entities</p><p> 271 272</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>enc</tt></i>&nbsp;:</span></td><td> 273</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 274 275 276</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDetectCharEncoding"></a>xmlDetectCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlDetectCharEncoding (unsigned char *in, 277 int len);</pre><p> 278Guess the encoding of the entity using the first bytes of the entity content 279according to the non-normative appendix F of the XML-1.0 recommendation.</p><p> 280 281</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> 282</td></tr><tr><td><span class="term"><i><tt>len</tt></i>&nbsp;:</span></td><td> 283</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 284 285 286</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncOutFunc"></a>xmlCharEncOutFunc ()</h3><pre class="programlisting">int xmlCharEncOutFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 287 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 288 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> 289Generic front-end for the encoding handler output function. 290A first call with <i><tt>in</tt></i> == NULL has to be made first to initiate the 291output in case of non-stateless encoding 
needing to initiate their 292state or the output (like the BOM in UTF16). 293In case of UTF8 sequence conversion errors for the given encoder, 294the content will be automatically remapped to a CharRef sequence.</p><p> 295 296</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>�:</span></td><td> 297</td></tr><tr><td><span class="term"><i><tt>out</tt></i>�:</span></td><td> 298</td></tr><tr><td><span class="term"><i><tt>in</tt></i>�:</span></td><td> 299</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 300 301 302</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncInFunc"></a>xmlCharEncInFunc ()</h3><pre class="programlisting">int xmlCharEncInFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 303 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 304 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> 305Generic front-end for the encoding handler input function</p><p> 306 307</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>�:</span></td><td> 308</td></tr><tr><td><span class="term"><i><tt>out</tt></i>�:</span></td><td> 309</td></tr><tr><td><span class="term"><i><tt>in</tt></i>�:</span></td><td> 310</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 311 312 313</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncFirstLine"></a>xmlCharEncFirstLine ()</h3><pre class="programlisting">int xmlCharEncFirstLine (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 314 <a 
href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 315 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> 316Front-end for the encoding handler input function, but handle only 317the very first line, i.e. limit itself to 45 chars.</p><p> 318 319</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>�:</span></td><td> 320</td></tr><tr><td><span class="term"><i><tt>out</tt></i>�:</span></td><td> 321</td></tr><tr><td><span class="term"><i><tt>in</tt></i>�:</span></td><td> 322</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 323 324 325</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncCloseFunc"></a>xmlCharEncCloseFunc ()</h3><pre class="programlisting">int xmlCharEncCloseFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler);</pre><p> 326Generic front-end for encoding handler close function</p><p> 327 328</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>�:</span></td><td> 329</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 330 331 332</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8Toisolat1"></a>UTF8Toisolat1 ()</h3><pre class="programlisting">int UTF8Toisolat1 (unsigned char *out, 333 int *outlen, 334 unsigned char *in, 335 int *inlen);</pre><p> 336Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 337block of chars out.</p><p> 338 339</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>�:</span></td><td> 340</td></tr><tr><td><span 
class="term"><i><tt>outlen</tt></i>�:</span></td><td> 341</td></tr><tr><td><span class="term"><i><tt>in</tt></i>�:</span></td><td> 342</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>�:</span></td><td> 343</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 344 345 346</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="isolat1ToUTF8"></a>isolat1ToUTF8 ()</h3><pre class="programlisting">int isolat1ToUTF8 (unsigned char *out, 347 int *outlen, 348 unsigned char *in, 349 int *inlen);</pre><p> 350Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 351block of chars out.</p><p> 352 353</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>�:</span></td><td> 354</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>�:</span></td><td> 355</td></tr><tr><td><span class="term"><i><tt>in</tt></i>�:</span></td><td> 356</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>�:</span></td><td> 357</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 358 359 360</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetUTF8Char"></a>xmlGetUTF8Char ()</h3><pre class="programlisting">int xmlGetUTF8Char (unsigned char *utf, 361 int *len);</pre><p> 362Read one UTF8 Char from <i><tt>utf</tt></i></p><p> 363 364</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td> 365</td></tr><tr><td><span class="term"><i><tt>len</tt></i>�:</span></td><td> 366</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 367 368 369</td></tr></tbody></table></div></div><hr 
xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCheckUTF8"></a>xmlCheckUTF8 ()</h3><pre class="programlisting">int xmlCheckUTF8 (unsigned char *utf);</pre><p> 370Checks <i><tt>utf</tt></i> for being valid utf-8. <i><tt>utf</tt></i> is assumed to be 371null-terminated. This function is not super-strict, as it will 372allow longer utf-8 sequences than necessary. Note that Java is 373capable of producing these sequences if provoked. Also note, this 374routine checks for the 4-byte maximum size, but does not check for 3750x10ffff maximum value.</p><p> 376 377</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td> 378</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 379 380 381</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsize"></a>xmlUTF8Strsize ()</h3><pre class="programlisting">int xmlUTF8Strsize (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 382 int len);</pre><p> 383storage size of an UTF8 string</p><p> 384 385</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td> 386</td></tr><tr><td><span class="term"><i><tt>len</tt></i>�:</span></td><td> 387</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 388 389 390</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strndup"></a>xmlUTF8Strndup ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strndup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 391 int len);</pre><p> 392a strndup for array of UTF8's</p><p> 393 
394</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td> 395</td></tr><tr><td><span class="term"><i><tt>len</tt></i>�:</span></td><td> 396</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 397 398 399</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strpos"></a>xmlUTF8Strpos ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strpos (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 400 int pos);</pre><p> 401a function to provide the equivalent of fetching a 402character from a string array</p><p> 403 404</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td> 405</td></tr><tr><td><span class="term"><i><tt>pos</tt></i>�:</span></td><td> 406</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 407 408 409</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strloc"></a>xmlUTF8Strloc ()</h3><pre class="programlisting">int xmlUTF8Strloc (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 410 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar);</pre><p> 411a function to provide relative location of a UTF8 char</p><p> 412 413</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td> 414</td></tr><tr><td><span class="term"><i><tt>utfchar</tt></i>�:</span></td><td> 415</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 416 417 418</td></tr></tbody></table></div></div><hr 
xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsub"></a>xmlUTF8Strsub ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strsub (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 419 int start, 420 int len);</pre><p> 421Note: positions are given in units of UTF-8 chars.</p><p> 422 423</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>&nbsp;:</span></td><td> 424</td></tr><tr><td><span class="term"><i><tt>start</tt></i>&nbsp;:</span></td><td> 425</td></tr><tr><td><span class="term"><i><tt>len</tt></i>&nbsp;:</span></td><td> 426</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 427 428 429</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strlen"></a>xmlUTF8Strlen ()</h3><pre class="programlisting">int xmlUTF8Strlen (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);</pre><p> 430compute the length of a UTF8 string; it doesn't do a full UTF8 431checking of the content of the string.</p><p> 432 433</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>&nbsp;:</span></td><td> 434</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> 435 436 437</td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-parserInternals.html"><b>&lt;&lt;&nbsp;parserInternals</b></a></td><td align="right"><a accesskey="n" href="libxml-hash.html"><b>hash&nbsp;&gt;&gt;</b></a></td></tr></table></body></html> 438