libxml-encoding.html revision cfba2fe0bbbdd1158666fc1090c7f48c0fdb00e1
1<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>encoding</title><meta name="generator" content="DocBook XSL Stylesheets V1.58.1"><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css"> 2 .synopsis, .classsynopsis { 3 background: #eeeeee; 4 border: solid 1px #aaaaaa; 5 padding: 0.5em; 6 } 7 .programlisting { 8 background: #eeeeff; 9 border: solid 1px #aaaaff; 10 padding: 0.5em; 11 } 12 .variablelist { 13 padding: 4px; 14 margin-left: 3em; 15 } 16 .navigation { 17 background: #ffeeee; 18 border: solid 1px #ffaaaa; 19 margin-top: 0.5em; 20 margin-bottom: 0.5em; 21 } 22 .navigation a { 23 color: #770000; 24 } 25 .navigation a:visited { 26 color: #550000; 27 } 28 .navigation .title { 29 font-size: 200%; 30 } 31 </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="libxml-lib.html" title="Libxml Library Reference"><link rel="previous" href="libxml-parserInternals.html" title="parserInternals"><link rel="next" href="libxml-hash.html" title="hash"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-parserInternals.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="libxml-lib.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-hash.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-encoding"></a><div 
class="titlepage"></div><div class="refnamediv"><h2>encoding</h2><p>encoding — </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis"> 32 33 34 35enum <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a>; 36int (<a href="libxml-encoding.html#xmlCharEncodingInputFunc">*xmlCharEncodingInputFunc</a>) (unsigned char *out, 37 int *outlen, 38 unsigned char *in, 39 int *inlen); 40int (<a href="libxml-encoding.html#xmlCharEncodingOutputFunc">*xmlCharEncodingOutputFunc</a>) (unsigned char *out, 41 int *outlen, 42 unsigned char *in, 43 int *inlen); 44struct <a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a>; 45typedef <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a>; 46void <a href="libxml-encoding.html#xmlInitCharEncodingHandlers">xmlInitCharEncodingHandlers</a> (void); 47void <a href="libxml-encoding.html#xmlCleanupCharEncodingHandlers">xmlCleanupCharEncodingHandlers</a> (void); 48void <a href="libxml-encoding.html#xmlRegisterCharEncodingHandler">xmlRegisterCharEncodingHandler</a> (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler); 49<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlGetCharEncodingHandler">xmlGetCharEncodingHandler</a> 50 (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); 51<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlFindCharEncodingHandler">xmlFindCharEncodingHandler</a> 52 (const char *name); 53<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlNewCharEncodingHandler">xmlNewCharEncodingHandler</a> 54 (const char *name, 55 <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input, 56 <a 
href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output); 57int <a href="libxml-encoding.html#xmlAddEncodingAlias">xmlAddEncodingAlias</a> (const char *name, 58 const char *alias); 59int <a href="libxml-encoding.html#xmlDelEncodingAlias">xmlDelEncodingAlias</a> (const char *alias); 60const char* <a href="libxml-encoding.html#xmlGetEncodingAlias">xmlGetEncodingAlias</a> (const char *alias); 61void <a href="libxml-encoding.html#xmlCleanupEncodingAliases">xmlCleanupEncodingAliases</a> (void); 62<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlParseCharEncoding">xmlParseCharEncoding</a> (const char *name); 63const char* <a href="libxml-encoding.html#xmlGetCharEncodingName">xmlGetCharEncodingName</a> (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); 64<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlDetectCharEncoding">xmlDetectCharEncoding</a> (unsigned char *in, 65 int len); 66int <a href="libxml-encoding.html#xmlCharEncOutFunc">xmlCharEncOutFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 67 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 68 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); 69int <a href="libxml-encoding.html#xmlCharEncInFunc">xmlCharEncInFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 70 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 71 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); 72int <a href="libxml-encoding.html#xmlCharEncFirstLine">xmlCharEncFirstLine</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 73 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 74 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); 75int <a 
href="libxml-encoding.html#xmlCharEncCloseFunc">xmlCharEncCloseFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler); 76int <a href="libxml-encoding.html#UTF8Toisolat1">UTF8Toisolat1</a> (unsigned char *out, 77 int *outlen, 78 unsigned char *in, 79 int *inlen); 80int <a href="libxml-encoding.html#isolat1ToUTF8">isolat1ToUTF8</a> (unsigned char *out, 81 int *outlen, 82 unsigned char *in, 83 int *inlen); 84int <a href="libxml-encoding.html#xmlGetUTF8Char">xmlGetUTF8Char</a> (unsigned char *utf, 85 int *len); 86int <a href="libxml-encoding.html#xmlCheckUTF8">xmlCheckUTF8</a> (unsigned char *utf); 87int <a href="libxml-encoding.html#xmlUTF8Strsize">xmlUTF8Strsize</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 88 int len); 89<a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strndup">xmlUTF8Strndup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 90 int len); 91<a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strpos">xmlUTF8Strpos</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 92 int pos); 93int <a href="libxml-encoding.html#xmlUTF8Strloc">xmlUTF8Strloc</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 94 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar); 95<a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strsub">xmlUTF8Strsub</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 96 int start, 97 int len); 98int <a href="libxml-encoding.html#xmlUTF8Strlen">xmlUTF8Strlen</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf); 99int <a href="libxml-encoding.html#xmlUTF8Size">xmlUTF8Size</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf); 100int <a href="libxml-encoding.html#xmlUTF8Charcmp">xmlUTF8Charcmp</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf1, 101 const <a 
href="libxml-tree.html#xmlChar">xmlChar</a> *utf2); 102</pre></div><div class="refsect1" lang="en"><h2>Description</h2><p> 103 104</p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="xmlCharEncoding"></a>enum xmlCharEncoding</h3><pre class="programlisting">typedef enum { 105 XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ 106 XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ 107 XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ 108 XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ 109 XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ 110 XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ 111 XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ 112 XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ 113 XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ 114 XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ 115 XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ 116 XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ 117 XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ 118 XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ 119 XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ 120 XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ 121 XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ 122 XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ 123 XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ 124 XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ 125 XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ 126 XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ 127 XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ 128 XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ 129} xmlCharEncoding; 130</pre><p> 131Predefined values for some standard encodings. 132Libxml does not do beforehand translation on UTF8, ISOLatinX. 133It also supports UTF16 (LE and BE) by default. 134</p><p> 135Anything else would have to be translated to UTF8 before being 136given to the parser itself. 
The BOM for UTF16 and the encoding 137declaration are looked at and a converter is looked for at that 138point. If not found the parser stops here as asked by the XML REC. 139A converter can be registered by the user using xmlRegisterCharEncodingHandler 140but the current form doesn't allow stateful transcoding (a serious 141problem agreed !). If iconv has been found it will be used 142automatically and allow stateful transcoding, the simplest is then 143to be sure to enable iconv and to provide iconv libs for the encoding 144support needed.</p><p> 145 146</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingInputFunc"></a>xmlCharEncodingInputFunc ()</h3><pre class="programlisting">int (*xmlCharEncodingInputFunc) (unsigned char *out, 147 int *outlen, 148 unsigned char *in, 149 int *inlen);</pre><p> 150Take a block of chars in the original encoding and try to convert 151it to an UTF-8 block of chars out.</p><p> 152 153</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td> a pointer to an array of bytes to store the UTF-8 result 154</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>out</tt></i> 155</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> a pointer to an array of chars in the original encoding 156</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>in</tt></i> 157</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written, or -1 by lack of space, or -2 158 if the transcoding failed. 159The value of <i><tt>inlen</tt></i> after return is the number of octets consumed 160 if the return value is positive, else unpredictable. 161The value of <i><tt>outlen</tt></i> after return is the number of octets produced. 
162</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingOutputFunc"></a>xmlCharEncodingOutputFunc ()</h3><pre class="programlisting">int (*xmlCharEncodingOutputFunc) (unsigned char *out, 163 int *outlen, 164 unsigned char *in, 165 int *inlen);</pre><p> 166Take a block of UTF-8 chars in and try to convert it to another 167encoding. 168Note: a first call designed to produce heading info is made with 169in = NULL. If stateful this should also initialize the encoder state.</p><p> 170 171</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td> a pointer to an array of bytes to store the result 172</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>out</tt></i> 173</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> a pointer to an array of UTF-8 chars 174</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>in</tt></i> 175</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written, or -1 by lack of space, or -2 176 if the transcoding failed. 177The value of <i><tt>inlen</tt></i> after return is the number of octets consumed 178 if the return value is positive, else unpredictable. 179The value of <i><tt>outlen</tt></i> after return is the number of octets produced. 
180</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandler"></a>struct xmlCharEncodingHandler</h3><pre class="programlisting">struct xmlCharEncodingHandler { 181 char *name; 182 xmlCharEncodingInputFunc input; 183 xmlCharEncodingOutputFunc output; 184#ifdef LIBXML_ICONV_ENABLED 185 iconv_t iconv_in; 186 iconv_t iconv_out; 187#endif /* LIBXML_ICONV_ENABLED */ 188}; 189</pre><p> 190 191</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandlerPtr"></a>xmlCharEncodingHandlerPtr</h3><pre class="programlisting">typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; 192</pre><p> 193 194</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlInitCharEncodingHandlers"></a>xmlInitCharEncodingHandlers ()</h3><pre class="programlisting">void xmlInitCharEncodingHandlers (void);</pre><p> 195Initialize the char encoding support, it registers the default 196encoding supported. 
197NOTE: while public, this function usually doesn't need to be called 198 in normal processing.</p><p> 199 200</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupCharEncodingHandlers"></a>xmlCleanupCharEncodingHandlers ()</h3><pre class="programlisting">void xmlCleanupCharEncodingHandlers (void);</pre><p> 201Cleanup the memory allocated for the char encoding support, it 202unregisters all the encoding handlers and the aliases.</p><p> 203 204</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlRegisterCharEncodingHandler"></a>xmlRegisterCharEncodingHandler ()</h3><pre class="programlisting">void xmlRegisterCharEncodingHandler (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler);</pre><p> 205Register the char encoding handler, surprising, isn't it ?</p><p> 206 207</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>�:</span></td><td> the xmlCharEncodingHandlerPtr handler block 208</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingHandler"></a>xmlGetCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlGetCharEncodingHandler 209 (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> 210Search in the registered set the handler able to read/write that encoding.</p><p> 211 212</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>enc</tt></i>�:</span></td><td> an xmlCharEncoding value. 
213</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the handler or NULL if not found 214</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlFindCharEncodingHandler"></a>xmlFindCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlFindCharEncodingHandler 215 (const char *name);</pre><p> 216Search in the registered set the handler able to read/write that encoding.</p><p> 217 218</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>�:</span></td><td> a string describing the char encoding. 219</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the handler or NULL if not found 220</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlNewCharEncodingHandler"></a>xmlNewCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlNewCharEncodingHandler 221 (const char *name, 222 <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input, 223 <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output);</pre><p> 224Create and registers an xmlCharEncodingHandler.</p><p> 225 226</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>�:</span></td><td> the encoding name, in UTF-8 format (ASCII actually) 227</td></tr><tr><td><span class="term"><i><tt>input</tt></i>�:</span></td><td> the xmlCharEncodingInputFunc to read that encoding 228</td></tr><tr><td><span class="term"><i><tt>output</tt></i>�:</span></td><td> 
the xmlCharEncodingOutputFunc to write that encoding 229</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the xmlCharEncodingHandlerPtr created (or NULL in case of error). 230</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlAddEncodingAlias"></a>xmlAddEncodingAlias ()</h3><pre class="programlisting">int xmlAddEncodingAlias (const char *name, 231 const char *alias);</pre><p> 232Registers an alias <i><tt>alias</tt></i> for an encoding named <i><tt>name</tt></i>. An existing alias 233will be overwritten.</p><p> 234 235</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td> the encoding name as parsed, in UTF-8 format (ASCII actually) 236</td></tr><tr><td><span class="term"><i><tt>alias</tt></i>&nbsp;:</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually) 237</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 in case of success, -1 in case of error 238</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDelEncodingAlias"></a>xmlDelEncodingAlias ()</h3><pre class="programlisting">int xmlDelEncodingAlias (const char *alias);</pre><p> 239Unregisters an encoding alias <i><tt>alias</tt></i></p><p> 240 241</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>alias</tt></i>&nbsp;:</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually) 242</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 in case of success, -1 in case of error 243</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" 
lang="en"><h3><a name="xmlGetEncodingAlias"></a>xmlGetEncodingAlias ()</h3><pre class="programlisting">const char* xmlGetEncodingAlias (const char *alias);</pre><p> 244Look up an encoding name for the given alias.</p><p> 245 246</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>alias</tt></i>&nbsp;:</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually) 247</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>NULL if not found, the original name otherwise 248</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupEncodingAliases"></a>xmlCleanupEncodingAliases ()</h3><pre class="programlisting">void xmlCleanupEncodingAliases (void);</pre><p> 249Unregisters all aliases</p><p> 250 251</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlParseCharEncoding"></a>xmlParseCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlParseCharEncoding (const char *name);</pre><p> 252Compare the string to the encoding schemes already known. Note 253that the comparison is case insensitive, according to section 254[XML] 4.3.3 Character Encoding in Entities.</p><p> 255 256</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td> the encoding name as parsed, in UTF-8 format (ASCII actually) 257</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 258if not recognized. 
259</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingName"></a>xmlGetCharEncodingName ()</h3><pre class="programlisting">const char* xmlGetCharEncodingName (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> 260The "canonical" name for XML encoding. 261Cf. http://www.w3.org/TR/REC-xml#charencoding 262Section 4.3.3 Character Encoding in Entities</p><p> 263 264</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>enc</tt></i>&nbsp;:</span></td><td> the encoding 265</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the canonical name for the given encoding 266</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDetectCharEncoding"></a>xmlDetectCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlDetectCharEncoding (unsigned char *in, 267 int len);</pre><p> 268Guess the encoding of the entity using the first bytes of the entity content 269according to the non-normative appendix F of the XML-1.0 recommendation.</p><p> 270 271</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> a pointer to the first bytes of the XML entity, must be at least 272 4 bytes long. 273</td></tr><tr><td><span class="term"><i><tt>len</tt></i>&nbsp;:</span></td><td> the length of the buffer 274</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>one of the XML_CHAR_ENCODING_... values. 
275</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncOutFunc"></a>xmlCharEncOutFunc ()</h3><pre class="programlisting">int xmlCharEncOutFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 276 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 277 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> 278Generic front-end for the encoding handler output function. 279A first call with <i><tt>in</tt></i> == NULL has to be made first to initiate the 280output in case of non-stateless encodings needing to initiate their 281state or the output (like the BOM in UTF16). 282In case of UTF8 sequence conversion errors for the given encoder, 283the content will be automatically remapped to a CharRef sequence.</p><p> 284 285</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>&nbsp;:</span></td><td> char encoding transformation data structure 286</td></tr><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td> an xmlBuffer for the output. 
287</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> an xmlBuffer for the input 288</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or 289 -1 in case of general error, or 290 -2 if the transcoding fails (because *in is not a valid utf8 string or 291 the result of transformation can't fit into the encoding we want) 292</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncInFunc"></a>xmlCharEncInFunc ()</h3><pre class="programlisting">int xmlCharEncInFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 293 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 294 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> 295Generic front-end for the encoding handler input function</p><p> 296 297</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>&nbsp;:</span></td><td> char encoding transformation data structure 298</td></tr><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td> an xmlBuffer for the output. 
299</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> an xmlBuffer for the input 300</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or 301 -1 in case of general error, or 302 -2 if the transcoding fails (because *in is not a valid utf8 string or 303 the result of transformation can't fit into the encoding we want) 304</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncFirstLine"></a>xmlCharEncFirstLine ()</h3><pre class="programlisting">int xmlCharEncFirstLine (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, 305 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, 306 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> 307Front-end for the encoding handler input function, but handle only 308the very first line, i.e. limit itself to 45 chars.</p><p> 309 310</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>&nbsp;:</span></td><td> char encoding transformation data structure 311</td></tr><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td> an xmlBuffer for the output. 
312</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> an xmlBuffer for the input 313</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or 314 -1 in case of general error, or 315 -2 if the transcoding fails (because *in is not a valid utf8 string or 316 the result of transformation can't fit into the encoding we want) 317</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncCloseFunc"></a>xmlCharEncCloseFunc ()</h3><pre class="programlisting">int xmlCharEncCloseFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler);</pre><p> 318Generic front-end for encoding handler close function</p><p> 319 320</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>&nbsp;:</span></td><td> char encoding transformation data structure 321</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, or -1 in case of error 322</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8Toisolat1"></a>UTF8Toisolat1 ()</h3><pre class="programlisting">int UTF8Toisolat1 (unsigned char *out, 323 int *outlen, 324 unsigned char *in, 325 int *inlen);</pre><p> 326Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 327block of chars out.</p><p> 328 329</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td> a pointer to an array of bytes to store the result 330</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>out</tt></i> 331</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> a 
pointer to an array of UTF-8 chars 332</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>in</tt></i> 333</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise. 334The value of <i><tt>inlen</tt></i> after return is the number of octets consumed 335 if the return value is positive, else unpredictable. 336The value of <i><tt>outlen</tt></i> after return is the number of octets produced. 337</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="isolat1ToUTF8"></a>isolat1ToUTF8 ()</h3><pre class="programlisting">int isolat1ToUTF8 (unsigned char *out, 338 int *outlen, 339 unsigned char *in, 340 int *inlen);</pre><p> 341Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 342block of chars out.</p><p> 343 344</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td> a pointer to an array of bytes to store the result 345</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>out</tt></i> 346</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td> a pointer to an array of ISO Latin 1 chars 347</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td> the length of <i><tt>in</tt></i> 348</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, or -1 otherwise. 349The value of <i><tt>inlen</tt></i> after return is the number of octets consumed 350 if the return value is positive, else unpredictable. 351The value of <i><tt>outlen</tt></i> after return is the number of octets produced. 
352</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetUTF8Char"></a>xmlGetUTF8Char ()</h3><pre class="programlisting">int xmlGetUTF8Char (unsigned char *utf, 353 int *len);</pre><p> 354Read one UTF8 Char from <i><tt>utf</tt></i></p><p> 355 356</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td> a sequence of UTF-8 encoded bytes 357</td></tr><tr><td><span class="term"><i><tt>len</tt></i>�:</span></td><td> a pointer to <i><tt>bytes</tt></i> len 358</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the char value or -1 in case of error and update <i><tt>len</tt></i> with the 359 number of bytes used 360</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCheckUTF8"></a>xmlCheckUTF8 ()</h3><pre class="programlisting">int xmlCheckUTF8 (unsigned char *utf);</pre><p> 361Checks <i><tt>utf</tt></i> for being valid utf-8. <i><tt>utf</tt></i> is assumed to be 362null-terminated. This function is not super-strict, as it will 363allow longer utf-8 sequences than necessary. Note that Java is 364capable of producing these sequences if provoked. Also note, this 365routine checks for the 4-byte maximum size, but does not check for 3660x10ffff maximum value.</p><p> 367 368</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td> Pointer to putative utf-8 encoded string. 369</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> true if <i><tt>utf</tt></i> is valid. 
370</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsize"></a>xmlUTF8Strsize ()</h3><pre class="programlisting">int xmlUTF8Strsize (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 371 int len);</pre><p> 372storage size of a UTF8 string</p><p> 373 374</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>&nbsp;:</span></td><td> a sequence of UTF-8 encoded bytes 375</td></tr><tr><td><span class="term"><i><tt>len</tt></i>&nbsp;:</span></td><td> the number of characters in the array 376</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the storage size of 377the first 'len' characters of <i><tt>utf</tt></i> 378 379</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strndup"></a>xmlUTF8Strndup ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strndup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 380 int len);</pre><p> 381a strndup for array of UTF8's</p><p> 382 383</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>&nbsp;:</span></td><td> the input UTF8 * 384</td></tr><tr><td><span class="term"><i><tt>len</tt></i>&nbsp;:</span></td><td> the len of <i><tt>utf</tt></i> (in chars) 385</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a new UTF8 * or NULL 386</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strpos"></a>xmlUTF8Strpos ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strpos (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 387 int 
pos);</pre><p> 388a function to provide the equivalent of fetching a 389character from a string array</p><p> 390 391</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>&nbsp;:</span></td><td> the input UTF8 * 392</td></tr><tr><td><span class="term"><i><tt>pos</tt></i>&nbsp;:</span></td><td> the position of the desired UTF8 char (in chars) 393</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a pointer to the UTF8 character or NULL 394</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strloc"></a>xmlUTF8Strloc ()</h3><pre class="programlisting">int xmlUTF8Strloc (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 395 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar);</pre><p> 396a function to provide relative location of a UTF8 char</p><p> 397 398</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>&nbsp;:</span></td><td> the input UTF8 * 399</td></tr><tr><td><span class="term"><i><tt>utfchar</tt></i>&nbsp;:</span></td><td> the UTF8 character to be found 400</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the relative character position of the desired char 401or -1 if not found 402</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsub"></a>xmlUTF8Strsub ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strsub (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, 403 int start, 404 int len);</pre><p> 405Note: positions are given in units of UTF-8 chars</p><p> 406 407</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span 
class="term"><i><tt>utf</tt></i>&nbsp;:</span></td><td> a sequence of UTF-8 encoded bytes 408</td></tr><tr><td><span class="term"><i><tt>start</tt></i>&nbsp;:</span></td><td> relative pos of first char 409</td></tr><tr><td><span class="term"><i><tt>len</tt></i>&nbsp;:</span></td><td> total number to copy 410</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a pointer to a newly created string 411or NULL if any problem 412</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strlen"></a>xmlUTF8Strlen ()</h3><pre class="programlisting">int xmlUTF8Strlen (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);</pre><p> 413compute the length of a UTF8 string, it doesn't do a full UTF8 414checking of the content of the string.</p><p> 415 416</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>&nbsp;:</span></td><td> a sequence of UTF-8 encoded bytes 417</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of characters in the string or -1 in case of error 418</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Size"></a>xmlUTF8Size ()</h3><pre class="programlisting">int xmlUTF8Size (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);</pre><p> 419</p><p> 420 421</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>&nbsp;:</span></td><td> pointer to the UTF8 character 422</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes in the character, -1 on format error 423</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div 
class="refsect2" lang="en"><h3><a name="xmlUTF8Charcmp"></a>xmlUTF8Charcmp ()</h3><pre class="programlisting">int xmlUTF8Charcmp (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf1, 424 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf2);</pre><p> 425</p><p> 426 427</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf1</tt></i>&nbsp;:</span></td><td> pointer to first UTF8 char 428</td></tr><tr><td><span class="term"><i><tt>utf2</tt></i>&nbsp;:</span></td><td> pointer to second UTF8 char 429</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>result of comparing the two UCS4 values 430as with xmlStrncmp 431</td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-parserInternals.html"><b>&lt;&lt;&nbsp;parserInternals</b></a></td><td align="right"><a accesskey="n" href="libxml-hash.html"><b>hash&nbsp;&gt;&gt;</b></a></td></tr></table></body></html> 432