libxml-encoding.html revision cfba2fe0bbbdd1158666fc1090c7f48c0fdb00e1
1<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>encoding</title><meta name="generator" content="DocBook XSL Stylesheets V1.58.1"><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css">
2        .synopsis, .classsynopsis {
3            background: #eeeeee;
4            border: solid 1px #aaaaaa;
5            padding: 0.5em;
6        }
7        .programlisting {
8            background: #eeeeff;
9            border: solid 1px #aaaaff;
10            padding: 0.5em;
11        }
12        .variablelist {
13            padding: 4px;
14            margin-left: 3em;
15        }
16        .navigation {
17            background: #ffeeee;
18            border: solid 1px #ffaaaa;
19            margin-top: 0.5em;
20            margin-bottom: 0.5em;
21        }
22        .navigation a {
23            color: #770000;
24        }
25        .navigation a:visited {
26            color: #550000;
27        }
28        .navigation .title {
29            font-size: 200%;
30        }
31      </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="libxml-lib.html" title="Libxml Library Reference"><link rel="previous" href="libxml-parserInternals.html" title="parserInternals"><link rel="next" href="libxml-hash.html" title="hash"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-parserInternals.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="libxml-lib.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-hash.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-encoding"></a><div class="titlepage"></div><div class="refnamediv"><h2>encoding</h2><p>encoding &#8212; </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis">
32
33
34
35enum        <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a>;
36int         (<a href="libxml-encoding.html#xmlCharEncodingInputFunc">*xmlCharEncodingInputFunc</a>)     (unsigned char *out,
37                                             int *outlen,
38                                             unsigned char *in,
39                                             int *inlen);
40int         (<a href="libxml-encoding.html#xmlCharEncodingOutputFunc">*xmlCharEncodingOutputFunc</a>)    (unsigned char *out,
41                                             int *outlen,
42                                             unsigned char *in,
43                                             int *inlen);
44struct      <a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a>;
45typedef     <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a>;
46void        <a href="libxml-encoding.html#xmlInitCharEncodingHandlers">xmlInitCharEncodingHandlers</a>     (void);
47void        <a href="libxml-encoding.html#xmlCleanupCharEncodingHandlers">xmlCleanupCharEncodingHandlers</a>  (void);
48void        <a href="libxml-encoding.html#xmlRegisterCharEncodingHandler">xmlRegisterCharEncodingHandler</a>  (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler);
49<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlGetCharEncodingHandler">xmlGetCharEncodingHandler</a>
50                                            (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);
51<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlFindCharEncodingHandler">xmlFindCharEncodingHandler</a>
52                                            (const char *name);
53<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlNewCharEncodingHandler">xmlNewCharEncodingHandler</a>
54                                            (const char *name,
55                                             <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input,
56                                             <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output);
57int         <a href="libxml-encoding.html#xmlAddEncodingAlias">xmlAddEncodingAlias</a>             (const char *name,
58                                             const char *alias);
59int         <a href="libxml-encoding.html#xmlDelEncodingAlias">xmlDelEncodingAlias</a>             (const char *alias);
60const char* <a href="libxml-encoding.html#xmlGetEncodingAlias">xmlGetEncodingAlias</a>             (const char *alias);
61void        <a href="libxml-encoding.html#xmlCleanupEncodingAliases">xmlCleanupEncodingAliases</a>       (void);
62<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlParseCharEncoding">xmlParseCharEncoding</a>        (const char *name);
63const char* <a href="libxml-encoding.html#xmlGetCharEncodingName">xmlGetCharEncodingName</a>          (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);
64<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlDetectCharEncoding">xmlDetectCharEncoding</a>       (unsigned char *in,
65                                             int len);
66int         <a href="libxml-encoding.html#xmlCharEncOutFunc">xmlCharEncOutFunc</a>               (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
67                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
68                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);
69int         <a href="libxml-encoding.html#xmlCharEncInFunc">xmlCharEncInFunc</a>                (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
70                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
71                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);
72int         <a href="libxml-encoding.html#xmlCharEncFirstLine">xmlCharEncFirstLine</a>             (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
73                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
74                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);
75int         <a href="libxml-encoding.html#xmlCharEncCloseFunc">xmlCharEncCloseFunc</a>             (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler);
76int         <a href="libxml-encoding.html#UTF8Toisolat1">UTF8Toisolat1</a>                   (unsigned char *out,
77                                             int *outlen,
78                                             unsigned char *in,
79                                             int *inlen);
80int         <a href="libxml-encoding.html#isolat1ToUTF8">isolat1ToUTF8</a>                   (unsigned char *out,
81                                             int *outlen,
82                                             unsigned char *in,
83                                             int *inlen);
84int         <a href="libxml-encoding.html#xmlGetUTF8Char">xmlGetUTF8Char</a>                  (unsigned char *utf,
85                                             int *len);
86int         <a href="libxml-encoding.html#xmlCheckUTF8">xmlCheckUTF8</a>                    (unsigned char *utf);
87int         <a href="libxml-encoding.html#xmlUTF8Strsize">xmlUTF8Strsize</a>                  (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
88                                             int len);
89<a href="libxml-tree.html#xmlChar">xmlChar</a>*    <a href="libxml-encoding.html#xmlUTF8Strndup">xmlUTF8Strndup</a>                  (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
90                                             int len);
91<a href="libxml-tree.html#xmlChar">xmlChar</a>*    <a href="libxml-encoding.html#xmlUTF8Strpos">xmlUTF8Strpos</a>                   (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
92                                             int pos);
93int         <a href="libxml-encoding.html#xmlUTF8Strloc">xmlUTF8Strloc</a>                   (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
94                                             const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar);
95<a href="libxml-tree.html#xmlChar">xmlChar</a>*    <a href="libxml-encoding.html#xmlUTF8Strsub">xmlUTF8Strsub</a>                   (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
96                                             int start,
97                                             int len);
98int         <a href="libxml-encoding.html#xmlUTF8Strlen">xmlUTF8Strlen</a>                   (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);
99int         <a href="libxml-encoding.html#xmlUTF8Size">xmlUTF8Size</a>                     (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);
100int         <a href="libxml-encoding.html#xmlUTF8Charcmp">xmlUTF8Charcmp</a>                  (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf1,
101                                             const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf2);
102</pre></div><div class="refsect1" lang="en"><h2>Description</h2><p>
103
104</p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="xmlCharEncoding"></a>enum xmlCharEncoding</h3><pre class="programlisting">typedef enum {
105    XML_CHAR_ENCODING_ERROR=   -1, /* No char encoding detected */
106    XML_CHAR_ENCODING_NONE=	0, /* No char encoding detected */
107    XML_CHAR_ENCODING_UTF8=	1, /* UTF-8 */
108    XML_CHAR_ENCODING_UTF16LE=	2, /* UTF-16 little endian */
109    XML_CHAR_ENCODING_UTF16BE=	3, /* UTF-16 big endian */
110    XML_CHAR_ENCODING_UCS4LE=	4, /* UCS-4 little endian */
111    XML_CHAR_ENCODING_UCS4BE=	5, /* UCS-4 big endian */
112    XML_CHAR_ENCODING_EBCDIC=	6, /* EBCDIC uh! */
113    XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */
114    XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */
115    XML_CHAR_ENCODING_UCS2=	9, /* UCS-2 */
116    XML_CHAR_ENCODING_8859_1=	10,/* ISO-8859-1 ISO Latin 1 */
117    XML_CHAR_ENCODING_8859_2=	11,/* ISO-8859-2 ISO Latin 2 */
118    XML_CHAR_ENCODING_8859_3=	12,/* ISO-8859-3 */
119    XML_CHAR_ENCODING_8859_4=	13,/* ISO-8859-4 */
120    XML_CHAR_ENCODING_8859_5=	14,/* ISO-8859-5 */
121    XML_CHAR_ENCODING_8859_6=	15,/* ISO-8859-6 */
122    XML_CHAR_ENCODING_8859_7=	16,/* ISO-8859-7 */
123    XML_CHAR_ENCODING_8859_8=	17,/* ISO-8859-8 */
124    XML_CHAR_ENCODING_8859_9=	18,/* ISO-8859-9 */
125    XML_CHAR_ENCODING_2022_JP=  19,/* ISO-2022-JP */
126    XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
127    XML_CHAR_ENCODING_EUC_JP=   21,/* EUC-JP */
128    XML_CHAR_ENCODING_ASCII=    22 /* pure ASCII */
129} xmlCharEncoding;
130</pre><p>
131Predefined values for some standard encodings.
132Libxml doesn't do beforehand translation on UTF8, ISOLatinX.
133It also supports UTF16 (LE and BE) by default.
134</p><p>
135Anything else would have to be translated to UTF8 before being
136given to the parser itself. The BOM for UTF16 and the encoding
137declaration are looked at and a converter is looked for at that
138point. If not found, the parser stops here as asked by the XML REC.
139Converters can be registered by the user using xmlRegisterCharEncodingHandler
140but the current form doesn't allow stateful transcoding (a serious
141problem, agreed!). If iconv has been found it will be used
142automatically and allow stateful transcoding; the simplest is then
143to be sure to enable iconv and to provide iconv libs for the encoding
144support needed.</p><p>
145
146</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingInputFunc"></a>xmlCharEncodingInputFunc ()</h3><pre class="programlisting">int         (*xmlCharEncodingInputFunc)     (unsigned char *out,
147                                             int *outlen,
148                                             unsigned char *in,
149                                             int *inlen);</pre><p>
150Take a block of chars in the original encoding and try to convert
151it to an UTF-8 block of chars out.</p><p>
152
153</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td>  a pointer to an array of bytes to store the UTF-8 result
154</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td>  the length of <i><tt>out</tt></i>
155</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td>  a pointer to an array of chars in the original encoding
156</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td>  the length of <i><tt>in</tt></i>
157</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written, or -1 for lack of space, or -2
158    if the transcoding failed.
159The value of <i><tt>inlen</tt></i> after return is the number of octets consumed
160    if the return value is positive, else unpredictable.
161The value of <i><tt>outlen</tt></i> after return is the number of octets produced.
162</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingOutputFunc"></a>xmlCharEncodingOutputFunc ()</h3><pre class="programlisting">int         (*xmlCharEncodingOutputFunc)    (unsigned char *out,
163                                             int *outlen,
164                                             unsigned char *in,
165                                             int *inlen);</pre><p>
166Take a block of UTF-8 chars in and try to convert it to an other
167encoding.
168Note: a first call designed to produce heading info is called with
169in = NULL. If stateful this should also initialize the encoder state.</p><p>
170
171</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td>  a pointer to an array of bytes to store the result
172</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>&nbsp;:</span></td><td>  the length of <i><tt>out</tt></i>
173</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td>  a pointer to an array of UTF-8 chars
174</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>&nbsp;:</span></td><td>  the length of <i><tt>in</tt></i>
175</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written, or -1 for lack of space, or -2
176    if the transcoding failed.
177The value of <i><tt>inlen</tt></i> after return is the number of octets consumed
178    if the return value is positive, else unpredictable.
179The value of <i><tt>outlen</tt></i> after return is the number of octets produced.
180</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandler"></a>struct xmlCharEncodingHandler</h3><pre class="programlisting">struct xmlCharEncodingHandler {
181    char                       *name;
182    xmlCharEncodingInputFunc   input;
183    xmlCharEncodingOutputFunc  output;
184#ifdef LIBXML_ICONV_ENABLED
185    iconv_t                    iconv_in;
186    iconv_t                    iconv_out;
187#endif /* LIBXML_ICONV_ENABLED */
188};
189</pre><p>
190
191</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandlerPtr"></a>xmlCharEncodingHandlerPtr</h3><pre class="programlisting">typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
192</pre><p>
193
194</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlInitCharEncodingHandlers"></a>xmlInitCharEncodingHandlers ()</h3><pre class="programlisting">void        xmlInitCharEncodingHandlers     (void);</pre><p>
195Initialize the char encoding support, it registers the default
196encoding supported.
197NOTE: while public, this function usually doesn't need to be called
198      in normal processing.</p><p>
199
200</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupCharEncodingHandlers"></a>xmlCleanupCharEncodingHandlers ()</h3><pre class="programlisting">void        xmlCleanupCharEncodingHandlers  (void);</pre><p>
201Cleanup the memory allocated for the char encoding support, it
202unregisters all the encoding handlers and the aliases.</p><p>
203
204</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlRegisterCharEncodingHandler"></a>xmlRegisterCharEncodingHandler ()</h3><pre class="programlisting">void        xmlRegisterCharEncodingHandler  (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler);</pre><p>
205Register the char encoding handler, surprising, isn't it ?</p><p>
206
207</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>&nbsp;:</span></td><td>  the xmlCharEncodingHandlerPtr handler block
208</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingHandler"></a>xmlGetCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlGetCharEncodingHandler
209                                            (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p>
210Search in the registered set the handler able to read/write that encoding.</p><p>
211
212</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>enc</tt></i>&nbsp;:</span></td><td>  an xmlCharEncoding value.
213</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the handler or NULL if not found
214</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlFindCharEncodingHandler"></a>xmlFindCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlFindCharEncodingHandler
215                                            (const char *name);</pre><p>
216Search in the registered set the handler able to read/write that encoding.</p><p>
217
218</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td>  a string describing the char encoding.
219</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the handler or NULL if not found
220</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlNewCharEncodingHandler"></a>xmlNewCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlNewCharEncodingHandler
221                                            (const char *name,
222                                             <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input,
223                                             <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output);</pre><p>
224Creates and registers an xmlCharEncodingHandler.</p><p>
225
226</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td>  the encoding name, in UTF-8 format (ASCII actually)
227</td></tr><tr><td><span class="term"><i><tt>input</tt></i>&nbsp;:</span></td><td>  the xmlCharEncodingInputFunc to read that encoding
228</td></tr><tr><td><span class="term"><i><tt>output</tt></i>&nbsp;:</span></td><td>  the xmlCharEncodingOutputFunc to write that encoding
229</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the xmlCharEncodingHandlerPtr created (or NULL in case of error).
230</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlAddEncodingAlias"></a>xmlAddEncodingAlias ()</h3><pre class="programlisting">int         xmlAddEncodingAlias             (const char *name,
231                                             const char *alias);</pre><p>
232Registers an alias <i><tt>alias</tt></i> for an encoding named <i><tt>name</tt></i>. An existing alias
233will be overwritten.</p><p>
234
235</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td>  the encoding name as parsed, in UTF-8 format (ASCII actually)
236</td></tr><tr><td><span class="term"><i><tt>alias</tt></i>&nbsp;:</span></td><td>  the alias name as parsed, in UTF-8 format (ASCII actually)
237</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 in case of success, -1 in case of error
238</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDelEncodingAlias"></a>xmlDelEncodingAlias ()</h3><pre class="programlisting">int         xmlDelEncodingAlias             (const char *alias);</pre><p>
239Unregisters an encoding alias <i><tt>alias</tt></i></p><p>
240
241</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>alias</tt></i>&nbsp;:</span></td><td>  the alias name as parsed, in UTF-8 format (ASCII actually)
242</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 in case of success, -1 in case of error
243</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetEncodingAlias"></a>xmlGetEncodingAlias ()</h3><pre class="programlisting">const char* xmlGetEncodingAlias             (const char *alias);</pre><p>
244Lookup an encoding name for the given alias.</p><p>
245
246</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>alias</tt></i>&nbsp;:</span></td><td>  the alias name as parsed, in UTF-8 format (ASCII actually)
247</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>NULL if not found, the original name otherwise
248</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupEncodingAliases"></a>xmlCleanupEncodingAliases ()</h3><pre class="programlisting">void        xmlCleanupEncodingAliases       (void);</pre><p>
249Unregisters all aliases</p><p>
250
251</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlParseCharEncoding"></a>xmlParseCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlParseCharEncoding        (const char *name);</pre><p>
252Compare the string to the encoding schemes already known. Note
253that the comparison is case insensitive according to section
254[XML] 4.3.3 Character Encoding in Entities.</p><p>
255
256</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i>&nbsp;:</span></td><td>  the encoding name as parsed, in UTF-8 format (ASCII actually)
257</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
258if not recognized.
259</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingName"></a>xmlGetCharEncodingName ()</h3><pre class="programlisting">const char* xmlGetCharEncodingName          (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p>
260The "canonical" name for XML encoding.
261Cf. http://www.w3.org/TR/REC-xml#charencoding
262Section 4.3.3  Character Encoding in Entities</p><p>
263
264</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>enc</tt></i>&nbsp;:</span></td><td>  the encoding
265</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the canonical name for the given encoding
266</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDetectCharEncoding"></a>xmlDetectCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlDetectCharEncoding       (unsigned char *in,
267                                             int len);</pre><p>
268Guess the encoding of the entity using the first bytes of the entity content
269according to the non-normative appendix F of the XML-1.0 recommendation.</p><p>
270
271</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td>  a pointer to the first bytes of the XML entity, must be at least
272      4 bytes long.
273</td></tr><tr><td><span class="term"><i><tt>len</tt></i>&nbsp;:</span></td><td>  the length of the buffer
274</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>one of the XML_CHAR_ENCODING_... values.
275</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncOutFunc"></a>xmlCharEncOutFunc ()</h3><pre class="programlisting">int         xmlCharEncOutFunc               (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
276                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
277                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p>
278Generic front-end for the encoding handler output function.
279A first call with <i><tt>in</tt></i> == NULL has to be made first to initiate the
280output in case of non-stateless encoding needing to initiate their
281state or the output (like the BOM in UTF16).
282In case of UTF8 sequence conversion errors for the given encoder,
283the content will be automatically remapped to a CharRef sequence.</p><p>
284
285</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>&nbsp;:</span></td><td>	char encoding transformation data structure
286</td></tr><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td>  an xmlBuffer for the output.
287</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td>  an xmlBuffer for the input
288</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written on success, or
289    -1 for a general error, or
290    -2 if the transcoding fails (because *in is not a valid UTF-8 string or
291       the result of the transformation can't fit into the encoding we want)
292</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncInFunc"></a>xmlCharEncInFunc ()</h3><pre class="programlisting">int         xmlCharEncInFunc                (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
293                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
294                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p>
295Generic front-end for the encoding handler input function</p><p>
296
297</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>&nbsp;:</span></td><td>	char encoding transformation data structure
298</td></tr><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td>  an xmlBuffer for the output.
299</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td>  an xmlBuffer for the input
300</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written on success, or
301    -1 for a general error, or
302    -2 if the transcoding fails (because *in is not a valid UTF-8 string or
303       the result of the transformation can't fit into the encoding we want)
304</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncFirstLine"></a>xmlCharEncFirstLine ()</h3><pre class="programlisting">int         xmlCharEncFirstLine             (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
305                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
306                                             <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p>
307Front-end for the encoding handler input function, but handle only
308the very first line, i.e. limit itself to 45 chars.</p><p>
309
310</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>&nbsp;:</span></td><td>	char encoding transformation data structure
311</td></tr><tr><td><span class="term"><i><tt>out</tt></i>&nbsp;:</span></td><td>  an xmlBuffer for the output.
312</td></tr><tr><td><span class="term"><i><tt>in</tt></i>&nbsp;:</span></td><td>  an xmlBuffer for the input
313</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written on success, or
314    -1 for a general error, or
315    -2 if the transcoding fails (because *in is not a valid UTF-8 string or
316       the result of the transformation can't fit into the encoding we want)
317</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncCloseFunc"></a>xmlCharEncCloseFunc ()</h3><pre class="programlisting">int         xmlCharEncCloseFunc             (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler);</pre><p>
318Generic front-end for encoding handler close function</p><p>
319
320</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i>&nbsp;:</span></td><td>	char encoding transformation data structure
321</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, or -1 in case of error
322</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8Toisolat1"></a>UTF8Toisolat1 ()</h3><pre class="programlisting">int         UTF8Toisolat1                   (unsigned char *out,
323                                             int *outlen,
324                                             unsigned char *in,
325                                             int *inlen);</pre><p>
326Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
327block of chars out.</p><p>
328
329</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>�:</span></td><td>  a pointer to an array of bytes to store the result
330</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>�:</span></td><td>  the length of <i><tt>out</tt></i>
331</td></tr><tr><td><span class="term"><i><tt>in</tt></i>�:</span></td><td>  a pointer to an array of UTF-8 chars
332</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>�:</span></td><td>  the length of <i><tt>in</tt></i>
333</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise
334The value of <i><tt>inlen</tt></i> after return is the number of octets consumed
335    as the return value is positive, else unpredictable.
336The value of <i><tt>outlen</tt></i> after return is the number of octets produced.
337</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="isolat1ToUTF8"></a>isolat1ToUTF8 ()</h3><pre class="programlisting">int         isolat1ToUTF8                   (unsigned char *out,
338                                             int *outlen,
339                                             unsigned char *in,
340                                             int *inlen);</pre><p>
341Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
342block of chars out.</p><p>
343
344</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i>�:</span></td><td>  a pointer to an array of bytes to store the result
345</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>�:</span></td><td>  the length of <i><tt>out</tt></i>
346</td></tr><tr><td><span class="term"><i><tt>in</tt></i>�:</span></td><td>  a pointer to an array of ISO Latin 1 chars
347</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>�:</span></td><td>  the length of <i><tt>in</tt></i>
348</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, or -1 otherwise
349The value of <i><tt>inlen</tt></i> after return is the number of octets consumed
350    as the return value is positive, else unpredictable.
351The value of <i><tt>outlen</tt></i> after return is the number of octets produced.
352</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetUTF8Char"></a>xmlGetUTF8Char ()</h3><pre class="programlisting">int         xmlGetUTF8Char                  (unsigned char *utf,
353                                             int *len);</pre><p>
354Read one UTF8 Char from <i><tt>utf</tt></i></p><p>
355
356</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td>  a sequence of UTF-8 encoded bytes
357</td></tr><tr><td><span class="term"><i><tt>len</tt></i>&nbsp;:</span></td><td>  a pointer to the length in bytes of <i><tt>utf</tt></i>
358</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the char value or -1 in case of error, and updates <i><tt>len</tt></i> with the
359       number of bytes used
360</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCheckUTF8"></a>xmlCheckUTF8 ()</h3><pre class="programlisting">int         xmlCheckUTF8                    (unsigned char *utf);</pre><p>
361Checks <i><tt>utf</tt></i> for being valid utf-8. <i><tt>utf</tt></i> is assumed to be
362null-terminated. This function is not super-strict, as it will
363allow longer utf-8 sequences than necessary. Note that Java is
364capable of producing these sequences if provoked. Also note, this
365routine checks for the 4-byte maximum size, but does not check for
3660x10ffff maximum value.</p><p>
367
368</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td> Pointer to putative utf-8 encoded string.
369</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> true if <i><tt>utf</tt></i> is valid.
370</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsize"></a>xmlUTF8Strsize ()</h3><pre class="programlisting">int         xmlUTF8Strsize                  (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
371                                             int len);</pre><p>
372storage size of an UTF8 string</p><p>
373
374</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td>  a sequence of UTF-8 encoded bytes
375</td></tr><tr><td><span class="term"><i><tt>len</tt></i>�:</span></td><td>  the number of characters in the array
376</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the storage size of
377the first <i><tt>len</tt></i> characters of <i><tt>utf</tt></i>
378
379</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strndup"></a>xmlUTF8Strndup ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>*    xmlUTF8Strndup                  (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
380                                             int len);</pre><p>
381a strndup for array of UTF8's</p><p>
382
383</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td>  the input UTF8 *
384</td></tr><tr><td><span class="term"><i><tt>len</tt></i>�:</span></td><td>  the len of <i><tt>utf</tt></i> (in chars)
385</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a new UTF8 * or NULL
386</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strpos"></a>xmlUTF8Strpos ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>*    xmlUTF8Strpos                   (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
387                                             int pos);</pre><p>
388a function to provide the equivalent of fetching a
389character from a string array</p><p>
390
391</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td>  the input UTF8 *
392</td></tr><tr><td><span class="term"><i><tt>pos</tt></i>�:</span></td><td>  the position of the desired UTF8 char (in chars)
393</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a pointer to the UTF8 character or NULL
394</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strloc"></a>xmlUTF8Strloc ()</h3><pre class="programlisting">int         xmlUTF8Strloc                   (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
395                                             const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar);</pre><p>
396a function to provide relative location of a UTF8 char</p><p>
397
398</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td>  the input UTF8 *
399</td></tr><tr><td><span class="term"><i><tt>utfchar</tt></i>�:</span></td><td>  the UTF8 character to be found
400</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the relative character position of the desired char
401or -1 if not found
402</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsub"></a>xmlUTF8Strsub ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>*    xmlUTF8Strsub                   (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
403                                             int start,
404                                             int len);</pre><p>
405Note:  positions are given in units of UTF-8 chars</p><p>
406
407</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td>  a sequence of UTF-8 encoded bytes
408</td></tr><tr><td><span class="term"><i><tt>start</tt></i>�:</span></td><td> relative pos of first char
409</td></tr><tr><td><span class="term"><i><tt>len</tt></i>�:</span></td><td>   total number to copy
410</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a pointer to a newly created string
411or NULL if any problem
412</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strlen"></a>xmlUTF8Strlen ()</h3><pre class="programlisting">int         xmlUTF8Strlen                   (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);</pre><p>
413Computes the length of a UTF8 string; it doesn't do a full UTF8
414check of the content of the string.</p><p>
415
416</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td>  a sequence of UTF-8 encoded bytes
417</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of characters in the string or -1 in case of error
418</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Size"></a>xmlUTF8Size ()</h3><pre class="programlisting">int         xmlUTF8Size                     (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);</pre><p>
419Calculates the size in bytes of a single UTF8 character.</p><p>
420
421</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i>�:</span></td><td> pointer to the UTF8 character
422</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes in the character, or -1 on format error
423</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Charcmp"></a>xmlUTF8Charcmp ()</h3><pre class="programlisting">int         xmlUTF8Charcmp                  (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf1,
424                                             const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf2);</pre><p>
425Compares the UCS4 values of two UTF8 characters.</p><p>
426
427</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf1</tt></i>�:</span></td><td> pointer to first UTF8 char
428</td></tr><tr><td><span class="term"><i><tt>utf2</tt></i>�:</span></td><td> pointer to second UTF8 char
429</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>result of comparing the two UCS4 values
430as with xmlStrncmp
431</td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-parserInternals.html"><b>&lt;&lt;&nbsp;parserInternals</b></a></td><td align="right"><a accesskey="n" href="libxml-hash.html"><b>hash&nbsp;&gt;&gt;</b></a></td></tr></table></body></html>
432