1/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1]   ISO Latin-1 characters codes.
9 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10 *                Worldwide Character Encoding -- Version 1.0", Addison-
11 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12 *                described in Unicode Technical Report #4.
13 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14 *                Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * daniel@veillard.com
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21 */
22
23#define IN_LIBXML
24#include "libxml.h"
25
26#include <string.h>
27#include <limits.h>
28
29#ifdef HAVE_CTYPE_H
30#include <ctype.h>
31#endif
32#ifdef HAVE_STDLIB_H
33#include <stdlib.h>
34#endif
35#ifdef LIBXML_ICONV_ENABLED
36#ifdef HAVE_ERRNO_H
37#include <errno.h>
38#endif
39#endif
40#include <libxml/encoding.h>
41#include <libxml/xmlmemory.h>
42#ifdef LIBXML_HTML_ENABLED
43#include <libxml/HTMLparser.h>
44#endif
45#include <libxml/globals.h>
46#include <libxml/xmlerror.h>
47
48#include "buf.h"
49#include "enc.h"
50
51static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53
/*
 * One entry of the encoding alias table.
 */
struct _xmlCharEncodingAlias {
    const char *name;   /* encoding name the alias resolves to */
    const char *alias;  /* alias string used as the lookup key */
};
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
60
61static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62static int xmlCharEncodingAliasesNb = 0;
63static int xmlCharEncodingAliasesMax = 0;
64
65#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66#if 0
67#define DEBUG_ENCODING  /* Define this to get encoding traces */
68#endif
69#else
70#ifdef LIBXML_ISO8859X_ENABLED
71static void xmlRegisterCharEncodingHandlersISO8859x (void);
72#endif
73#endif
74
75static int xmlLittleEndian = 1;
76
77/**
78 * xmlEncodingErrMemory:
79 * @extra:  extra informations
80 *
81 * Handle an out of memory condition
82 */
83static void
84xmlEncodingErrMemory(const char *extra)
85{
86    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87}
88
89/**
90 * xmlErrEncoding:
91 * @error:  the error number
92 * @msg:  the error message
93 *
94 * n encoding error
95 */
96static void
97xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98{
99    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100                    XML_FROM_I18N, error, XML_ERR_FATAL,
101                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102}
103
104#ifdef LIBXML_ICU_ENABLED
105static uconv_t*
106openIcuConverter(const char* name, int toUnicode)
107{
108  UErrorCode status = U_ZERO_ERROR;
109  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110  if (conv == NULL)
111    return NULL;
112
113  conv->uconv = ucnv_open(name, &status);
114  if (U_FAILURE(status))
115    goto error;
116
117  status = U_ZERO_ERROR;
118  if (toUnicode) {
119    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
120                        NULL, NULL, NULL, &status);
121  }
122  else {
123    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
124                        NULL, NULL, NULL, &status);
125  }
126  if (U_FAILURE(status))
127    goto error;
128
129  status = U_ZERO_ERROR;
130  conv->utf8 = ucnv_open("UTF-8", &status);
131  if (U_SUCCESS(status))
132    return conv;
133
134error:
135  if (conv->uconv)
136    ucnv_close(conv->uconv);
137  xmlFree(conv);
138  return NULL;
139}
140
141static void
142closeIcuConverter(uconv_t *conv)
143{
144  if (conv != NULL) {
145    ucnv_close(conv->uconv);
146    ucnv_close(conv->utf8);
147    xmlFree(conv);
148  }
149}
150#endif /* LIBXML_ICU_ENABLED */
151
152/************************************************************************
153 *									*
154 *		Conversions To/From UTF8 encoding			*
155 *									*
156 ************************************************************************/
157
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Copy a block of ASCII chars to an UTF-8 block of chars. Bytes are
 * copied only while the number of bytes already written plus 5 is
 * smaller than the initial *@outlen.
 *
 * Returns the number of bytes written, or -1 if a byte >= 0x80 is met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const int capacity = *outlen;
    const int available = *inlen;
    int n = 0;

    /* One output byte per input byte, so a single index tracks both. */
    while ((n < available) && (n + 5 < capacity)) {
        if (in[n] >= 0x80) {
            /* not ASCII: report what was converted so far and fail */
            *outlen = n;
            *inlen = n;
            return(-1);
        }
        out[n] = in[n];
        n++;
    }

    *outlen = n;
    *inlen = n;
    return(n);
}
202
203#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars, or NULL for initialization
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. Conversion stops early, without error, when the
 * output is full or when the input ends in the middle of a multi-byte
 * sequence.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (malformed sequence or character outside ASCII), or -1 if @out,
 * @outlen or @inlen is NULL. A NULL @in returns 0 with both lengths set
 * to 0.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in Ascii */
            goto failed;
        }

        /* incomplete sequence at the end of the input: keep it for later */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte makes the sequence malformed. It must
             * be rejected, otherwise the partial value in c could be emitted
             * as a bogus character and the offending byte swallowed.
             */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto failed;
        }
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report only what was fully converted before the bad character */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
287#endif /* LIBXML_OUTPUT_ENABLED */
288
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged, the others
 * are expanded to two bytes. Conversion stops when the input is exhausted
 * or the next character does not fit in @out.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 * is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst = out;
    unsigned char *dstend;
    const unsigned char *src = in;
    const unsigned char *srcend;
    const unsigned char *srcstop;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dstend = out + *outlen;
    srcend = in + (*inlen);
    srcstop = srcend;

    /* Main loop: runs while at least two output bytes remain. */
    while ((src < srcend) && (dst < dstend - 1)) {
        if (*src >= 0x80) {
            const unsigned char ch = *src;

            dst[0] = ((ch >> 6) & 0x1F) | 0xC0;
            dst[1] = (ch & 0x3F) | 0x80;
            dst += 2;
            src++;
        }
        /* Never copy more plain bytes than there is room left for. */
        if ((srcstop - src) > (dstend - dst))
            srcstop = src + (dstend - dst);
        for ( ; (src < srcstop) && (*src < 0x80); src++)
            *dst++ = *src;
    }

    /* A last single-byte character may still fit in the final slot. */
    if ((src < srcend) && (dst < dstend) && (*src < 0x80))
        *dst++ = *src++;

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
337
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as both
 * buffers allow, without inspecting them.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     the number of bytes to copy is negative.
 *     The values of *outlen and *inlenb after return are both the number
 *     of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* copy the smaller of the two sizes */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
373
374
375#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars, or NULL for initialization
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. Conversion stops early, without error, when the
 * output is full or when the input ends inside a multi-byte sequence.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (bad lead or continuation byte, or value above 0xFF), or -1
 *     if @out, @outlen or @inlen is NULL. A NULL @in returns 0 with both
 *     lengths set to 0.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *srcend;
    const unsigned char *committed = in;  /* end of last converted char */
    unsigned char *dst = out;
    const unsigned char *dstend;
    unsigned int value, byte;
    int extra;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization call: nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcend = in + (*inlen);
    dstend = out + (*outlen);

    while (src < srcend) {
        byte = *src++;

        /* classify the lead byte */
        if (byte < 0x80) {
            value = byte;
            extra = 0;
        } else if ((byte < 0xC0) || (byte >= 0xF8)) {
            /* continuation byte in lead position, or invalid lead byte */
            goto transcoding_failed;
        } else if (byte < 0xE0) {
            value = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            value = byte & 0x0F;
            extra = 2;
        } else {
            value = byte & 0x07;
            extra = 3;
        }

        /* sequence cut by the end of the input: stop before it */
        if (srcend - src < extra)
            break;

        while (extra > 0) {
            byte = *src++;
            if ((byte & 0xC0) != 0x80)
                goto transcoding_failed;
            value = (value << 6) | (byte & 0x3F);
            extra--;
        }

        /* only values up to 0xFF exist in ISO Latin 1 */
        if (value > 0xFF)
            goto transcoding_failed;
        if (dst >= dstend)
            break;
        *dst++ = value;
        committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    return(*outlen);

transcoding_failed:
    *outlen = dst - out;
    *inlen = committed - in;
    return(-2);
}
465#endif /* LIBXML_OUTPUT_ENABLED */
466
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is decoded byte by byte, so it
 * needs no particular alignment and the result does not depend on the
 * endianness of the machine. A trailing odd byte is ignored.
 *
 * Conversion stops early, without error, when the next character does
 * not fit entirely in @out or when the input ends between the two halves
 * of a surrogate pair; such input is left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 *     The value of *inlenb after return is the number of octets consumed.
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* processed = inb;
    unsigned int c, d, lead;
    int needed, bits;

    /* only whole 16-bit units can be decoded */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + ((*inlenb > 0) ? *inlenb : 0);

    while (inend - in >= 2) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 2) {
                /* pair split across buffers: wait for the second half */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            needed = 1;
            lead = c;
        } else if (c < 0x800) {
            needed = 2;
            lead = ((c >> 6) & 0x1F) | 0xC0;
        } else if (c < 0x10000) {
            needed = 3;
            lead = ((c >> 12) & 0x0F) | 0xE0;
        } else {
            needed = 4;
            lead = ((c >> 18) & 0x07) | 0xF0;
        }

        /* never emit a partial sequence: all bytes must fit */
        if (outend - out < needed)
            break;

        *out++ = (unsigned char) lead;
        for (bits = (needed - 2) * 6; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
554
555#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL for initialization
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is stored byte by byte, so @outb needs
 * no particular alignment and the result does not depend on the
 * endianness of the machine. No byte order mark is produced.
 *
 * Conversion stops early, without error, when the next character does
 * not fit in @outb, when the input ends inside a multi-byte sequence, or
 * when a decoded value is 0x110000 or above.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 *     (bad lead or continuation byte), or -1 if @outb, @outlen or @inlen
 *     is NULL. A NULL @in returns 0 with both lengths set to 0.
 *     The value of *inlen after return is the number of octets consumed.
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, high, low;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are written */
    outend = outb + ((*outlen > 0) ? (*outlen / 2) * 2 : 0);

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto failed;
        }

        /* incomplete sequence at the end of the input: keep it for later */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /* reject malformed sequences instead of emitting garbage */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        } else if (c < 0x110000) {
            /* needs a surrogate pair: two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (high & 0xFF);
            out[1] = (unsigned char) (high >> 8);
            out[2] = (unsigned char) (low & 0xFF);
            out[3] = (unsigned char) (low >> 8);
            out += 4;
        } else {
            break;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
664
665/**
666 * UTF8ToUTF16:
667 * @outb:  a pointer to an array of bytes to store the result
668 * @outlen:  the length of @outb
669 * @in:  a pointer to an array of UTF-8 chars
670 * @inlen:  the length of @in
671 *
672 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
673 * block of chars out.
674 *
675 * Returns the number of bytes written, or -1 if lack of space, or -2
676 *     if the transcoding failed.
677 */
678static int
679UTF8ToUTF16(unsigned char* outb, int *outlen,
680            const unsigned char* in, int *inlen)
681{
682    if (in == NULL) {
683	/*
684	 * initialization, add the Byte Order Mark for UTF-16LE
685	 */
686        if (*outlen >= 2) {
687	    outb[0] = 0xFF;
688	    outb[1] = 0xFE;
689	    *outlen = 2;
690	    *inlen = 0;
691#ifdef DEBUG_ENCODING
692            xmlGenericError(xmlGenericErrorContext,
693		    "Added FFFE Byte Order Mark\n");
694#endif
695	    return(2);
696	}
697	*outlen = 0;
698	*inlen = 0;
699	return(0);
700    }
701    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
702}
703#endif /* LIBXML_OUTPUT_ENABLED */
704
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is decoded byte by byte, so it
 * needs no particular alignment and the result does not depend on the
 * endianness of the machine. A trailing odd byte is ignored.
 *
 * Conversion stops early, without error, when the next character does
 * not fit entirely in @out or when the input ends between the two halves
 * of a surrogate pair; such input is left unconsumed. This matches
 * UTF16LEToUTF8().
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* processed = inb;
    unsigned int c, d, lead;
    int needed, bits;

    /* only whole 16-bit units can be decoded */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + ((*inlenb > 0) ? *inlenb : 0);

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 2) {
                /* pair split across buffers: wait for the second half */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            needed = 1;
            lead = c;
        } else if (c < 0x800) {
            needed = 2;
            lead = ((c >> 6) & 0x1F) | 0xC0;
        } else if (c < 0x10000) {
            needed = 3;
            lead = ((c >> 12) & 0x0F) | 0xE0;
        } else {
            needed = 4;
            lead = ((c >> 18) & 0x07) | 0xF0;
        }

        /*
         * Never emit a partial sequence: if the whole character does not
         * fit, stop before it so that it is not reported as consumed.
         */
        if (outend - out < needed)
            break;

        *out++ = (unsigned char) lead;
        for (bits = (needed - 2) * 6; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
796
797#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL for initialization
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is stored byte by byte, so @outb needs
 * no particular alignment and the result does not depend on the
 * endianness of the machine. No byte order mark is produced.
 *
 * Conversion stops early, without error, when the next character does
 * not fit in @outb, when the input ends inside a multi-byte sequence, or
 * when a decoded value is 0x110000 or above.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 *     (bad lead or continuation byte), or -1 if @outb, @outlen or @inlen
 *     is NULL. A NULL @in returns 0 with both lengths set to 0.
 *     The value of *inlen after return is the number of octets consumed.
 *     The value of *outlen after return is the number of octets produced,
 *     on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, high, low;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are written */
    outend = outb + ((*outlen > 0) ? (*outlen / 2) * 2 : 0);

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto failed;
        }

        /* incomplete sequence at the end of the input: keep it for later */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /* reject malformed sequences instead of emitting garbage */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        } else if (c < 0x110000) {
            /* needs a surrogate pair: two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (high >> 8);
            out[1] = (unsigned char) (high & 0xFF);
            out[2] = (unsigned char) (low >> 8);
            out[3] = (unsigned char) (low & 0xFF);
            out += 4;
        } else {
            break;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* lengths are reported in bytes, exactly as on the success path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
903#endif /* LIBXML_OUTPUT_ENABLED */
904
905/************************************************************************
906 *									*
907 *		Generic encoding handling routines			*
908 *									*
909 ************************************************************************/
910
911/**
912 * xmlDetectCharEncoding:
913 * @in:  a pointer to the first bytes of the XML entity, must be at least
914 *       2 bytes long (at least 4 if encoding is UTF4 variant).
915 * @len:  pointer to the length of the buffer
916 *
917 * Guess the encoding of the entity using the first bytes of the entity content
918 * according to the non-normative appendix F of the XML-1.0 recommendation.
919 *
920 * Returns one of the XML_CHAR_ENCODING_... values.
921 */
922xmlCharEncoding
923xmlDetectCharEncoding(const unsigned char* in, int len)
924{
925    if (in == NULL)
926        return(XML_CHAR_ENCODING_NONE);
927    if (len >= 4) {
928	if ((in[0] == 0x00) && (in[1] == 0x00) &&
929	    (in[2] == 0x00) && (in[3] == 0x3C))
930	    return(XML_CHAR_ENCODING_UCS4BE);
931	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
932	    (in[2] == 0x00) && (in[3] == 0x00))
933	    return(XML_CHAR_ENCODING_UCS4LE);
934	if ((in[0] == 0x00) && (in[1] == 0x00) &&
935	    (in[2] == 0x3C) && (in[3] == 0x00))
936	    return(XML_CHAR_ENCODING_UCS4_2143);
937	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
938	    (in[2] == 0x00) && (in[3] == 0x00))
939	    return(XML_CHAR_ENCODING_UCS4_3412);
940	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
941	    (in[2] == 0xA7) && (in[3] == 0x94))
942	    return(XML_CHAR_ENCODING_EBCDIC);
943	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
944	    (in[2] == 0x78) && (in[3] == 0x6D))
945	    return(XML_CHAR_ENCODING_UTF8);
946	/*
947	 * Although not part of the recommendation, we also
948	 * attempt an "auto-recognition" of UTF-16LE and
949	 * UTF-16BE encodings.
950	 */
951	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
952	    (in[2] == 0x3F) && (in[3] == 0x00))
953	    return(XML_CHAR_ENCODING_UTF16LE);
954	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
955	    (in[2] == 0x00) && (in[3] == 0x3F))
956	    return(XML_CHAR_ENCODING_UTF16BE);
957    }
958    if (len >= 3) {
959	/*
960	 * Errata on XML-1.0 June 20 2001
961	 * We now allow an UTF8 encoded BOM
962	 */
963	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
964	    (in[2] == 0xBF))
965	    return(XML_CHAR_ENCODING_UTF8);
966    }
967    /* For UTF-16 we can recognize by the BOM */
968    if (len >= 2) {
969	if ((in[0] == 0xFE) && (in[1] == 0xFF))
970	    return(XML_CHAR_ENCODING_UTF16BE);
971	if ((in[0] == 0xFF) && (in[1] == 0xFE))
972	    return(XML_CHAR_ENCODING_UTF16LE);
973    }
974    return(XML_CHAR_ENCODING_NONE);
975}
976
977/**
978 * xmlCleanupEncodingAliases:
979 *
980 * Unregisters all aliases
981 */
982void
983xmlCleanupEncodingAliases(void) {
984    int i;
985
986    if (xmlCharEncodingAliases == NULL)
987	return;
988
989    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
990	if (xmlCharEncodingAliases[i].name != NULL)
991	    xmlFree((char *) xmlCharEncodingAliases[i].name);
992	if (xmlCharEncodingAliases[i].alias != NULL)
993	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
994    }
995    xmlCharEncodingAliasesNb = 0;
996    xmlCharEncodingAliasesMax = 0;
997    xmlFree(xmlCharEncodingAliases);
998    xmlCharEncodingAliases = NULL;
999}
1000
1001/**
1002 * xmlGetEncodingAlias:
1003 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1004 *
1005 * Lookup an encoding name for the given alias.
1006 *
1007 * Returns NULL if not found, otherwise the original name
1008 */
1009const char *
1010xmlGetEncodingAlias(const char *alias) {
1011    int i;
1012    char upper[100];
1013
1014    if (alias == NULL)
1015	return(NULL);
1016
1017    if (xmlCharEncodingAliases == NULL)
1018	return(NULL);
1019
1020    for (i = 0;i < 99;i++) {
1021        upper[i] = toupper(alias[i]);
1022	if (upper[i] == 0) break;
1023    }
1024    upper[i] = 0;
1025
1026    /*
1027     * Walk down the list looking for a definition of the alias
1028     */
1029    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1030	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1031	    return(xmlCharEncodingAliases[i].name);
1032	}
1033    }
1034    return(NULL);
1035}
1036
1037/**
1038 * xmlAddEncodingAlias:
1039 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1040 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1041 *
1042 * Registers an alias @alias for an encoding named @name. Existing alias
1043 * will be overwritten.
1044 *
1045 * Returns 0 in case of success, -1 in case of error
1046 */
1047int
1048xmlAddEncodingAlias(const char *name, const char *alias) {
1049    int i;
1050    char upper[100];
1051
1052    if ((name == NULL) || (alias == NULL))
1053	return(-1);
1054
1055    for (i = 0;i < 99;i++) {
1056        upper[i] = toupper(alias[i]);
1057	if (upper[i] == 0) break;
1058    }
1059    upper[i] = 0;
1060
1061    if (xmlCharEncodingAliases == NULL) {
1062	xmlCharEncodingAliasesNb = 0;
1063	xmlCharEncodingAliasesMax = 20;
1064	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1065	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1066	if (xmlCharEncodingAliases == NULL)
1067	    return(-1);
1068    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1069	xmlCharEncodingAliasesMax *= 2;
1070	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1071	      xmlRealloc(xmlCharEncodingAliases,
1072		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1073    }
1074    /*
1075     * Walk down the list looking for a definition of the alias
1076     */
1077    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1078	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1079	    /*
1080	     * Replace the definition.
1081	     */
1082	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1083	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1084	    return(0);
1085	}
1086    }
1087    /*
1088     * Add the definition
1089     */
1090    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1091    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1092    xmlCharEncodingAliasesNb++;
1093    return(0);
1094}
1095
1096/**
1097 * xmlDelEncodingAlias:
1098 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1099 *
1100 * Unregisters an encoding alias @alias
1101 *
1102 * Returns 0 in case of success, -1 in case of error
1103 */
1104int
1105xmlDelEncodingAlias(const char *alias) {
1106    int i;
1107
1108    if (alias == NULL)
1109	return(-1);
1110
1111    if (xmlCharEncodingAliases == NULL)
1112	return(-1);
1113    /*
1114     * Walk down the list looking for a definition of the alias
1115     */
1116    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1117	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1118	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1119	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1120	    xmlCharEncodingAliasesNb--;
1121	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1122		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1123	    return(0);
1124	}
1125    }
1126    return(-1);
1127}
1128
1129/**
1130 * xmlParseCharEncoding:
1131 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1132 *
1133 * Compare the string to the encoding schemes already known. Note
1134 * that the comparison is case insensitive accordingly to the section
1135 * [XML] 4.3.3 Character Encoding in Entities.
1136 *
1137 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1138 * if not recognized.
1139 */
1140xmlCharEncoding
1141xmlParseCharEncoding(const char* name)
1142{
1143    const char *alias;
1144    char upper[500];
1145    int i;
1146
1147    if (name == NULL)
1148	return(XML_CHAR_ENCODING_NONE);
1149
1150    /*
1151     * Do the alias resolution
1152     */
1153    alias = xmlGetEncodingAlias(name);
1154    if (alias != NULL)
1155	name = alias;
1156
1157    for (i = 0;i < 499;i++) {
1158        upper[i] = toupper(name[i]);
1159	if (upper[i] == 0) break;
1160    }
1161    upper[i] = 0;
1162
1163    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1164    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1165    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1166
1167    /*
1168     * NOTE: if we were able to parse this, the endianness of UTF16 is
1169     *       already found and in use
1170     */
1171    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1172    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1173
1174    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1175    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1176    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1177
1178    /*
1179     * NOTE: if we were able to parse this, the endianness of UCS4 is
1180     *       already found and in use
1181     */
1182    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1183    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1184    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1185
1186
1187    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1188    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1189    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1190
1191    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1192    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1193    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1194
1195    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1196    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1197    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1198    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1199    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1200    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1201    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1202
1203    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1204    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1205    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1206
1207#ifdef DEBUG_ENCODING
1208    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1209#endif
1210    return(XML_CHAR_ENCODING_ERROR);
1211}
1212
1213/**
1214 * xmlGetCharEncodingName:
1215 * @enc:  the encoding
1216 *
1217 * The "canonical" name for XML encoding.
1218 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1219 * Section 4.3.3  Character Encoding in Entities
1220 *
1221 * Returns the canonical name for the given encoding
1222 */
1223
1224const char*
1225xmlGetCharEncodingName(xmlCharEncoding enc) {
1226    switch (enc) {
1227        case XML_CHAR_ENCODING_ERROR:
1228	    return(NULL);
1229        case XML_CHAR_ENCODING_NONE:
1230	    return(NULL);
1231        case XML_CHAR_ENCODING_UTF8:
1232	    return("UTF-8");
1233        case XML_CHAR_ENCODING_UTF16LE:
1234	    return("UTF-16");
1235        case XML_CHAR_ENCODING_UTF16BE:
1236	    return("UTF-16");
1237        case XML_CHAR_ENCODING_EBCDIC:
1238            return("EBCDIC");
1239        case XML_CHAR_ENCODING_UCS4LE:
1240            return("ISO-10646-UCS-4");
1241        case XML_CHAR_ENCODING_UCS4BE:
1242            return("ISO-10646-UCS-4");
1243        case XML_CHAR_ENCODING_UCS4_2143:
1244            return("ISO-10646-UCS-4");
1245        case XML_CHAR_ENCODING_UCS4_3412:
1246            return("ISO-10646-UCS-4");
1247        case XML_CHAR_ENCODING_UCS2:
1248            return("ISO-10646-UCS-2");
1249        case XML_CHAR_ENCODING_8859_1:
1250	    return("ISO-8859-1");
1251        case XML_CHAR_ENCODING_8859_2:
1252	    return("ISO-8859-2");
1253        case XML_CHAR_ENCODING_8859_3:
1254	    return("ISO-8859-3");
1255        case XML_CHAR_ENCODING_8859_4:
1256	    return("ISO-8859-4");
1257        case XML_CHAR_ENCODING_8859_5:
1258	    return("ISO-8859-5");
1259        case XML_CHAR_ENCODING_8859_6:
1260	    return("ISO-8859-6");
1261        case XML_CHAR_ENCODING_8859_7:
1262	    return("ISO-8859-7");
1263        case XML_CHAR_ENCODING_8859_8:
1264	    return("ISO-8859-8");
1265        case XML_CHAR_ENCODING_8859_9:
1266	    return("ISO-8859-9");
1267        case XML_CHAR_ENCODING_2022_JP:
1268            return("ISO-2022-JP");
1269        case XML_CHAR_ENCODING_SHIFT_JIS:
1270            return("Shift-JIS");
1271        case XML_CHAR_ENCODING_EUC_JP:
1272            return("EUC-JP");
1273	case XML_CHAR_ENCODING_ASCII:
1274	    return(NULL);
1275    }
1276    return(NULL);
1277}
1278
1279/************************************************************************
1280 *									*
1281 *			Char encoding handlers				*
1282 *									*
1283 ************************************************************************/
1284
1285
/*
 * Registry of encoding handlers.
 *
 * the size should be growable, but it's not a big deal ...
 * MAX_ENCODING_HANDLERS is the number of slots allocated for the table
 * by xmlInitCharEncodingHandlers(); xmlRegisterCharEncodingHandler()
 * refuses new entries once that many handlers are stored.
 */
#define MAX_ENCODING_HANDLERS 50
/* the table itself, NULL until xmlInitCharEncodingHandlers() ran */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* number of entries of the table currently in use */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * This is the value xmlFindCharEncodingHandler() returns for a NULL or
 * empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1297
1298/**
1299 * xmlNewCharEncodingHandler:
1300 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1301 * @input:  the xmlCharEncodingInputFunc to read that encoding
1302 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1303 *
1304 * Create and registers an xmlCharEncodingHandler.
1305 *
1306 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1307 */
1308xmlCharEncodingHandlerPtr
1309xmlNewCharEncodingHandler(const char *name,
1310                          xmlCharEncodingInputFunc input,
1311                          xmlCharEncodingOutputFunc output) {
1312    xmlCharEncodingHandlerPtr handler;
1313    const char *alias;
1314    char upper[500];
1315    int i;
1316    char *up = NULL;
1317
1318    /*
1319     * Do the alias resolution
1320     */
1321    alias = xmlGetEncodingAlias(name);
1322    if (alias != NULL)
1323	name = alias;
1324
1325    /*
1326     * Keep only the uppercase version of the encoding.
1327     */
1328    if (name == NULL) {
1329        xmlEncodingErr(XML_I18N_NO_NAME,
1330		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1331	return(NULL);
1332    }
1333    for (i = 0;i < 499;i++) {
1334        upper[i] = toupper(name[i]);
1335	if (upper[i] == 0) break;
1336    }
1337    upper[i] = 0;
1338    up = xmlMemStrdup(upper);
1339    if (up == NULL) {
1340        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1341	return(NULL);
1342    }
1343
1344    /*
1345     * allocate and fill-up an handler block.
1346     */
1347    handler = (xmlCharEncodingHandlerPtr)
1348              xmlMalloc(sizeof(xmlCharEncodingHandler));
1349    if (handler == NULL) {
1350        xmlFree(up);
1351        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1352	return(NULL);
1353    }
1354    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1355    handler->input = input;
1356    handler->output = output;
1357    handler->name = up;
1358
1359#ifdef LIBXML_ICONV_ENABLED
1360    handler->iconv_in = NULL;
1361    handler->iconv_out = NULL;
1362#endif
1363#ifdef LIBXML_ICU_ENABLED
1364    handler->uconv_in = NULL;
1365    handler->uconv_out = NULL;
1366#endif
1367
1368    /*
1369     * registers and returns the handler.
1370     */
1371    xmlRegisterCharEncodingHandler(handler);
1372#ifdef DEBUG_ENCODING
1373    xmlGenericError(xmlGenericErrorContext,
1374	    "Registered encoding handler for %s\n", name);
1375#endif
1376    return(handler);
1377}
1378
1379/**
1380 * xmlInitCharEncodingHandlers:
1381 *
1382 * Initialize the char encoding support, it registers the default
1383 * encoding supported.
1384 * NOTE: while public, this function usually doesn't need to be called
1385 *       in normal processing.
1386 */
1387void
1388xmlInitCharEncodingHandlers(void) {
1389    unsigned short int tst = 0x1234;
1390    unsigned char *ptr = (unsigned char *) &tst;
1391
1392    if (handlers != NULL) return;
1393
1394    handlers = (xmlCharEncodingHandlerPtr *)
1395        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1396
1397    if (*ptr == 0x12) xmlLittleEndian = 0;
1398    else if (*ptr == 0x34) xmlLittleEndian = 1;
1399    else {
1400        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1401	               "Odd problem at endianness detection\n", NULL);
1402    }
1403
1404    if (handlers == NULL) {
1405        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1406	return;
1407    }
1408    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1409#ifdef LIBXML_OUTPUT_ENABLED
1410    xmlUTF16LEHandler =
1411          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1412    xmlUTF16BEHandler =
1413          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1414    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1415    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1416    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1417    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1418#ifdef LIBXML_HTML_ENABLED
1419    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1420#endif
1421#else
1422    xmlUTF16LEHandler =
1423          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1424    xmlUTF16BEHandler =
1425          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1426    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1427    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1428    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1429    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1430#endif /* LIBXML_OUTPUT_ENABLED */
1431#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1432#ifdef LIBXML_ISO8859X_ENABLED
1433    xmlRegisterCharEncodingHandlersISO8859x ();
1434#endif
1435#endif
1436
1437}
1438
1439/**
1440 * xmlCleanupCharEncodingHandlers:
1441 *
1442 * Cleanup the memory allocated for the char encoding support, it
1443 * unregisters all the encoding handlers and the aliases.
1444 */
1445void
1446xmlCleanupCharEncodingHandlers(void) {
1447    xmlCleanupEncodingAliases();
1448
1449    if (handlers == NULL) return;
1450
1451    for (;nbCharEncodingHandler > 0;) {
1452        nbCharEncodingHandler--;
1453	if (handlers[nbCharEncodingHandler] != NULL) {
1454	    if (handlers[nbCharEncodingHandler]->name != NULL)
1455		xmlFree(handlers[nbCharEncodingHandler]->name);
1456	    xmlFree(handlers[nbCharEncodingHandler]);
1457	}
1458    }
1459    xmlFree(handlers);
1460    handlers = NULL;
1461    nbCharEncodingHandler = 0;
1462    xmlDefaultCharEncodingHandler = NULL;
1463}
1464
1465/**
1466 * xmlRegisterCharEncodingHandler:
1467 * @handler:  the xmlCharEncodingHandlerPtr handler block
1468 *
1469 * Register the char encoding handler, surprising, isn't it ?
1470 */
1471void
1472xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1473    if (handlers == NULL) xmlInitCharEncodingHandlers();
1474    if ((handler == NULL) || (handlers == NULL)) {
1475        xmlEncodingErr(XML_I18N_NO_HANDLER,
1476		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1477	return;
1478    }
1479
1480    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1481        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1482	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1483	               "MAX_ENCODING_HANDLERS");
1484	return;
1485    }
1486    handlers[nbCharEncodingHandler++] = handler;
1487}
1488
1489/**
1490 * xmlGetCharEncodingHandler:
1491 * @enc:  an xmlCharEncoding value.
1492 *
1493 * Search in the registered set the handler able to read/write that encoding.
1494 *
1495 * Returns the handler or NULL if not found
1496 */
1497xmlCharEncodingHandlerPtr
1498xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1499    xmlCharEncodingHandlerPtr handler;
1500
1501    if (handlers == NULL) xmlInitCharEncodingHandlers();
1502    switch (enc) {
1503        case XML_CHAR_ENCODING_ERROR:
1504	    return(NULL);
1505        case XML_CHAR_ENCODING_NONE:
1506	    return(NULL);
1507        case XML_CHAR_ENCODING_UTF8:
1508	    return(NULL);
1509        case XML_CHAR_ENCODING_UTF16LE:
1510	    return(xmlUTF16LEHandler);
1511        case XML_CHAR_ENCODING_UTF16BE:
1512	    return(xmlUTF16BEHandler);
1513        case XML_CHAR_ENCODING_EBCDIC:
1514            handler = xmlFindCharEncodingHandler("EBCDIC");
1515            if (handler != NULL) return(handler);
1516            handler = xmlFindCharEncodingHandler("ebcdic");
1517            if (handler != NULL) return(handler);
1518            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1519            if (handler != NULL) return(handler);
1520            handler = xmlFindCharEncodingHandler("IBM-037");
1521            if (handler != NULL) return(handler);
1522	    break;
1523        case XML_CHAR_ENCODING_UCS4BE:
1524            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1525            if (handler != NULL) return(handler);
1526            handler = xmlFindCharEncodingHandler("UCS-4");
1527            if (handler != NULL) return(handler);
1528            handler = xmlFindCharEncodingHandler("UCS4");
1529            if (handler != NULL) return(handler);
1530	    break;
1531        case XML_CHAR_ENCODING_UCS4LE:
1532            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1533            if (handler != NULL) return(handler);
1534            handler = xmlFindCharEncodingHandler("UCS-4");
1535            if (handler != NULL) return(handler);
1536            handler = xmlFindCharEncodingHandler("UCS4");
1537            if (handler != NULL) return(handler);
1538	    break;
1539        case XML_CHAR_ENCODING_UCS4_2143:
1540	    break;
1541        case XML_CHAR_ENCODING_UCS4_3412:
1542	    break;
1543        case XML_CHAR_ENCODING_UCS2:
1544            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1545            if (handler != NULL) return(handler);
1546            handler = xmlFindCharEncodingHandler("UCS-2");
1547            if (handler != NULL) return(handler);
1548            handler = xmlFindCharEncodingHandler("UCS2");
1549            if (handler != NULL) return(handler);
1550	    break;
1551
1552	    /*
1553	     * We used to keep ISO Latin encodings native in the
1554	     * generated data. This led to so many problems that
1555	     * this has been removed. One can still change this
1556	     * back by registering no-ops encoders for those
1557	     */
1558        case XML_CHAR_ENCODING_8859_1:
1559	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1560	    if (handler != NULL) return(handler);
1561	    break;
1562        case XML_CHAR_ENCODING_8859_2:
1563	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1564	    if (handler != NULL) return(handler);
1565	    break;
1566        case XML_CHAR_ENCODING_8859_3:
1567	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1568	    if (handler != NULL) return(handler);
1569	    break;
1570        case XML_CHAR_ENCODING_8859_4:
1571	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1572	    if (handler != NULL) return(handler);
1573	    break;
1574        case XML_CHAR_ENCODING_8859_5:
1575	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1576	    if (handler != NULL) return(handler);
1577	    break;
1578        case XML_CHAR_ENCODING_8859_6:
1579	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1580	    if (handler != NULL) return(handler);
1581	    break;
1582        case XML_CHAR_ENCODING_8859_7:
1583	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1584	    if (handler != NULL) return(handler);
1585	    break;
1586        case XML_CHAR_ENCODING_8859_8:
1587	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1588	    if (handler != NULL) return(handler);
1589	    break;
1590        case XML_CHAR_ENCODING_8859_9:
1591	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1592	    if (handler != NULL) return(handler);
1593	    break;
1594
1595
1596        case XML_CHAR_ENCODING_2022_JP:
1597            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1598            if (handler != NULL) return(handler);
1599	    break;
1600        case XML_CHAR_ENCODING_SHIFT_JIS:
1601            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1602            if (handler != NULL) return(handler);
1603            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1604            if (handler != NULL) return(handler);
1605            handler = xmlFindCharEncodingHandler("Shift_JIS");
1606            if (handler != NULL) return(handler);
1607	    break;
1608        case XML_CHAR_ENCODING_EUC_JP:
1609            handler = xmlFindCharEncodingHandler("EUC-JP");
1610            if (handler != NULL) return(handler);
1611	    break;
1612	default:
1613	    break;
1614    }
1615
1616#ifdef DEBUG_ENCODING
1617    xmlGenericError(xmlGenericErrorContext,
1618	    "No handler found for encoding %d\n", enc);
1619#endif
1620    return(NULL);
1621}
1622
1623/**
1624 * xmlFindCharEncodingHandler:
1625 * @name:  a string describing the char encoding.
1626 *
1627 * Search in the registered set the handler able to read/write that encoding.
1628 *
1629 * Returns the handler or NULL if not found
1630 */
1631xmlCharEncodingHandlerPtr
1632xmlFindCharEncodingHandler(const char *name) {
1633    const char *nalias;
1634    const char *norig;
1635    xmlCharEncoding alias;
1636#ifdef LIBXML_ICONV_ENABLED
1637    xmlCharEncodingHandlerPtr enc;
1638    iconv_t icv_in, icv_out;
1639#endif /* LIBXML_ICONV_ENABLED */
1640#ifdef LIBXML_ICU_ENABLED
1641    xmlCharEncodingHandlerPtr encu;
1642    uconv_t *ucv_in, *ucv_out;
1643#endif /* LIBXML_ICU_ENABLED */
1644    char upper[100];
1645    int i;
1646
1647    if (handlers == NULL) xmlInitCharEncodingHandlers();
1648    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1649    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1650
1651    /*
1652     * Do the alias resolution
1653     */
1654    norig = name;
1655    nalias = xmlGetEncodingAlias(name);
1656    if (nalias != NULL)
1657	name = nalias;
1658
1659    /*
1660     * Check first for directly registered encoding names
1661     */
1662    for (i = 0;i < 99;i++) {
1663        upper[i] = toupper(name[i]);
1664	if (upper[i] == 0) break;
1665    }
1666    upper[i] = 0;
1667
1668    if (handlers != NULL) {
1669        for (i = 0;i < nbCharEncodingHandler; i++) {
1670            if (!strcmp(upper, handlers[i]->name)) {
1671#ifdef DEBUG_ENCODING
1672                xmlGenericError(xmlGenericErrorContext,
1673                        "Found registered handler for encoding %s\n", name);
1674#endif
1675                return(handlers[i]);
1676            }
1677        }
1678    }
1679
1680#ifdef LIBXML_ICONV_ENABLED
1681    /* check whether iconv can handle this */
1682    icv_in = iconv_open("UTF-8", name);
1683    icv_out = iconv_open(name, "UTF-8");
1684    if (icv_in == (iconv_t) -1) {
1685        icv_in = iconv_open("UTF-8", upper);
1686    }
1687    if (icv_out == (iconv_t) -1) {
1688	icv_out = iconv_open(upper, "UTF-8");
1689    }
1690    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1691	    enc = (xmlCharEncodingHandlerPtr)
1692	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1693	    if (enc == NULL) {
1694	        iconv_close(icv_in);
1695	        iconv_close(icv_out);
1696		return(NULL);
1697	    }
1698            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1699	    enc->name = xmlMemStrdup(name);
1700	    enc->input = NULL;
1701	    enc->output = NULL;
1702	    enc->iconv_in = icv_in;
1703	    enc->iconv_out = icv_out;
1704#ifdef DEBUG_ENCODING
1705            xmlGenericError(xmlGenericErrorContext,
1706		    "Found iconv handler for encoding %s\n", name);
1707#endif
1708	    return enc;
1709    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1710	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1711		    "iconv : problems with filters for '%s'\n", name);
1712    }
1713#endif /* LIBXML_ICONV_ENABLED */
1714#ifdef LIBXML_ICU_ENABLED
1715    /* check whether icu can handle this */
1716    ucv_in = openIcuConverter(name, 1);
1717    ucv_out = openIcuConverter(name, 0);
1718    if (ucv_in != NULL && ucv_out != NULL) {
1719	    encu = (xmlCharEncodingHandlerPtr)
1720	           xmlMalloc(sizeof(xmlCharEncodingHandler));
1721	    if (encu == NULL) {
1722                closeIcuConverter(ucv_in);
1723                closeIcuConverter(ucv_out);
1724		return(NULL);
1725	    }
1726            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1727	    encu->name = xmlMemStrdup(name);
1728	    encu->input = NULL;
1729	    encu->output = NULL;
1730	    encu->uconv_in = ucv_in;
1731	    encu->uconv_out = ucv_out;
1732#ifdef DEBUG_ENCODING
1733            xmlGenericError(xmlGenericErrorContext,
1734		    "Found ICU converter handler for encoding %s\n", name);
1735#endif
1736	    return encu;
1737    } else if (ucv_in != NULL || ucv_out != NULL) {
1738            closeIcuConverter(ucv_in);
1739            closeIcuConverter(ucv_out);
1740	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1741		    "ICU converter : problems with filters for '%s'\n", name);
1742    }
1743#endif /* LIBXML_ICU_ENABLED */
1744
1745#ifdef DEBUG_ENCODING
1746    xmlGenericError(xmlGenericErrorContext,
1747	    "No handler found for encoding %s\n", name);
1748#endif
1749
1750    /*
1751     * Fallback using the canonical names
1752     */
1753    alias = xmlParseCharEncoding(norig);
1754    if (alias != XML_CHAR_ENCODING_ERROR) {
1755        const char* canon;
1756        canon = xmlGetCharEncodingName(alias);
1757        if ((canon != NULL) && (strcmp(name, canon))) {
1758	    return(xmlFindCharEncodingHandler(canon));
1759        }
1760    }
1761
1762    /* If "none of the above", give up */
1763    return(NULL);
1764}
1765
1766/************************************************************************
1767 *									*
1768 *		ICONV based generic conversion functions		*
1769 *									*
1770 ************************************************************************/
1771
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Run iconv() once on the given buffers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if the last bytes of the input can't form a complete character
 *        (or for any other iconv error).
 *     -1 is also returned, with @outlen set to 0 when possible, if one
 *     of the pointer arguments is NULL.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;     /* iconv() returns a size_t, (size_t) -1 on error */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /* iconv() leaves the remaining byte counts in the length variables */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ)
            return -2;
#endif
#ifdef E2BIG
        if (errno == E2BIG)
            return -1;
#endif
        /* EINVAL (incomplete input sequence) and anything else */
        return -3;
    }
    return 0;
}
#endif /* LIBXML_ICONV_ENABLED */
1831
1832/************************************************************************
1833 *									*
1834 *		ICU based generic conversion functions		*
1835 *									*
1836 ************************************************************************/
1837
#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd: ICU uconverter data structure
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Convert between the encoding of @cd and UTF-8 with ucnv_convertEx():
 * towards UTF-8 when @toUnicode is non-zero, from UTF-8 otherwise.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the pointer arguments is NULL,
 *        @outlen being set to 0 when possible), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 for any other error, e.g. if the last byte can't form a single
 *        output char.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    UConverter *targetCnv;
    UConverter *sourceCnv;
    UErrorCode status = U_ZERO_ERROR;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    /*
     * TODO(jungshik)
     * 1. is ucnv_convert(To|From)Algorithmic better?
     * 2. had we better use an explicit pivot buffer?
     * 3. error returned comes from 'fromUnicode' only even
     *    when toUnicode is true !
     */
    if (toUnicode) {
        /* encoding => UTF-16 => UTF-8 */
        targetCnv = cd->utf8;
        sourceCnv = cd->uconv;
    } else {
        /* UTF-8 => UTF-16 => encoding */
        targetCnv = cd->uconv;
        sourceCnv = cd->utf8;
    }
    ucnv_convertEx(targetCnv, sourceCnv, &dst, dst + *outlen,
                   &src, src + *inlen, NULL, NULL, NULL, NULL,
                   0, TRUE, &status);

    /* the converter advanced both cursors, report how far they went */
    *inlen = src - (const char*) in;
    *outlen = dst - (char *) out;

    if (U_SUCCESS(status))
        return 0;
    switch (status) {
        case U_BUFFER_OVERFLOW_ERROR:
            return -1;
        case U_INVALID_CHAR_FOUND:
        case U_ILLEGAL_CHAR_FOUND:
            return -2;
        default:
            /* U_TRUNCATED_CHAR_FOUND and everything else */
            return -3;
    }
}
#endif /* LIBXML_ICU_ENABLED */
1900
1901/************************************************************************
1902 *									*
1903 *		The real API used by libxml for on-the-fly conversion	*
1904 *									*
1905 ************************************************************************/
1906
1907/**
1908 * xmlCharEncFirstLineInt:
 * @handler:	char encoding transformation data structure
1910 * @out:  an xmlBuffer for the output.
1911 * @in:  an xmlBuffer for the input
1912 * @len:  number of bytes to convert for the first line, or -1
1913 *
1914 * Front-end for the encoding handler input function, but handle only
1915 * the very first line, i.e. limit itself to 45 chars.
1916 *
1917 * Returns the number of byte written if success, or
1918 *     -1 general error
1919 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1920 *        the result of transformation can't fit into the encoding we want), or
1921 */
1922int
1923xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1924                       xmlBufferPtr in, int len) {
1925    int ret = -2;
1926    int written;
1927    int toconv;
1928
1929    if (handler == NULL) return(-1);
1930    if (out == NULL) return(-1);
1931    if (in == NULL) return(-1);
1932
1933    /* calculate space available */
1934    written = out->size - out->use - 1; /* count '\0' */
1935    toconv = in->use;
1936    /*
1937     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1938     * 45 chars should be sufficient to reach the end of the encoding
1939     * declaration without going too far inside the document content.
1940     * on UTF-16 this means 90bytes, on UCS4 this means 180
1941     * The actual value depending on guessed encoding is passed as @len
1942     * if provided
1943     */
1944    if (len >= 0) {
1945        if (toconv > len)
1946            toconv = len;
1947    } else {
1948        if (toconv > 180)
1949            toconv = 180;
1950    }
1951    if (toconv * 2 >= written) {
1952        xmlBufferGrow(out, toconv * 2);
1953	written = out->size - out->use - 1;
1954    }
1955
1956    if (handler->input != NULL) {
1957	ret = handler->input(&out->content[out->use], &written,
1958	                     in->content, &toconv);
1959	xmlBufferShrink(in, toconv);
1960	out->use += written;
1961	out->content[out->use] = 0;
1962    }
1963#ifdef LIBXML_ICONV_ENABLED
1964    else if (handler->iconv_in != NULL) {
1965	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1966	                      &written, in->content, &toconv);
1967	xmlBufferShrink(in, toconv);
1968	out->use += written;
1969	out->content[out->use] = 0;
1970	if (ret == -1) ret = -3;
1971    }
1972#endif /* LIBXML_ICONV_ENABLED */
1973#ifdef LIBXML_ICU_ENABLED
1974    else if (handler->uconv_in != NULL) {
1975	ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
1976	                      &written, in->content, &toconv);
1977	xmlBufferShrink(in, toconv);
1978	out->use += written;
1979	out->content[out->use] = 0;
1980	if (ret == -1) ret = -3;
1981    }
1982#endif /* LIBXML_ICU_ENABLED */
1983#ifdef DEBUG_ENCODING
1984    switch (ret) {
1985        case 0:
1986	    xmlGenericError(xmlGenericErrorContext,
1987		    "converted %d bytes to %d bytes of input\n",
1988	            toconv, written);
1989	    break;
1990        case -1:
1991	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1992	            toconv, written, in->use);
1993	    break;
1994        case -2:
1995	    xmlGenericError(xmlGenericErrorContext,
1996		    "input conversion failed due to input error\n");
1997	    break;
1998        case -3:
1999	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2000	            toconv, written, in->use);
2001	    break;
2002	default:
2003	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2004    }
2005#endif /* DEBUG_ENCODING */
2006    /*
2007     * Ignore when input buffer is not on a boundary
2008     */
2009    if (ret == -3) ret = 0;
2010    if (ret == -1) ret = 0;
2011    return(ret);
2012}
2013
2014/**
2015 * xmlCharEncFirstLine:
2016 * @handler:	char enconding transformation data structure
2017 * @out:  an xmlBuffer for the output.
2018 * @in:  an xmlBuffer for the input
2019 *
2020 * Front-end for the encoding handler input function, but handle only
2021 * the very first line, i.e. limit itself to 45 chars.
2022 *
2023 * Returns the number of byte written if success, or
2024 *     -1 general error
2025 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2026 *        the result of transformation can't fit into the encoding we want), or
2027 */
2028int
2029xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2030                 xmlBufferPtr in) {
2031    return(xmlCharEncFirstLineInt(handler, out, in, -1));
2032}
2033
2034/**
2035 * xmlCharEncFirstLineInput:
2036 * @input: a parser input buffer
2037 * @len:  number of bytes to convert for the first line, or -1
2038 *
2039 * Front-end for the encoding handler input function, but handle only
2040 * the very first line. Point is that this is based on autodetection
2041 * of the encoding and once that first line is converted we may find
2042 * out that a different decoder is needed to process the input.
2043 *
2044 * Returns the number of byte written if success, or
2045 *     -1 general error
2046 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2047 *        the result of transformation can't fit into the encoding we want), or
2048 */
2049int
2050xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2051{
2052    int ret = -2;
2053    size_t written;
2054    size_t toconv;
2055    int c_in;
2056    int c_out;
2057    xmlBufPtr in;
2058    xmlBufPtr out;
2059
2060    if ((input == NULL) || (input->encoder == NULL) ||
2061        (input->buffer == NULL) || (input->raw == NULL))
2062        return (-1);
2063    out = input->buffer;
2064    in = input->raw;
2065
2066    toconv = xmlBufUse(in);
2067    if (toconv == 0)
2068        return (0);
2069    written = xmlBufAvail(out) - 1; /* count '\0' */
2070    /*
2071     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2072     * 45 chars should be sufficient to reach the end of the encoding
2073     * declaration without going too far inside the document content.
2074     * on UTF-16 this means 90bytes, on UCS4 this means 180
2075     * The actual value depending on guessed encoding is passed as @len
2076     * if provided
2077     */
2078    if (len >= 0) {
2079        if (toconv > (unsigned int) len)
2080            toconv = len;
2081    } else {
2082        if (toconv > 180)
2083            toconv = 180;
2084    }
2085    if (toconv * 2 >= written) {
2086        xmlBufGrow(out, toconv * 2);
2087        written = xmlBufAvail(out) - 1;
2088    }
2089    if (written > 360)
2090        written = 360;
2091
2092    c_in = toconv;
2093    c_out = written;
2094    if (input->encoder->input != NULL) {
2095        ret = input->encoder->input(xmlBufEnd(out), &c_out,
2096                                    xmlBufContent(in), &c_in);
2097        xmlBufShrink(in, c_in);
2098        xmlBufAddLen(out, c_out);
2099    }
2100#ifdef LIBXML_ICONV_ENABLED
2101    else if (input->encoder->iconv_in != NULL) {
2102        ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
2103                              &c_out, xmlBufContent(in), &c_in);
2104        xmlBufShrink(in, c_in);
2105        xmlBufAddLen(out, c_out);
2106        if (ret == -1)
2107            ret = -3;
2108    }
2109#endif /* LIBXML_ICONV_ENABLED */
2110#ifdef LIBXML_ICU_ENABLED
2111    else if (input->encoder->uconv_in != NULL) {
2112        ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
2113                              &c_out, xmlBufContent(in), &c_in);
2114        xmlBufShrink(in, c_in);
2115        xmlBufAddLen(out, c_out);
2116        if (ret == -1)
2117            ret = -3;
2118    }
2119#endif /* LIBXML_ICU_ENABLED */
2120    switch (ret) {
2121        case 0:
2122#ifdef DEBUG_ENCODING
2123            xmlGenericError(xmlGenericErrorContext,
2124                            "converted %d bytes to %d bytes of input\n",
2125                            c_in, c_out);
2126#endif
2127            break;
2128        case -1:
2129#ifdef DEBUG_ENCODING
2130            xmlGenericError(xmlGenericErrorContext,
2131                         "converted %d bytes to %d bytes of input, %d left\n",
2132                            c_in, c_out, (int)xmlBufUse(in));
2133#endif
2134            break;
2135        case -3:
2136#ifdef DEBUG_ENCODING
2137            xmlGenericError(xmlGenericErrorContext,
2138                        "converted %d bytes to %d bytes of input, %d left\n",
2139                            c_in, c_out, (int)xmlBufUse(in));
2140#endif
2141            break;
2142        case -2: {
2143            char buf[50];
2144            const xmlChar *content = xmlBufContent(in);
2145
2146	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2147		     content[0], content[1],
2148		     content[2], content[3]);
2149	    buf[49] = 0;
2150	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2151		    "input conversion failed due to input error, bytes %s\n",
2152		           buf);
2153        }
2154    }
2155    /*
2156     * Ignore when input buffer is not on a boundary
2157     */
2158    if (ret == -3) ret = 0;
2159    if (ret == -1) ret = 0;
2160    return(ret);
2161}
2162
2163/**
2164 * xmlCharEncInput:
2165 * @input: a parser input buffer
2166 * @flush: try to flush all the raw buffer
2167 *
2168 * Generic front-end for the encoding handler on parser input
2169 *
2170 * Returns the number of byte written if success, or
2171 *     -1 general error
2172 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2173 *        the result of transformation can't fit into the encoding we want), or
2174 */
2175int
2176xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2177{
2178    int ret = -2;
2179    size_t written;
2180    size_t toconv;
2181    int c_in;
2182    int c_out;
2183    xmlBufPtr in;
2184    xmlBufPtr out;
2185
2186    if ((input == NULL) || (input->encoder == NULL) ||
2187        (input->buffer == NULL) || (input->raw == NULL))
2188        return (-1);
2189    out = input->buffer;
2190    in = input->raw;
2191
2192    toconv = xmlBufUse(in);
2193    if (toconv == 0)
2194        return (0);
2195    if ((toconv > 64 * 1024) && (flush == 0))
2196        toconv = 64 * 1024;
2197    written = xmlBufAvail(out);
2198    if (written > 0)
2199        written--; /* count '\0' */
2200    if (toconv * 2 >= written) {
2201        xmlBufGrow(out, toconv * 2);
2202        written = xmlBufAvail(out);
2203        if (written > 0)
2204            written--; /* count '\0' */
2205    }
2206    if ((written > 128 * 1024) && (flush == 0))
2207        written = 128 * 1024;
2208
2209    c_in = toconv;
2210    c_out = written;
2211    if (input->encoder->input != NULL) {
2212        ret = input->encoder->input(xmlBufEnd(out), &c_out,
2213                                    xmlBufContent(in), &c_in);
2214        xmlBufShrink(in, c_in);
2215        xmlBufAddLen(out, c_out);
2216    }
2217#ifdef LIBXML_ICONV_ENABLED
2218    else if (input->encoder->iconv_in != NULL) {
2219        ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
2220                              &c_out, xmlBufContent(in), &c_in);
2221        xmlBufShrink(in, c_in);
2222        xmlBufAddLen(out, c_out);
2223        if (ret == -1)
2224            ret = -3;
2225    }
2226#endif /* LIBXML_ICONV_ENABLED */
2227#ifdef LIBXML_ICU_ENABLED
2228    else if (input->encoder->uconv_in != NULL) {
2229        ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
2230                              &c_out, xmlBufContent(in), &c_in);
2231        xmlBufShrink(in, c_in);
2232        xmlBufAddLen(out, c_out);
2233        if (ret == -1)
2234            ret = -3;
2235    }
2236#endif /* LIBXML_ICU_ENABLED */
2237    switch (ret) {
2238        case 0:
2239#ifdef DEBUG_ENCODING
2240            xmlGenericError(xmlGenericErrorContext,
2241                            "converted %d bytes to %d bytes of input\n",
2242                            c_in, c_out);
2243#endif
2244            break;
2245        case -1:
2246#ifdef DEBUG_ENCODING
2247            xmlGenericError(xmlGenericErrorContext,
2248                         "converted %d bytes to %d bytes of input, %d left\n",
2249                            c_in, c_out, (int)xmlBufUse(in));
2250#endif
2251            break;
2252        case -3:
2253#ifdef DEBUG_ENCODING
2254            xmlGenericError(xmlGenericErrorContext,
2255                        "converted %d bytes to %d bytes of input, %d left\n",
2256                            c_in, c_out, (int)xmlBufUse(in));
2257#endif
2258            break;
2259        case -2: {
2260            char buf[50];
2261            const xmlChar *content = xmlBufContent(in);
2262
2263	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2264		     content[0], content[1],
2265		     content[2], content[3]);
2266	    buf[49] = 0;
2267	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2268		    "input conversion failed due to input error, bytes %s\n",
2269		           buf);
2270        }
2271    }
2272    /*
2273     * Ignore when input buffer is not on a boundary
2274     */
2275    if (ret == -3)
2276        ret = 0;
2277    return (c_out? c_out : ret);
2278}
2279
2280/**
2281 * xmlCharEncInFunc:
2282 * @handler:	char encoding transformation data structure
2283 * @out:  an xmlBuffer for the output.
2284 * @in:  an xmlBuffer for the input
2285 *
2286 * Generic front-end for the encoding handler input function
2287 *
2288 * Returns the number of byte written if success, or
2289 *     -1 general error
2290 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2291 *        the result of transformation can't fit into the encoding we want), or
2292 */
2293int
2294xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2295                 xmlBufferPtr in)
2296{
2297    int ret = -2;
2298    int written;
2299    int toconv;
2300
2301    if (handler == NULL)
2302        return (-1);
2303    if (out == NULL)
2304        return (-1);
2305    if (in == NULL)
2306        return (-1);
2307
2308    toconv = in->use;
2309    if (toconv == 0)
2310        return (0);
2311    written = out->size - out->use -1; /* count '\0' */
2312    if (toconv * 2 >= written) {
2313        xmlBufferGrow(out, out->size + toconv * 2);
2314        written = out->size - out->use - 1;
2315    }
2316    if (handler->input != NULL) {
2317        ret = handler->input(&out->content[out->use], &written,
2318                             in->content, &toconv);
2319        xmlBufferShrink(in, toconv);
2320        out->use += written;
2321        out->content[out->use] = 0;
2322    }
2323#ifdef LIBXML_ICONV_ENABLED
2324    else if (handler->iconv_in != NULL) {
2325        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2326                              &written, in->content, &toconv);
2327        xmlBufferShrink(in, toconv);
2328        out->use += written;
2329        out->content[out->use] = 0;
2330        if (ret == -1)
2331            ret = -3;
2332    }
2333#endif /* LIBXML_ICONV_ENABLED */
2334#ifdef LIBXML_ICU_ENABLED
2335    else if (handler->uconv_in != NULL) {
2336        ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
2337                              &written, in->content, &toconv);
2338        xmlBufferShrink(in, toconv);
2339        out->use += written;
2340        out->content[out->use] = 0;
2341        if (ret == -1)
2342            ret = -3;
2343    }
2344#endif /* LIBXML_ICU_ENABLED */
2345    switch (ret) {
2346        case 0:
2347#ifdef DEBUG_ENCODING
2348            xmlGenericError(xmlGenericErrorContext,
2349                            "converted %d bytes to %d bytes of input\n",
2350                            toconv, written);
2351#endif
2352            break;
2353        case -1:
2354#ifdef DEBUG_ENCODING
2355            xmlGenericError(xmlGenericErrorContext,
2356                         "converted %d bytes to %d bytes of input, %d left\n",
2357                            toconv, written, in->use);
2358#endif
2359            break;
2360        case -3:
2361#ifdef DEBUG_ENCODING
2362            xmlGenericError(xmlGenericErrorContext,
2363                        "converted %d bytes to %d bytes of input, %d left\n",
2364                            toconv, written, in->use);
2365#endif
2366            break;
2367        case -2: {
2368            char buf[50];
2369
2370	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2371		     in->content[0], in->content[1],
2372		     in->content[2], in->content[3]);
2373	    buf[49] = 0;
2374	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2375		    "input conversion failed due to input error, bytes %s\n",
2376		           buf);
2377        }
2378    }
2379    /*
2380     * Ignore when input buffer is not on a boundary
2381     */
2382    if (ret == -3)
2383        ret = 0;
2384    return (written? written : ret);
2385}
2386
2387/**
2388 * xmlCharEncOutput:
2389 * @output: a parser output buffer
2390 * @init: is this an initialization call without data
2391 *
2392 * Generic front-end for the encoding handler on parser output
2393 * a first call with @init == 1 has to be made first to initiate the
2394 * output in case of non-stateless encoding needing to initiate their
2395 * state or the output (like the BOM in UTF16).
2396 * In case of UTF8 sequence conversion errors for the given encoder,
2397 * the content will be automatically remapped to a CharRef sequence.
2398 *
2399 * Returns the number of byte written if success, or
2400 *     -1 general error
2401 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2402 *        the result of transformation can't fit into the encoding we want), or
2403 */
2404int
2405xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2406{
2407    int ret = -2;
2408    size_t written;
2409    size_t writtentot = 0;
2410    size_t toconv;
2411    int c_in;
2412    int c_out;
2413    xmlBufPtr in;
2414    xmlBufPtr out;
2415    int charref_len = 0;
2416
2417    if ((output == NULL) || (output->encoder == NULL) ||
2418        (output->buffer == NULL) || (output->conv == NULL))
2419        return (-1);
2420    out = output->conv;
2421    in = output->buffer;
2422
2423retry:
2424
2425    written = xmlBufAvail(out);
2426    if (written > 0)
2427        written--; /* count '\0' */
2428
2429    /*
2430     * First specific handling of the initialization call
2431     */
2432    if (init) {
2433        c_in = 0;
2434        c_out = written;
2435        if (output->encoder->output != NULL) {
2436            ret = output->encoder->output(xmlBufEnd(out), &c_out,
2437                                          NULL, &c_in);
2438            if (ret > 0) /* Gennady: check return value */
2439                xmlBufAddLen(out, c_out);
2440        }
2441#ifdef LIBXML_ICONV_ENABLED
2442        else if (output->encoder->iconv_out != NULL) {
2443            ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
2444                                  &c_out, NULL, &c_in);
2445            xmlBufAddLen(out, c_out);
2446        }
2447#endif /* LIBXML_ICONV_ENABLED */
2448#ifdef LIBXML_ICU_ENABLED
2449        else if (output->encoder->uconv_out != NULL) {
2450            ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
2451                                  &c_out, NULL, &c_in);
2452            xmlBufAddLen(out, c_out);
2453        }
2454#endif /* LIBXML_ICU_ENABLED */
2455#ifdef DEBUG_ENCODING
2456	xmlGenericError(xmlGenericErrorContext,
2457		"initialized encoder\n");
2458#endif
2459        return(0);
2460    }
2461
2462    /*
2463     * Conversion itself.
2464     */
2465    toconv = xmlBufUse(in);
2466    if (toconv == 0)
2467        return (0);
2468    if (toconv > 64 * 1024)
2469        toconv = 64 * 1024;
2470    if (toconv * 4 >= written) {
2471        xmlBufGrow(out, toconv * 4);
2472        written = xmlBufAvail(out) - 1;
2473    }
2474    if (written > 256 * 1024)
2475        written = 256 * 1024;
2476
2477    c_in = toconv;
2478    c_out = written;
2479    if (output->encoder->output != NULL) {
2480        ret = output->encoder->output(xmlBufEnd(out), &c_out,
2481                                      xmlBufContent(in), &c_in);
2482        if (c_out > 0) {
2483            xmlBufShrink(in, c_in);
2484            xmlBufAddLen(out, c_out);
2485            writtentot += c_out;
2486        }
2487    }
2488#ifdef LIBXML_ICONV_ENABLED
2489    else if (output->encoder->iconv_out != NULL) {
2490        ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
2491                              &c_out, xmlBufContent(in), &c_in);
2492        xmlBufShrink(in, c_in);
2493        xmlBufAddLen(out, c_out);
2494        writtentot += c_out;
2495        if (ret == -1) {
2496            if (c_out > 0) {
2497                /*
2498                 * Can be a limitation of iconv
2499                 */
2500                charref_len = 0;
2501                goto retry;
2502            }
2503            ret = -3;
2504        }
2505    }
2506#endif /* LIBXML_ICONV_ENABLED */
2507#ifdef LIBXML_ICU_ENABLED
2508    else if (output->encoder->uconv_out != NULL) {
2509        ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
2510                              &c_out, xmlBufContent(in), &c_in);
2511        xmlBufShrink(in, c_in);
2512        xmlBufAddLen(out, c_out);
2513        writtentot += c_out;
2514        if (ret == -1) {
2515            if (c_out > 0) {
2516                /*
2517                 * Can be a limitation of uconv
2518                 */
2519                charref_len = 0;
2520                goto retry;
2521            }
2522            ret = -3;
2523        }
2524    }
2525#endif /* LIBXML_ICU_ENABLED */
2526    else {
2527        xmlEncodingErr(XML_I18N_NO_OUTPUT,
2528                       "xmlCharEncOutFunc: no output function !\n", NULL);
2529        return(-1);
2530    }
2531
2532    if (ret >= 0) output += ret;
2533
2534    /*
2535     * Attempt to handle error cases
2536     */
2537    switch (ret) {
2538        case 0:
2539#ifdef DEBUG_ENCODING
2540	    xmlGenericError(xmlGenericErrorContext,
2541		    "converted %d bytes to %d bytes of output\n",
2542	            c_in, c_out);
2543#endif
2544	    break;
2545        case -1:
2546#ifdef DEBUG_ENCODING
2547	    xmlGenericError(xmlGenericErrorContext,
2548		    "output conversion failed by lack of space\n");
2549#endif
2550	    break;
2551        case -3:
2552#ifdef DEBUG_ENCODING
2553	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2554	            c_in, c_out, (int) xmlBufUse(in));
2555#endif
2556	    break;
2557        case -2: {
2558	    int len = (int) xmlBufUse(in);
2559            xmlChar *content = xmlBufContent(in);
2560	    int cur;
2561
2562	    cur = xmlGetUTF8Char(content, &len);
2563	    if ((charref_len != 0) && (c_out < charref_len)) {
2564		/*
2565		 * We attempted to insert a character reference and failed.
2566		 * Undo what was written and skip the remaining charref.
2567		 */
2568                xmlBufErase(out, c_out);
2569		writtentot -= c_out;
2570		xmlBufShrink(in, charref_len - c_out);
2571		charref_len = 0;
2572
2573		ret = -1;
2574                break;
2575	    } else if (cur > 0) {
2576		xmlChar charref[20];
2577
2578#ifdef DEBUG_ENCODING
2579		xmlGenericError(xmlGenericErrorContext,
2580			"handling output conversion error\n");
2581		xmlGenericError(xmlGenericErrorContext,
2582			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2583			content[0], content[1],
2584			content[2], content[3]);
2585#endif
2586		/*
2587		 * Removes the UTF8 sequence, and replace it by a charref
2588		 * and continue the transcoding phase, hoping the error
2589		 * did not mangle the encoder state.
2590		 */
2591		charref_len = snprintf((char *) &charref[0], sizeof(charref),
2592				 "&#%d;", cur);
2593		xmlBufShrink(in, len);
2594		xmlBufAddHead(in, charref, -1);
2595
2596		goto retry;
2597	    } else {
2598		char buf[50];
2599
2600		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2601			 content[0], content[1],
2602			 content[2], content[3]);
2603		buf[49] = 0;
2604		xmlEncodingErr(XML_I18N_CONV_FAILED,
2605		    "output conversion failed due to conv error, bytes %s\n",
2606			       buf);
2607		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2608		    content[0] = ' ';
2609	    }
2610	    break;
2611	}
2612    }
2613    return(ret);
2614}
2615
2616/**
2617 * xmlCharEncOutFunc:
2618 * @handler:	char enconding transformation data structure
2619 * @out:  an xmlBuffer for the output.
2620 * @in:  an xmlBuffer for the input
2621 *
2622 * Generic front-end for the encoding handler output function
2623 * a first call with @in == NULL has to be made firs to initiate the
2624 * output in case of non-stateless encoding needing to initiate their
2625 * state or the output (like the BOM in UTF16).
2626 * In case of UTF8 sequence conversion errors for the given encoder,
2627 * the content will be automatically remapped to a CharRef sequence.
2628 *
2629 * Returns the number of byte written if success, or
2630 *     -1 general error
2631 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2632 *        the result of transformation can't fit into the encoding we want), or
2633 */
2634int
2635xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2636                  xmlBufferPtr in) {
2637    int ret = -2;
2638    int written;
2639    int writtentot = 0;
2640    int toconv;
2641    int output = 0;
2642    int charref_len = 0;
2643
2644    if (handler == NULL) return(-1);
2645    if (out == NULL) return(-1);
2646
2647retry:
2648
2649    written = out->size - out->use;
2650
2651    if (written > 0)
2652	written--; /* Gennady: count '/0' */
2653
2654    /*
2655     * First specific handling of in = NULL, i.e. the initialization call
2656     */
2657    if (in == NULL) {
2658        toconv = 0;
2659	if (handler->output != NULL) {
2660	    ret = handler->output(&out->content[out->use], &written,
2661				  NULL, &toconv);
2662	    if (ret >= 0) { /* Gennady: check return value */
2663		out->use += written;
2664		out->content[out->use] = 0;
2665	    }
2666	}
2667#ifdef LIBXML_ICONV_ENABLED
2668	else if (handler->iconv_out != NULL) {
2669	    ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2670				  &written, NULL, &toconv);
2671	    out->use += written;
2672	    out->content[out->use] = 0;
2673	}
2674#endif /* LIBXML_ICONV_ENABLED */
2675#ifdef LIBXML_ICU_ENABLED
2676	else if (handler->uconv_out != NULL) {
2677	    ret = xmlUconvWrapper(handler->uconv_out, 0,
2678                              &out->content[out->use],
2679				              &written, NULL, &toconv);
2680	    out->use += written;
2681	    out->content[out->use] = 0;
2682	}
2683#endif /* LIBXML_ICU_ENABLED */
2684#ifdef DEBUG_ENCODING
2685	xmlGenericError(xmlGenericErrorContext,
2686		"initialized encoder\n");
2687#endif
2688        return(0);
2689    }
2690
2691    /*
2692     * Conversion itself.
2693     */
2694    toconv = in->use;
2695    if (toconv == 0)
2696	return(0);
2697    if (toconv * 4 >= written) {
2698        xmlBufferGrow(out, toconv * 4);
2699	written = out->size - out->use - 1;
2700    }
2701    if (handler->output != NULL) {
2702	ret = handler->output(&out->content[out->use], &written,
2703	                      in->content, &toconv);
2704	if (written > 0) {
2705	    xmlBufferShrink(in, toconv);
2706	    out->use += written;
2707	    writtentot += written;
2708	}
2709	out->content[out->use] = 0;
2710    }
2711#ifdef LIBXML_ICONV_ENABLED
2712    else if (handler->iconv_out != NULL) {
2713	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2714	                      &written, in->content, &toconv);
2715	xmlBufferShrink(in, toconv);
2716	out->use += written;
2717	writtentot += written;
2718	out->content[out->use] = 0;
2719	if (ret == -1) {
2720	    if (written > 0) {
2721		/*
2722		 * Can be a limitation of iconv
2723		 */
2724                charref_len = 0;
2725		goto retry;
2726	    }
2727	    ret = -3;
2728	}
2729    }
2730#endif /* LIBXML_ICONV_ENABLED */
2731#ifdef LIBXML_ICU_ENABLED
2732    else if (handler->uconv_out != NULL) {
2733	ret = xmlUconvWrapper(handler->uconv_out, 0,
2734                              &out->content[out->use],
2735	                      &written, in->content, &toconv);
2736	xmlBufferShrink(in, toconv);
2737	out->use += written;
2738	writtentot += written;
2739	out->content[out->use] = 0;
2740	if (ret == -1) {
2741	    if (written > 0) {
2742		/*
2743		 * Can be a limitation of iconv
2744		 */
2745                charref_len = 0;
2746		goto retry;
2747	    }
2748	    ret = -3;
2749	}
2750    }
2751#endif /* LIBXML_ICU_ENABLED */
2752    else {
2753	xmlEncodingErr(XML_I18N_NO_OUTPUT,
2754		       "xmlCharEncOutFunc: no output function !\n", NULL);
2755	return(-1);
2756    }
2757
2758    if (ret >= 0) output += ret;
2759
2760    /*
2761     * Attempt to handle error cases
2762     */
2763    switch (ret) {
2764        case 0:
2765#ifdef DEBUG_ENCODING
2766	    xmlGenericError(xmlGenericErrorContext,
2767		    "converted %d bytes to %d bytes of output\n",
2768	            toconv, written);
2769#endif
2770	    break;
2771        case -1:
2772#ifdef DEBUG_ENCODING
2773	    xmlGenericError(xmlGenericErrorContext,
2774		    "output conversion failed by lack of space\n");
2775#endif
2776	    break;
2777        case -3:
2778#ifdef DEBUG_ENCODING
2779	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2780	            toconv, written, in->use);
2781#endif
2782	    break;
2783        case -2: {
2784	    int len = in->use;
2785	    const xmlChar *utf = (const xmlChar *) in->content;
2786	    int cur;
2787
2788	    cur = xmlGetUTF8Char(utf, &len);
2789	    if ((charref_len != 0) && (written < charref_len)) {
2790		/*
2791		 * We attempted to insert a character reference and failed.
2792		 * Undo what was written and skip the remaining charref.
2793		 */
2794		out->use -= written;
2795		writtentot -= written;
2796		xmlBufferShrink(in, charref_len - written);
2797		charref_len = 0;
2798
2799		ret = -1;
2800                break;
2801	    } else if (cur > 0) {
2802		xmlChar charref[20];
2803
2804#ifdef DEBUG_ENCODING
2805		xmlGenericError(xmlGenericErrorContext,
2806			"handling output conversion error\n");
2807		xmlGenericError(xmlGenericErrorContext,
2808			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2809			in->content[0], in->content[1],
2810			in->content[2], in->content[3]);
2811#endif
2812		/*
2813		 * Removes the UTF8 sequence, and replace it by a charref
2814		 * and continue the transcoding phase, hoping the error
2815		 * did not mangle the encoder state.
2816		 */
2817		charref_len = snprintf((char *) &charref[0], sizeof(charref),
2818				 "&#%d;", cur);
2819		xmlBufferShrink(in, len);
2820		xmlBufferAddHead(in, charref, -1);
2821
2822		goto retry;
2823	    } else {
2824		char buf[50];
2825
2826		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2827			 in->content[0], in->content[1],
2828			 in->content[2], in->content[3]);
2829		buf[49] = 0;
2830		xmlEncodingErr(XML_I18N_CONV_FAILED,
2831		    "output conversion failed due to conv error, bytes %s\n",
2832			       buf);
2833		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2834		    in->content[0] = ' ';
2835	    }
2836	    break;
2837	}
2838    }
2839    return(ret);
2840}
2841
2842/**
2843 * xmlCharEncCloseFunc:
2844 * @handler:	char enconding transformation data structure
2845 *
2846 * Generic front-end for encoding handler close function
2847 *
2848 * Returns 0 if success, or -1 in case of error
2849 */
2850int
2851xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2852    int ret = 0;
2853    int tofree = 0;
2854    if (handler == NULL) return(-1);
2855    if (handler->name == NULL) return(-1);
2856#ifdef LIBXML_ICONV_ENABLED
2857    /*
2858     * Iconv handlers can be used only once, free the whole block.
2859     * and the associated icon resources.
2860     */
2861    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2862        tofree = 1;
2863	if (handler->iconv_out != NULL) {
2864	    if (iconv_close(handler->iconv_out))
2865		ret = -1;
2866	    handler->iconv_out = NULL;
2867	}
2868	if (handler->iconv_in != NULL) {
2869	    if (iconv_close(handler->iconv_in))
2870		ret = -1;
2871	    handler->iconv_in = NULL;
2872	}
2873    }
2874#endif /* LIBXML_ICONV_ENABLED */
2875#ifdef LIBXML_ICU_ENABLED
2876    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2877        tofree = 1;
2878	if (handler->uconv_out != NULL) {
2879	    closeIcuConverter(handler->uconv_out);
2880	    handler->uconv_out = NULL;
2881	}
2882	if (handler->uconv_in != NULL) {
2883	    closeIcuConverter(handler->uconv_in);
2884	    handler->uconv_in = NULL;
2885	}
2886    }
2887#endif
2888    if (tofree) {
2889        /* free up only dynamic handlers iconv/uconv */
2890        if (handler->name != NULL)
2891            xmlFree(handler->name);
2892        handler->name = NULL;
2893        xmlFree(handler);
2894    }
2895#ifdef DEBUG_ENCODING
2896    if (ret)
2897        xmlGenericError(xmlGenericErrorContext,
2898		"failed to close the encoding handler\n");
2899    else
2900        xmlGenericError(xmlGenericErrorContext,
2901		"closed the encoding handler\n");
2902#endif
2903
2904    return(ret);
2905}
2906
2907/**
2908 * xmlByteConsumed:
2909 * @ctxt: an XML parser context
2910 *
2911 * This function provides the current index of the parser relative
2912 * to the start of the current entity. This function is computed in
2913 * bytes from the beginning starting at zero and finishing at the
2914 * size in byte of the file if parsing a file. The function is
2915 * of constant cost if the input is UTF-8 but can be costly if run
2916 * on non-UTF-8 input.
2917 *
2918 * Returns the index in bytes from the beginning of the entity or -1
2919 *         in case the index could not be computed.
2920 */
2921long
2922xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2923    xmlParserInputPtr in;
2924
2925    if (ctxt == NULL) return(-1);
2926    in = ctxt->input;
2927    if (in == NULL)  return(-1);
2928    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2929        unsigned int unused = 0;
2930	xmlCharEncodingHandler * handler = in->buf->encoder;
2931        /*
2932	 * Encoding conversion, compute the number of unused original
2933	 * bytes from the input not consumed and substract that from
2934	 * the raw consumed value, this is not a cheap operation
2935	 */
2936        if (in->end - in->cur > 0) {
2937	    unsigned char convbuf[32000];
2938	    const unsigned char *cur = (const unsigned char *)in->cur;
2939	    int toconv = in->end - in->cur, written = 32000;
2940
2941	    int ret;
2942
2943	    if (handler->output != NULL) {
2944	        do {
2945		    toconv = in->end - cur;
2946		    written = 32000;
2947		    ret = handler->output(&convbuf[0], &written,
2948				      cur, &toconv);
2949		    if (ret == -1) return(-1);
2950		    unused += written;
2951		    cur += toconv;
2952		} while (ret == -2);
2953#ifdef LIBXML_ICONV_ENABLED
2954	    } else if (handler->iconv_out != NULL) {
2955	        do {
2956		    toconv = in->end - cur;
2957		    written = 32000;
2958		    ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2959	                      &written, cur, &toconv);
2960		    if (ret < 0) {
2961		        if (written > 0)
2962			    ret = -2;
2963			else
2964			    return(-1);
2965		    }
2966		    unused += written;
2967		    cur += toconv;
2968		} while (ret == -2);
2969#endif
2970#ifdef LIBXML_ICU_ENABLED
2971	    } else if (handler->uconv_out != NULL) {
2972	        do {
2973		    toconv = in->end - cur;
2974		    written = 32000;
2975		    ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
2976	                      &written, cur, &toconv);
2977		    if (ret < 0) {
2978		        if (written > 0)
2979			    ret = -2;
2980			else
2981			    return(-1);
2982		    }
2983		    unused += written;
2984		    cur += toconv;
2985		} while (ret == -2);
2986#endif
2987            } else {
2988	        /* could not find a converter */
2989	        return(-1);
2990	    }
2991	}
2992	if (in->buf->rawconsumed < unused)
2993	    return(-1);
2994	return(in->buf->rawconsumed - unused);
2995    }
2996    return(in->consumed + (in->cur - in->base));
2997}
2998
2999#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
3000#ifdef LIBXML_ISO8859X_ENABLED
3001
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops, without error, once @out is
 * full; the caller can tell from @inlen how much input was used.
 *
 * @xlattable layout: bytes 0..31 are indexed by the low 5 bits of a
 * 2-byte sequence lead byte, bytes 32..47 by the low 4 bits of a 3-byte
 * sequence lead byte, and 64-byte blocks indexed by the low 6 bits of a
 * trailing byte start at offset 48. A 0 lookup result means the
 * character is not in the target character set.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (invalid UTF-8 or character not in the target set), -3 if
 *     the input ends in the middle of a sequence, or -1 if a required
 *     argument is NULL. If @in is NULL nothing is done and 0 is returned.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int err = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    /* every iteration emits at most one byte, so one free slot suffices */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            err = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                err = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                err = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                err = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                err = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                err = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                err = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                err = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            err = -2;
            break;
        }
        /* only now is the character counted as consumed */
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    if (err != 0)
        return(err);
    return(*outlen);
}
3120
/**
 * ISO8859xToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 entry table giving, for byte 0x80 + i, the code
 *                point stored at index i; 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early when @out has no room for
 * the next character.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or an undefined code point is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * A non-ASCII byte expands to at most 3 bytes, so keep at least 3
     * free output bytes while in this loop. The test is written as a
     * difference so no pointer before the start of @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than the output can still hold */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bounds test first: in may already be one past the input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* up to two output bytes may be left: fill them with ASCII if possible */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3186
3187
3188/************************************************************************
3189 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3190 ************************************************************************/
3191
/*
 * ISO-8859-2 upper half as 16-bit code point values: entry i is the
 * value for byte 0x80 + i. This is the layout ISO8859xToUTF8() reads
 * through @unicodetable, where a 0x0000 entry means "undefined"
 * (presumably this table is passed to it; the call is not in view here).
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3210
/*
 * Reverse lookup (code point -> ISO-8859-2 byte), laid out the way
 * UTF8ToISO8859x() indexes @xlattable (presumably its consumer):
 *   bytes  0..31  block number, selected by the low 5 bits of the lead
 *                 byte of a 2-byte UTF-8 sequence;
 *   bytes 32..47  block number, selected by the low 4 bits of the lead
 *                 byte of a 3-byte UTF-8 sequence;
 *   bytes 48..    6 blocks of 64 bytes, indexed by the low 6 bits of a
 *                 trailing byte.
 * A 0 lookup result means "not in character set". The string literal
 * fills the array exactly, so no terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3240
/*
 * ISO-8859-3 upper half as 16-bit code point values: entry i is the
 * value for byte 0x80 + i. This is the layout ISO8859xToUTF8() reads
 * through @unicodetable, where a 0x0000 entry means "undefined"
 * (presumably this table is passed to it; the call is not in view here).
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3259
/*
 * Reverse lookup (code point -> ISO-8859-3 byte), laid out the way
 * UTF8ToISO8859x() indexes @xlattable (presumably its consumer):
 *   bytes  0..31  block number, selected by the low 5 bits of the lead
 *                 byte of a 2-byte UTF-8 sequence;
 *   bytes 32..47  block number, selected by the low 4 bits of the lead
 *                 byte of a 3-byte UTF-8 sequence;
 *   bytes 48..    7 blocks of 64 bytes, indexed by the low 6 bits of a
 *                 trailing byte.
 * A 0 lookup result means "not in character set". The string literal
 * fills the array exactly, so no terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3293
/*
 * ISO-8859-4 upper half as 16-bit code point values: entry i is the
 * value for byte 0x80 + i. This is the layout ISO8859xToUTF8() reads
 * through @unicodetable, where a 0x0000 entry means "undefined"
 * (presumably this table is passed to it; the call is not in view here).
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
3312
/*
 * Reverse lookup (code point -> ISO-8859-4 byte), laid out the way
 * UTF8ToISO8859x() indexes @xlattable (presumably its consumer):
 *   bytes  0..31  block number, selected by the low 5 bits of the lead
 *                 byte of a 2-byte UTF-8 sequence;
 *   bytes 32..47  block number, selected by the low 4 bits of the lead
 *                 byte of a 3-byte UTF-8 sequence;
 *   bytes 48..    6 blocks of 64 bytes, indexed by the low 6 bits of a
 *                 trailing byte.
 * A 0 lookup result means "not in character set". The string literal
 * fills the array exactly, so no terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3342
/*
 * ISO-8859-5 upper half as 16-bit code point values: entry i is the
 * value for byte 0x80 + i. This is the layout ISO8859xToUTF8() reads
 * through @unicodetable, where a 0x0000 entry means "undefined"
 * (presumably this table is passed to it; the call is not in view here).
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
3361
/*
 * Reverse lookup (code point -> ISO-8859-5 byte), laid out the way
 * UTF8ToISO8859x() indexes @xlattable (presumably its consumer):
 *   bytes  0..31  block number, selected by the low 5 bits of the lead
 *                 byte of a 2-byte UTF-8 sequence;
 *   bytes 32..47  block number, selected by the low 4 bits of the lead
 *                 byte of a 3-byte UTF-8 sequence;
 *   bytes 48..    6 blocks of 64 bytes, indexed by the low 6 bits of a
 *                 trailing byte.
 * A 0 lookup result means "not in character set". The string literal
 * fills the array exactly, so no terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3391
/*
 * ISO-8859-6 upper half as 16-bit code point values: entry i is the
 * value for byte 0x80 + i. This is the layout ISO8859xToUTF8() reads
 * through @unicodetable, where a 0x0000 entry means "undefined"
 * (presumably this table is passed to it; the call is not in view here).
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
3410
/*
 * Reverse lookup (code point -> ISO-8859-6 byte), laid out the way
 * UTF8ToISO8859x() indexes @xlattable (presumably its consumer):
 *   bytes  0..31  block number, selected by the low 5 bits of the lead
 *                 byte of a 2-byte UTF-8 sequence;
 *   bytes 32..47  block number, selected by the low 4 bits of the lead
 *                 byte of a 3-byte UTF-8 sequence;
 *   bytes 48..    5 blocks of 64 bytes, indexed by the low 6 bits of a
 *                 trailing byte.
 * A 0 lookup result means "not in character set". The string literal
 * fills the array exactly, so no terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3436
/*
 * ISO-8859-7 upper half as 16-bit code point values: entry i is the
 * value for byte 0x80 + i. This is the layout ISO8859xToUTF8() reads
 * through @unicodetable, where a 0x0000 entry means "undefined"
 * (presumably this table is passed to it; the call is not in view here).
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
};
3455
/*
 * Reverse lookup (code point -> ISO-8859-7 byte), laid out the way
 * UTF8ToISO8859x() indexes @xlattable (presumably its consumer):
 *   bytes  0..31  block number, selected by the low 5 bits of the lead
 *                 byte of a 2-byte UTF-8 sequence;
 *   bytes 32..47  block number, selected by the low 4 bits of the lead
 *                 byte of a 3-byte UTF-8 sequence;
 *   bytes 48..    7 blocks of 64 bytes, indexed by the low 6 bits of a
 *                 trailing byte.
 * A 0 lookup result means "not in character set". The string literal
 * fills the array exactly, so no terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3489
/*
 * ISO-8859-8 upper half as 16-bit code point values: entry i is the
 * value for byte 0x80 + i. This is the layout ISO8859xToUTF8() reads
 * through @unicodetable, where a 0x0000 entry means "undefined"
 * (presumably this table is passed to it; the call is not in view here).
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
};
3508
/*
 * xmltranscodetable_ISO8859_8:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_8.  The declared
 * size, 48 + 7 * 64, indicates a 48-byte index area followed by seven
 * 64-byte blocks.  The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): the index area presumably selects a block from the UTF-8
 * lead byte and a block entry of 0x00 presumably means "not representable";
 * confirm against UTF8ToISO8859x, which is outside this excerpt.
 */
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3542
/*
 * xmlunicodetable_ISO8859_9:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_9ToUTF8.  The first 32 entries are the identity range
 * 0x0080..0x009f, so entry i presumably holds the Unicode code point for
 * byte 0x80 + i -- TODO confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
};
3561
/*
 * xmltranscodetable_ISO8859_9:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_9.  The declared
 * size, 48 + 5 * 64, indicates a 48-byte index area followed by five
 * 64-byte blocks.  The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): the index area presumably selects a block from the UTF-8
 * lead byte and a block entry of 0x00 presumably means "not representable";
 * confirm against UTF8ToISO8859x, which is outside this excerpt.
 */
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3587
/*
 * xmlunicodetable_ISO8859_10:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_10ToUTF8.  The first 32 entries are the identity range
 * 0x0080..0x009f, so entry i presumably holds the Unicode code point for
 * byte 0x80 + i -- TODO confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
3606
/*
 * xmltranscodetable_ISO8859_10:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_10.  The declared
 * size, 48 + 7 * 64, indicates a 48-byte index area followed by seven
 * 64-byte blocks.  The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): the index area presumably selects a block from the UTF-8
 * lead byte and a block entry of 0x00 presumably means "not representable";
 * confirm against UTF8ToISO8859x, which is outside this excerpt.
 */
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
3640
/*
 * xmlunicodetable_ISO8859_11:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_11ToUTF8.  The first 32 entries are the identity range
 * 0x0080..0x009f, so entry i presumably holds the Unicode code point for
 * byte 0x80 + i, with 0x0000 marking bytes that have no assignment --
 * TODO confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
3659
/*
 * xmltranscodetable_ISO8859_11:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_11.  The declared
 * size, 48 + 6 * 64, indicates a 48-byte index area followed by six
 * 64-byte blocks.  The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): the index area presumably selects a block from the UTF-8
 * lead byte and a block entry of 0x00 presumably means "not representable";
 * confirm against UTF8ToISO8859x, which is outside this excerpt.
 */
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3689
/*
 * xmlunicodetable_ISO8859_13:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_13ToUTF8.  The first 32 entries are the identity range
 * 0x0080..0x009f, so entry i presumably holds the Unicode code point for
 * byte 0x80 + i -- TODO confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
3708
/*
 * xmltranscodetable_ISO8859_13:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_13.  The declared
 * size, 48 + 7 * 64, indicates a 48-byte index area followed by seven
 * 64-byte blocks.  The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): the index area presumably selects a block from the UTF-8
 * lead byte and a block entry of 0x00 presumably means "not representable";
 * confirm against UTF8ToISO8859x, which is outside this excerpt.
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3742
/*
 * xmlunicodetable_ISO8859_14:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_14ToUTF8.  The first 32 entries are the identity range
 * 0x0080..0x009f, so entry i presumably holds the Unicode code point for
 * byte 0x80 + i -- TODO confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3761
/*
 * xmltranscodetable_ISO8859_14:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_14.  The declared
 * size, 48 + 10 * 64, indicates a 48-byte index area followed by ten
 * 64-byte blocks.  The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): the index area presumably selects a block from the UTF-8
 * lead byte and a block entry of 0x00 presumably means "not representable";
 * confirm against UTF8ToISO8859x, which is outside this excerpt.
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3807
/*
 * xmlunicodetable_ISO8859_15:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_15ToUTF8.  The first 32 entries are the identity range
 * 0x0080..0x009f, so entry i presumably holds the Unicode code point for
 * byte 0x80 + i -- TODO confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3826
/*
 * xmltranscodetable_ISO8859_15:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_15.  The declared
 * size, 48 + 6 * 64, indicates a 48-byte index area followed by six
 * 64-byte blocks.  The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): the index area presumably selects a block from the UTF-8
 * lead byte and a block entry of 0x00 presumably means "not representable";
 * confirm against UTF8ToISO8859x, which is outside this excerpt.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3856
/*
 * xmlunicodetable_ISO8859_16:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_16ToUTF8.  The first 32 entries are the identity range
 * 0x0080..0x009f, so entry i presumably holds the Unicode code point for
 * byte 0x80 + i -- TODO confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3875
/*
 * xmltranscodetable_ISO8859_16:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_16.  The declared
 * size, 48 + 9 * 64, indicates a 48-byte index area followed by nine
 * 64-byte blocks.  The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): the index area presumably selects a block from the UTF-8
 * lead byte and a block entry of 0x00 presumably means "not representable";
 * confirm against UTF8ToISO8859x, which is outside this excerpt.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3917
3918
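/*
 * Auto-generated wrapper functions for ISO-8859-2 .. ISO-8859-16
 * (no ISO-8859-12 pair is defined): one ISO8859_<n>ToUTF8 and one
 * UTF8ToISO8859_<n> function per character set.
 */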
3922
3923static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3924    const unsigned char* in, int *inlen) {
3925    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3926}
3927static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3928    const unsigned char* in, int *inlen) {
3929    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3930}
3931
3932static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3933    const unsigned char* in, int *inlen) {
3934    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3935}
3936static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3937    const unsigned char* in, int *inlen) {
3938    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3939}
3940
3941static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3942    const unsigned char* in, int *inlen) {
3943    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3944}
3945static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3946    const unsigned char* in, int *inlen) {
3947    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3948}
3949
3950static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3951    const unsigned char* in, int *inlen) {
3952    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3953}
3954static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3955    const unsigned char* in, int *inlen) {
3956    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3957}
3958
3959static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3960    const unsigned char* in, int *inlen) {
3961    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3962}
3963static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3964    const unsigned char* in, int *inlen) {
3965    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3966}
3967
3968static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3969    const unsigned char* in, int *inlen) {
3970    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3971}
3972static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3973    const unsigned char* in, int *inlen) {
3974    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3975}
3976
3977static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3978    const unsigned char* in, int *inlen) {
3979    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3980}
3981static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3982    const unsigned char* in, int *inlen) {
3983    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3984}
3985
3986static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3987    const unsigned char* in, int *inlen) {
3988    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3989}
3990static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3991    const unsigned char* in, int *inlen) {
3992    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3993}
3994
3995static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3996    const unsigned char* in, int *inlen) {
3997    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3998}
3999static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
4000    const unsigned char* in, int *inlen) {
4001    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
4002}
4003
4004static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
4005    const unsigned char* in, int *inlen) {
4006    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
4007}
4008static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
4009    const unsigned char* in, int *inlen) {
4010    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
4011}
4012
4013static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
4014    const unsigned char* in, int *inlen) {
4015    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
4016}
4017static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
4018    const unsigned char* in, int *inlen) {
4019    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
4020}
4021
4022static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
4023    const unsigned char* in, int *inlen) {
4024    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
4025}
4026static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
4027    const unsigned char* in, int *inlen) {
4028    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
4029}
4030
4031static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
4032    const unsigned char* in, int *inlen) {
4033    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
4034}
4035static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
4036    const unsigned char* in, int *inlen) {
4037    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
4038}
4039
4040static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
4041    const unsigned char* in, int *inlen) {
4042    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
4043}
4044static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
4045    const unsigned char* in, int *inlen) {
4046    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
4047}
4048
4049static void
4050xmlRegisterCharEncodingHandlersISO8859x (void) {
4051    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
4052    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
4053    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
4054    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
4055    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
4056    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
4057    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
4058    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
4059    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
4060    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
4061    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
4062    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
4063    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
4064    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
4065}
4066
4067#endif
4068#endif
4069
4070#define bottom_encoding
4071#include "elfgcchack.h"
4072