1/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1]   ISO Latin-1 characters codes.
9 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10 *                Worldwide Character Encoding -- Version 1.0", Addison-
11 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12 *                described in Unicode Technical Report #4.
13 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14 *                Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * daniel@veillard.com
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21 */
22
23#define IN_LIBXML
24#include "libxml.h"
25
26#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
34#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
44#include <libxml/globals.h>
45#include <libxml/xmlerror.h>
46
/*
 * File-scope slots for the UTF-16 little-endian and big-endian handlers.
 * Both start out NULL; they are not assigned in this part of the file.
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/*
 * One entry of the encoding alias table: associates an alias with the
 * encoding name it stands for. xmlAddEncodingAlias() stores the alias
 * upper-cased, and both strings are heap copies released by
 * xmlDelEncodingAlias() / xmlCleanupEncodingAliases().
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;	/* the encoding name the alias resolves to */
    const char *alias;	/* the alias itself */
};

/*
 * The alias table: the array itself, the number of entries in use and
 * the number of entries allocated.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
60
/*
 * With iconv or ICU support compiled in, the only thing configured here
 * is the (disabled by default) DEBUG_ENCODING trace switch. Without them,
 * the registration routine for the ISO-8859-x handlers is declared when
 * LIBXML_ISO8859X_ENABLED is set.
 */
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/*
 * Non-zero when the UTF-16 converters should treat the host as
 * little-endian; it selects between direct 16-bit accesses and explicit
 * byte shuffling. Defaults to 1 and is not modified in this part of the
 * file.
 */
static int xmlLittleEndian = 1;
72
73/**
74 * xmlEncodingErrMemory:
75 * @extra:  extra informations
76 *
77 * Handle an out of memory condition
78 */
79static void
80xmlEncodingErrMemory(const char *extra)
81{
82    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
83}
84
85/**
86 * xmlErrEncoding:
87 * @error:  the error number
88 * @msg:  the error message
89 *
90 * n encoding error
91 */
92static void
93xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
94{
95    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
96                    XML_FROM_I18N, error, XML_ERR_FATAL,
97                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
98}
99
100#ifdef LIBXML_ICU_ENABLED
101static uconv_t*
102openIcuConverter(const char* name, int toUnicode)
103{
104  UErrorCode status = U_ZERO_ERROR;
105  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
106  if (conv == NULL)
107    return NULL;
108
109  conv->uconv = ucnv_open(name, &status);
110  if (U_FAILURE(status))
111    goto error;
112
113  status = U_ZERO_ERROR;
114  if (toUnicode) {
115    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
116                        NULL, NULL, NULL, &status);
117  }
118  else {
119    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
120                        NULL, NULL, NULL, &status);
121  }
122  if (U_FAILURE(status))
123    goto error;
124
125  status = U_ZERO_ERROR;
126  conv->utf8 = ucnv_open("UTF-8", &status);
127  if (U_SUCCESS(status))
128    return conv;
129
130error:
131  if (conv->uconv)
132    ucnv_close(conv->uconv);
133  xmlFree(conv);
134  return NULL;
135}
136
137static void
138closeIcuConverter(uconv_t *conv)
139{
140  if (conv != NULL) {
141    ucnv_close(conv->uconv);
142    ucnv_close(conv->utf8);
143    xmlFree(conv);
144  }
145}
146#endif /* LIBXML_ICU_ENABLED */
147
148/************************************************************************
149 *									*
150 *		Conversions To/From UTF8 encoding			*
151 *									*
152 ************************************************************************/
153
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Copy a block of ASCII chars to a UTF-8 block of chars. Conversion
 * stops early while fewer than six bytes of @out remain unused.
 *
 * Returns the number of bytes written, or -1 when a byte >= 0x80 is met.
 * In both cases *@inlen is set to the number of octets consumed and
 * *@outlen to the number of octets written.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *const srcEnd = in + (*inlen);
    unsigned char *dst = out;
    int nonAscii = 0;

    /* The +5 margin keeps dst strictly inside the output buffer. */
    while ((src < srcEnd) && (dst - out + 5 < *outlen)) {
        if (*src >= 0x80) {
            nonAscii = 1;
            break;
        }
        *dst++ = *src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    if (nonAscii)
        return(-1);
    return(*outlen);
}
198
199#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is an initialization call: both lengths are set to 0 and 0
 * is returned. A multi-byte sequence cut short by the end of @in, or a
 * full @out, stops the conversion without error so the caller can come
 * back with more data or more room.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * On return *@inlen is the number of octets consumed and *@outlen the
 * number of octets written; only complete characters are counted.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in Ascii */
            goto invalid;
        }

        /* incomplete sequence: wait for the rest of the input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not 10xxxxxx cannot continue the sequence.
             * Fail instead of emitting a value built from a partial
             * sequence, as UTF8Toisolat1() does.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single code point */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto invalid;
        }
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
283#endif /* LIBXML_OUTPUT_ENABLED */
284
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, the others
 * become a two-byte sequence. Conversion stops at the first character
 * that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL.
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets written.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcEnd;
    unsigned char *dst;
    unsigned char *dstEnd;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    src = in;
    srcEnd = in + (*inlen);
    dst = out;
    dstEnd = out + *outlen;

    while (src < srcEnd) {
        if (*src < 0x80) {
            /* plain ASCII: one byte out */
            if (dst >= dstEnd)
                break;
            *dst++ = *src++;
        } else {
            /* 0x80..0xFF: two bytes out, both or none */
            if (dstEnd - dst < 2)
                break;
            *dst++ = (((*src) >> 6) & 0x1F) | 0xC0;
            *dst++ = ((*src) & 0x3F) | 0x80;
            src++;
        }
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
333
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as both
 * buffers allow, without looking at character boundaries.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     the smaller of the two lengths is negative.
 *     On success *@outlen and *@inlenb are both set to the number of
 *     bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* copy min(*outlen, *inlenb) bytes */
    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(count);
}
369
370
371#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. A NULL @in is an initialization call which sets
 * both lengths to 0. A sequence cut short by the end of @in, or a full
 * @out, stops the conversion without error.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed input or a character above 0xFF), or -1 if @out,
 *     @outlen or @inlen is NULL.
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets written.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *done = in;
    const unsigned char *srcEnd;
    unsigned char *dst = out;
    const unsigned char *dstEnd;
    unsigned int lead, next, cp;
    int extra;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization, nothing to convert */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcEnd = in + (*inlen);
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        lead = *src++;

        /* classify the lead byte */
        if (lead < 0x80) {
            cp = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            goto failed;            /* continuation byte in lead position */
        } else if (lead < 0xE0) {
            cp = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            cp = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            cp = lead & 0x07;
            extra = 3;
        } else {
            goto failed;            /* no chance for this in IsoLat1 */
        }

        /* the rest of the sequence has not arrived yet */
        if (srcEnd - src < extra)
            break;

        while (extra > 0) {
            next = *src++;
            if ((next & 0xC0) != 0x80)
                goto failed;
            cp = (cp << 6) | (next & 0x3F);
            extra--;
        }

        if (cp > 0xFF)
            goto failed;            /* no chance for this in IsoLat1 */
        if (dst >= dstEnd)
            break;
        *dst++ = cp;
        done = src;
    }

    *outlen = dst - out;
    *inlen = done - in;
    return(*outlen);

failed:
    *outlen = dst - out;
    *inlen = done - in;
    return(-2);
}
461#endif /* LIBXML_OUTPUT_ENABLED */
462
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is decoded byte by byte, so @inb
 * needs no particular alignment and the result does not depend on the
 * byte order of the host.
 *
 * An odd *@inlenb is rounded down first. Conversion stops while fewer
 * than six bytes of @out remain unused, or when a high surrogate is the
 * last code unit of the input (the pair is completed by a later call).
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 *     The value of *@inlenb after return is the number of octets
 *     consumed and *@outlen the number of octets written.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /*
     * The +5 margin guarantees room for the longest (4-byte) UTF-8
     * sequence, so no per-byte space check is needed below.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* high surrogate */
            if (in >= inend)             /* pair split across two calls */
                break;
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* c is now a single code point: emit the lead byte ... */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        /* ... then the continuation bytes, six bits at a time */
        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
550
551#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the byte order of the host.
 *
 * A NULL @in is an initialization call which sets both lengths to 0.
 * A sequence cut short by the end of @in, a full @outb, or a value of
 * 0x110000 and above stops the conversion without error.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (malformed UTF-8).
 *     On return *@outlen is the number of bytes written and *@inlen the
 *     number of bytes consumed.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units may be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* incomplete sequence: wait for the rest of the input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* reject anything that is not a 10xxxxxx continuation byte */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            /* surrogate pair: both units or none */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            break;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
660
661/**
662 * UTF8ToUTF16:
663 * @outb:  a pointer to an array of bytes to store the result
664 * @outlen:  the length of @outb
665 * @in:  a pointer to an array of UTF-8 chars
666 * @inlen:  the length of @in
667 *
668 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
669 * block of chars out.
670 *
671 * Returns the number of bytes written, or -1 if lack of space, or -2
672 *     if the transcoding failed.
673 */
674static int
675UTF8ToUTF16(unsigned char* outb, int *outlen,
676            const unsigned char* in, int *inlen)
677{
678    if (in == NULL) {
679	/*
680	 * initialization, add the Byte Order Mark for UTF-16LE
681	 */
682        if (*outlen >= 2) {
683	    outb[0] = 0xFF;
684	    outb[1] = 0xFE;
685	    *outlen = 2;
686	    *inlen = 0;
687#ifdef DEBUG_ENCODING
688            xmlGenericError(xmlGenericErrorContext,
689		    "Added FFFE Byte Order Mark\n");
690#endif
691	    return(2);
692	}
693	*outlen = 0;
694	*inlen = 0;
695	return(0);
696    }
697    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
698}
699#endif /* LIBXML_OUTPUT_ENABLED */
700
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is decoded byte by byte, so @inb
 * needs no particular alignment and the result does not depend on the
 * byte order of the host.
 *
 * An odd *@inlenb is rounded down first. Conversion stops, without
 * error, when the next character does not fit entirely in @out or when
 * a high surrogate is the last code unit of the input (the pair is
 * completed by a later call). A character is never written partially.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 * The value of *@inlenb after return is the number of octets consumed
 * and *@outlen the number of octets written.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits, needed;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* high surrogate */
            if (in >= inend)             /* pair split across two calls */
                break;
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* c is now a single code point: size of its UTF-8 form */
        if      (c <    0x80)  needed = 1;
        else if (c <   0x800)  needed = 2;
        else if (c < 0x10000)  needed = 3;
        else                   needed = 4;

        /* write the whole sequence or nothing, and do not consume it */
        if (outend - out < needed)
            break;

        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
792
793#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the byte order of the host.
 *
 * A NULL @in is an initialization call which sets both lengths to 0.
 * A sequence cut short by the end of @in, a full @outb, or a value of
 * 0x110000 and above stops the conversion without error.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (malformed UTF-8).
 *     On return, including the -2 case, *@outlen is the number of bytes
 *     written and *@inlen the number of bytes consumed.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units may be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* incomplete sequence: wait for the rest of the input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* reject anything that is not a 10xxxxxx continuation byte */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            /* surrogate pair: both units or none */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            break;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* byte count, like the success path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
899#endif /* LIBXML_OUTPUT_ENABLED */
900
901/************************************************************************
902 *									*
903 *		Generic encoding handling routines			*
904 *									*
905 ************************************************************************/
906
907/**
908 * xmlDetectCharEncoding:
909 * @in:  a pointer to the first bytes of the XML entity, must be at least
910 *       2 bytes long (at least 4 if encoding is UTF4 variant).
911 * @len:  pointer to the length of the buffer
912 *
913 * Guess the encoding of the entity using the first bytes of the entity content
914 * according to the non-normative appendix F of the XML-1.0 recommendation.
915 *
916 * Returns one of the XML_CHAR_ENCODING_... values.
917 */
918xmlCharEncoding
919xmlDetectCharEncoding(const unsigned char* in, int len)
920{
921    if (in == NULL)
922        return(XML_CHAR_ENCODING_NONE);
923    if (len >= 4) {
924	if ((in[0] == 0x00) && (in[1] == 0x00) &&
925	    (in[2] == 0x00) && (in[3] == 0x3C))
926	    return(XML_CHAR_ENCODING_UCS4BE);
927	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
928	    (in[2] == 0x00) && (in[3] == 0x00))
929	    return(XML_CHAR_ENCODING_UCS4LE);
930	if ((in[0] == 0x00) && (in[1] == 0x00) &&
931	    (in[2] == 0x3C) && (in[3] == 0x00))
932	    return(XML_CHAR_ENCODING_UCS4_2143);
933	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
934	    (in[2] == 0x00) && (in[3] == 0x00))
935	    return(XML_CHAR_ENCODING_UCS4_3412);
936	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
937	    (in[2] == 0xA7) && (in[3] == 0x94))
938	    return(XML_CHAR_ENCODING_EBCDIC);
939	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
940	    (in[2] == 0x78) && (in[3] == 0x6D))
941	    return(XML_CHAR_ENCODING_UTF8);
942	/*
943	 * Although not part of the recommendation, we also
944	 * attempt an "auto-recognition" of UTF-16LE and
945	 * UTF-16BE encodings.
946	 */
947	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
948	    (in[2] == 0x3F) && (in[3] == 0x00))
949	    return(XML_CHAR_ENCODING_UTF16LE);
950	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
951	    (in[2] == 0x00) && (in[3] == 0x3F))
952	    return(XML_CHAR_ENCODING_UTF16BE);
953    }
954    if (len >= 3) {
955	/*
956	 * Errata on XML-1.0 June 20 2001
957	 * We now allow an UTF8 encoded BOM
958	 */
959	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
960	    (in[2] == 0xBF))
961	    return(XML_CHAR_ENCODING_UTF8);
962    }
963    /* For UTF-16 we can recognize by the BOM */
964    if (len >= 2) {
965	if ((in[0] == 0xFE) && (in[1] == 0xFF))
966	    return(XML_CHAR_ENCODING_UTF16BE);
967	if ((in[0] == 0xFF) && (in[1] == 0xFE))
968	    return(XML_CHAR_ENCODING_UTF16LE);
969    }
970    return(XML_CHAR_ENCODING_NONE);
971}
972
973/**
974 * xmlCleanupEncodingAliases:
975 *
976 * Unregisters all aliases
977 */
978void
979xmlCleanupEncodingAliases(void) {
980    int i;
981
982    if (xmlCharEncodingAliases == NULL)
983	return;
984
985    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
986	if (xmlCharEncodingAliases[i].name != NULL)
987	    xmlFree((char *) xmlCharEncodingAliases[i].name);
988	if (xmlCharEncodingAliases[i].alias != NULL)
989	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
990    }
991    xmlCharEncodingAliasesNb = 0;
992    xmlCharEncodingAliasesMax = 0;
993    xmlFree(xmlCharEncodingAliases);
994    xmlCharEncodingAliases = NULL;
995}
996
997/**
998 * xmlGetEncodingAlias:
999 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1000 *
1001 * Lookup an encoding name for the given alias.
1002 *
1003 * Returns NULL if not found, otherwise the original name
1004 */
1005const char *
1006xmlGetEncodingAlias(const char *alias) {
1007    int i;
1008    char upper[100];
1009
1010    if (alias == NULL)
1011	return(NULL);
1012
1013    if (xmlCharEncodingAliases == NULL)
1014	return(NULL);
1015
1016    for (i = 0;i < 99;i++) {
1017        upper[i] = toupper(alias[i]);
1018	if (upper[i] == 0) break;
1019    }
1020    upper[i] = 0;
1021
1022    /*
1023     * Walk down the list looking for a definition of the alias
1024     */
1025    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1026	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1027	    return(xmlCharEncodingAliases[i].name);
1028	}
1029    }
1030    return(NULL);
1031}
1032
1033/**
1034 * xmlAddEncodingAlias:
1035 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1036 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1037 *
1038 * Registers an alias @alias for an encoding named @name. Existing alias
1039 * will be overwritten.
1040 *
1041 * Returns 0 in case of success, -1 in case of error
1042 */
1043int
1044xmlAddEncodingAlias(const char *name, const char *alias) {
1045    int i;
1046    char upper[100];
1047
1048    if ((name == NULL) || (alias == NULL))
1049	return(-1);
1050
1051    for (i = 0;i < 99;i++) {
1052        upper[i] = toupper(alias[i]);
1053	if (upper[i] == 0) break;
1054    }
1055    upper[i] = 0;
1056
1057    if (xmlCharEncodingAliases == NULL) {
1058	xmlCharEncodingAliasesNb = 0;
1059	xmlCharEncodingAliasesMax = 20;
1060	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1061	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1062	if (xmlCharEncodingAliases == NULL)
1063	    return(-1);
1064    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1065	xmlCharEncodingAliasesMax *= 2;
1066	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1067	      xmlRealloc(xmlCharEncodingAliases,
1068		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1069    }
1070    /*
1071     * Walk down the list looking for a definition of the alias
1072     */
1073    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1074	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1075	    /*
1076	     * Replace the definition.
1077	     */
1078	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1079	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1080	    return(0);
1081	}
1082    }
1083    /*
1084     * Add the definition
1085     */
1086    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1087    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1088    xmlCharEncodingAliasesNb++;
1089    return(0);
1090}
1091
1092/**
1093 * xmlDelEncodingAlias:
1094 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1095 *
1096 * Unregisters an encoding alias @alias
1097 *
1098 * Returns 0 in case of success, -1 in case of error
1099 */
1100int
1101xmlDelEncodingAlias(const char *alias) {
1102    int i;
1103
1104    if (alias == NULL)
1105	return(-1);
1106
1107    if (xmlCharEncodingAliases == NULL)
1108	return(-1);
1109    /*
1110     * Walk down the list looking for a definition of the alias
1111     */
1112    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1113	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1114	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1115	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1116	    xmlCharEncodingAliasesNb--;
1117	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1118		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1119	    return(0);
1120	}
1121    }
1122    return(-1);
1123}
1124
1125/**
1126 * xmlParseCharEncoding:
1127 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1128 *
1129 * Compare the string to the encoding schemes already known. Note
1130 * that the comparison is case insensitive accordingly to the section
1131 * [XML] 4.3.3 Character Encoding in Entities.
1132 *
1133 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1134 * if not recognized.
1135 */
1136xmlCharEncoding
1137xmlParseCharEncoding(const char* name)
1138{
1139    const char *alias;
1140    char upper[500];
1141    int i;
1142
1143    if (name == NULL)
1144	return(XML_CHAR_ENCODING_NONE);
1145
1146    /*
1147     * Do the alias resolution
1148     */
1149    alias = xmlGetEncodingAlias(name);
1150    if (alias != NULL)
1151	name = alias;
1152
1153    for (i = 0;i < 499;i++) {
1154        upper[i] = toupper(name[i]);
1155	if (upper[i] == 0) break;
1156    }
1157    upper[i] = 0;
1158
1159    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1160    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1161    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1162
1163    /*
1164     * NOTE: if we were able to parse this, the endianness of UTF16 is
1165     *       already found and in use
1166     */
1167    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1168    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1169
1170    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1171    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1172    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1173
1174    /*
1175     * NOTE: if we were able to parse this, the endianness of UCS4 is
1176     *       already found and in use
1177     */
1178    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1179    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1180    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1181
1182
1183    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1184    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1185    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1186
1187    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1188    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1189    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1190
1191    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1192    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1193    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1194    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1195    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1196    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1197    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1198
1199    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1200    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1201    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1202
1203#ifdef DEBUG_ENCODING
1204    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1205#endif
1206    return(XML_CHAR_ENCODING_ERROR);
1207}
1208
1209/**
1210 * xmlGetCharEncodingName:
1211 * @enc:  the encoding
1212 *
1213 * The "canonical" name for XML encoding.
1214 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1215 * Section 4.3.3  Character Encoding in Entities
1216 *
1217 * Returns the canonical name for the given encoding
1218 */
1219
1220const char*
1221xmlGetCharEncodingName(xmlCharEncoding enc) {
1222    switch (enc) {
1223        case XML_CHAR_ENCODING_ERROR:
1224	    return(NULL);
1225        case XML_CHAR_ENCODING_NONE:
1226	    return(NULL);
1227        case XML_CHAR_ENCODING_UTF8:
1228	    return("UTF-8");
1229        case XML_CHAR_ENCODING_UTF16LE:
1230	    return("UTF-16");
1231        case XML_CHAR_ENCODING_UTF16BE:
1232	    return("UTF-16");
1233        case XML_CHAR_ENCODING_EBCDIC:
1234            return("EBCDIC");
1235        case XML_CHAR_ENCODING_UCS4LE:
1236            return("ISO-10646-UCS-4");
1237        case XML_CHAR_ENCODING_UCS4BE:
1238            return("ISO-10646-UCS-4");
1239        case XML_CHAR_ENCODING_UCS4_2143:
1240            return("ISO-10646-UCS-4");
1241        case XML_CHAR_ENCODING_UCS4_3412:
1242            return("ISO-10646-UCS-4");
1243        case XML_CHAR_ENCODING_UCS2:
1244            return("ISO-10646-UCS-2");
1245        case XML_CHAR_ENCODING_8859_1:
1246	    return("ISO-8859-1");
1247        case XML_CHAR_ENCODING_8859_2:
1248	    return("ISO-8859-2");
1249        case XML_CHAR_ENCODING_8859_3:
1250	    return("ISO-8859-3");
1251        case XML_CHAR_ENCODING_8859_4:
1252	    return("ISO-8859-4");
1253        case XML_CHAR_ENCODING_8859_5:
1254	    return("ISO-8859-5");
1255        case XML_CHAR_ENCODING_8859_6:
1256	    return("ISO-8859-6");
1257        case XML_CHAR_ENCODING_8859_7:
1258	    return("ISO-8859-7");
1259        case XML_CHAR_ENCODING_8859_8:
1260	    return("ISO-8859-8");
1261        case XML_CHAR_ENCODING_8859_9:
1262	    return("ISO-8859-9");
1263        case XML_CHAR_ENCODING_2022_JP:
1264            return("ISO-2022-JP");
1265        case XML_CHAR_ENCODING_SHIFT_JIS:
1266            return("Shift-JIS");
1267        case XML_CHAR_ENCODING_EUC_JP:
1268            return("EUC-JP");
1269	case XML_CHAR_ENCODING_ASCII:
1270	    return(NULL);
1271    }
1272    return(NULL);
1273}
1274
1275/************************************************************************
1276 *									*
1277 *			Char encoding handlers				*
1278 *									*
1279 ************************************************************************/
1280
1281
/*
 * Fixed capacity of the handlers table. It ought to grow on demand,
 * but a hard limit has not been a big deal so far.
 */
1283#define MAX_ENCODING_HANDLERS 50
1284static xmlCharEncodingHandlerPtr *handlers = NULL;
1285static int nbCharEncodingHandler = 0;
1286
1287/*
1288 * The default is UTF-8 for XML, that's also the default used for the
1289 * parser internals, so the default encoding handler is NULL
1290 */
1291
1292static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1293
1294/**
1295 * xmlNewCharEncodingHandler:
1296 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1297 * @input:  the xmlCharEncodingInputFunc to read that encoding
1298 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1299 *
1300 * Create and registers an xmlCharEncodingHandler.
1301 *
1302 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1303 */
1304xmlCharEncodingHandlerPtr
1305xmlNewCharEncodingHandler(const char *name,
1306                          xmlCharEncodingInputFunc input,
1307                          xmlCharEncodingOutputFunc output) {
1308    xmlCharEncodingHandlerPtr handler;
1309    const char *alias;
1310    char upper[500];
1311    int i;
1312    char *up = NULL;
1313
1314    /*
1315     * Do the alias resolution
1316     */
1317    alias = xmlGetEncodingAlias(name);
1318    if (alias != NULL)
1319	name = alias;
1320
1321    /*
1322     * Keep only the uppercase version of the encoding.
1323     */
1324    if (name == NULL) {
1325        xmlEncodingErr(XML_I18N_NO_NAME,
1326		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1327	return(NULL);
1328    }
1329    for (i = 0;i < 499;i++) {
1330        upper[i] = toupper(name[i]);
1331	if (upper[i] == 0) break;
1332    }
1333    upper[i] = 0;
1334    up = xmlMemStrdup(upper);
1335    if (up == NULL) {
1336        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1337	return(NULL);
1338    }
1339
1340    /*
1341     * allocate and fill-up an handler block.
1342     */
1343    handler = (xmlCharEncodingHandlerPtr)
1344              xmlMalloc(sizeof(xmlCharEncodingHandler));
1345    if (handler == NULL) {
1346        xmlFree(up);
1347        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1348	return(NULL);
1349    }
1350    handler->input = input;
1351    handler->output = output;
1352    handler->name = up;
1353
1354#ifdef LIBXML_ICONV_ENABLED
1355    handler->iconv_in = NULL;
1356    handler->iconv_out = NULL;
1357#endif
1358#ifdef LIBXML_ICU_ENABLED
1359    handler->uconv_in = NULL;
1360    handler->uconv_out = NULL;
1361#endif
1362
1363    /*
1364     * registers and returns the handler.
1365     */
1366    xmlRegisterCharEncodingHandler(handler);
1367#ifdef DEBUG_ENCODING
1368    xmlGenericError(xmlGenericErrorContext,
1369	    "Registered encoding handler for %s\n", name);
1370#endif
1371    return(handler);
1372}
1373
1374/**
1375 * xmlInitCharEncodingHandlers:
1376 *
1377 * Initialize the char encoding support, it registers the default
1378 * encoding supported.
1379 * NOTE: while public, this function usually doesn't need to be called
1380 *       in normal processing.
1381 */
1382void
1383xmlInitCharEncodingHandlers(void) {
1384    unsigned short int tst = 0x1234;
1385    unsigned char *ptr = (unsigned char *) &tst;
1386
1387    if (handlers != NULL) return;
1388
1389    handlers = (xmlCharEncodingHandlerPtr *)
1390        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1391
1392    if (*ptr == 0x12) xmlLittleEndian = 0;
1393    else if (*ptr == 0x34) xmlLittleEndian = 1;
1394    else {
1395        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1396	               "Odd problem at endianness detection\n", NULL);
1397    }
1398
1399    if (handlers == NULL) {
1400        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1401	return;
1402    }
1403    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1404#ifdef LIBXML_OUTPUT_ENABLED
1405    xmlUTF16LEHandler =
1406          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1407    xmlUTF16BEHandler =
1408          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1409    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1410    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1411    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1412    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1413#ifdef LIBXML_HTML_ENABLED
1414    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1415#endif
1416#else
1417    xmlUTF16LEHandler =
1418          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1419    xmlUTF16BEHandler =
1420          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1421    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1422    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1423    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1424    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1425#endif /* LIBXML_OUTPUT_ENABLED */
1426#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1427#ifdef LIBXML_ISO8859X_ENABLED
1428    xmlRegisterCharEncodingHandlersISO8859x ();
1429#endif
1430#endif
1431
1432}
1433
1434/**
1435 * xmlCleanupCharEncodingHandlers:
1436 *
1437 * Cleanup the memory allocated for the char encoding support, it
1438 * unregisters all the encoding handlers and the aliases.
1439 */
1440void
1441xmlCleanupCharEncodingHandlers(void) {
1442    xmlCleanupEncodingAliases();
1443
1444    if (handlers == NULL) return;
1445
1446    for (;nbCharEncodingHandler > 0;) {
1447        nbCharEncodingHandler--;
1448	if (handlers[nbCharEncodingHandler] != NULL) {
1449	    if (handlers[nbCharEncodingHandler]->name != NULL)
1450		xmlFree(handlers[nbCharEncodingHandler]->name);
1451	    xmlFree(handlers[nbCharEncodingHandler]);
1452	}
1453    }
1454    xmlFree(handlers);
1455    handlers = NULL;
1456    nbCharEncodingHandler = 0;
1457    xmlDefaultCharEncodingHandler = NULL;
1458}
1459
1460/**
1461 * xmlRegisterCharEncodingHandler:
1462 * @handler:  the xmlCharEncodingHandlerPtr handler block
1463 *
1464 * Register the char encoding handler, surprising, isn't it ?
1465 */
1466void
1467xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1468    if (handlers == NULL) xmlInitCharEncodingHandlers();
1469    if ((handler == NULL) || (handlers == NULL)) {
1470        xmlEncodingErr(XML_I18N_NO_HANDLER,
1471		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1472	return;
1473    }
1474
1475    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1476        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1477	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1478	               "MAX_ENCODING_HANDLERS");
1479	return;
1480    }
1481    handlers[nbCharEncodingHandler++] = handler;
1482}
1483
1484/**
1485 * xmlGetCharEncodingHandler:
1486 * @enc:  an xmlCharEncoding value.
1487 *
1488 * Search in the registered set the handler able to read/write that encoding.
1489 *
1490 * Returns the handler or NULL if not found
1491 */
1492xmlCharEncodingHandlerPtr
1493xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1494    xmlCharEncodingHandlerPtr handler;
1495
1496    if (handlers == NULL) xmlInitCharEncodingHandlers();
1497    switch (enc) {
1498        case XML_CHAR_ENCODING_ERROR:
1499	    return(NULL);
1500        case XML_CHAR_ENCODING_NONE:
1501	    return(NULL);
1502        case XML_CHAR_ENCODING_UTF8:
1503	    return(NULL);
1504        case XML_CHAR_ENCODING_UTF16LE:
1505	    return(xmlUTF16LEHandler);
1506        case XML_CHAR_ENCODING_UTF16BE:
1507	    return(xmlUTF16BEHandler);
1508        case XML_CHAR_ENCODING_EBCDIC:
1509            handler = xmlFindCharEncodingHandler("EBCDIC");
1510            if (handler != NULL) return(handler);
1511            handler = xmlFindCharEncodingHandler("ebcdic");
1512            if (handler != NULL) return(handler);
1513            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1514            if (handler != NULL) return(handler);
1515	    break;
1516        case XML_CHAR_ENCODING_UCS4BE:
1517            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1518            if (handler != NULL) return(handler);
1519            handler = xmlFindCharEncodingHandler("UCS-4");
1520            if (handler != NULL) return(handler);
1521            handler = xmlFindCharEncodingHandler("UCS4");
1522            if (handler != NULL) return(handler);
1523	    break;
1524        case XML_CHAR_ENCODING_UCS4LE:
1525            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1526            if (handler != NULL) return(handler);
1527            handler = xmlFindCharEncodingHandler("UCS-4");
1528            if (handler != NULL) return(handler);
1529            handler = xmlFindCharEncodingHandler("UCS4");
1530            if (handler != NULL) return(handler);
1531	    break;
1532        case XML_CHAR_ENCODING_UCS4_2143:
1533	    break;
1534        case XML_CHAR_ENCODING_UCS4_3412:
1535	    break;
1536        case XML_CHAR_ENCODING_UCS2:
1537            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1538            if (handler != NULL) return(handler);
1539            handler = xmlFindCharEncodingHandler("UCS-2");
1540            if (handler != NULL) return(handler);
1541            handler = xmlFindCharEncodingHandler("UCS2");
1542            if (handler != NULL) return(handler);
1543	    break;
1544
1545	    /*
1546	     * We used to keep ISO Latin encodings native in the
1547	     * generated data. This led to so many problems that
1548	     * this has been removed. One can still change this
1549	     * back by registering no-ops encoders for those
1550	     */
1551        case XML_CHAR_ENCODING_8859_1:
1552	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1553	    if (handler != NULL) return(handler);
1554	    break;
1555        case XML_CHAR_ENCODING_8859_2:
1556	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1557	    if (handler != NULL) return(handler);
1558	    break;
1559        case XML_CHAR_ENCODING_8859_3:
1560	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1561	    if (handler != NULL) return(handler);
1562	    break;
1563        case XML_CHAR_ENCODING_8859_4:
1564	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1565	    if (handler != NULL) return(handler);
1566	    break;
1567        case XML_CHAR_ENCODING_8859_5:
1568	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1569	    if (handler != NULL) return(handler);
1570	    break;
1571        case XML_CHAR_ENCODING_8859_6:
1572	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1573	    if (handler != NULL) return(handler);
1574	    break;
1575        case XML_CHAR_ENCODING_8859_7:
1576	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1577	    if (handler != NULL) return(handler);
1578	    break;
1579        case XML_CHAR_ENCODING_8859_8:
1580	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1581	    if (handler != NULL) return(handler);
1582	    break;
1583        case XML_CHAR_ENCODING_8859_9:
1584	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1585	    if (handler != NULL) return(handler);
1586	    break;
1587
1588
1589        case XML_CHAR_ENCODING_2022_JP:
1590            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1591            if (handler != NULL) return(handler);
1592	    break;
1593        case XML_CHAR_ENCODING_SHIFT_JIS:
1594            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1595            if (handler != NULL) return(handler);
1596            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1597            if (handler != NULL) return(handler);
1598            handler = xmlFindCharEncodingHandler("Shift_JIS");
1599            if (handler != NULL) return(handler);
1600	    break;
1601        case XML_CHAR_ENCODING_EUC_JP:
1602            handler = xmlFindCharEncodingHandler("EUC-JP");
1603            if (handler != NULL) return(handler);
1604	    break;
1605	default:
1606	    break;
1607    }
1608
1609#ifdef DEBUG_ENCODING
1610    xmlGenericError(xmlGenericErrorContext,
1611	    "No handler found for encoding %d\n", enc);
1612#endif
1613    return(NULL);
1614}
1615
1616/**
1617 * xmlFindCharEncodingHandler:
1618 * @name:  a string describing the char encoding.
1619 *
1620 * Search in the registered set the handler able to read/write that encoding.
1621 *
1622 * Returns the handler or NULL if not found
1623 */
1624xmlCharEncodingHandlerPtr
1625xmlFindCharEncodingHandler(const char *name) {
1626    const char *nalias;
1627    const char *norig;
1628    xmlCharEncoding alias;
1629#ifdef LIBXML_ICONV_ENABLED
1630    xmlCharEncodingHandlerPtr enc;
1631    iconv_t icv_in, icv_out;
1632#endif /* LIBXML_ICONV_ENABLED */
1633#ifdef LIBXML_ICU_ENABLED
1634    xmlCharEncodingHandlerPtr enc;
1635    uconv_t *ucv_in, *ucv_out;
1636#endif /* LIBXML_ICU_ENABLED */
1637    char upper[100];
1638    int i;
1639
1640    if (handlers == NULL) xmlInitCharEncodingHandlers();
1641    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1642    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1643
1644    /*
1645     * Do the alias resolution
1646     */
1647    norig = name;
1648    nalias = xmlGetEncodingAlias(name);
1649    if (nalias != NULL)
1650	name = nalias;
1651
1652    /*
1653     * Check first for directly registered encoding names
1654     */
1655    for (i = 0;i < 99;i++) {
1656        upper[i] = toupper(name[i]);
1657	if (upper[i] == 0) break;
1658    }
1659    upper[i] = 0;
1660
1661    if (handlers != NULL) {
1662        for (i = 0;i < nbCharEncodingHandler; i++) {
1663            if (!strcmp(upper, handlers[i]->name)) {
1664#ifdef DEBUG_ENCODING
1665                xmlGenericError(xmlGenericErrorContext,
1666                        "Found registered handler for encoding %s\n", name);
1667#endif
1668                return(handlers[i]);
1669            }
1670        }
1671    }
1672
1673#ifdef LIBXML_ICONV_ENABLED
1674    /* check whether iconv can handle this */
1675    icv_in = iconv_open("UTF-8", name);
1676    icv_out = iconv_open(name, "UTF-8");
1677    if (icv_in == (iconv_t) -1) {
1678        icv_in = iconv_open("UTF-8", upper);
1679    }
1680    if (icv_out == (iconv_t) -1) {
1681	icv_out = iconv_open(upper, "UTF-8");
1682    }
1683    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1684	    enc = (xmlCharEncodingHandlerPtr)
1685	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1686	    if (enc == NULL) {
1687	        iconv_close(icv_in);
1688	        iconv_close(icv_out);
1689		return(NULL);
1690	    }
1691	    enc->name = xmlMemStrdup(name);
1692	    enc->input = NULL;
1693	    enc->output = NULL;
1694	    enc->iconv_in = icv_in;
1695	    enc->iconv_out = icv_out;
1696#ifdef DEBUG_ENCODING
1697            xmlGenericError(xmlGenericErrorContext,
1698		    "Found iconv handler for encoding %s\n", name);
1699#endif
1700	    return enc;
1701    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1702	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1703		    "iconv : problems with filters for '%s'\n", name);
1704    }
1705#endif /* LIBXML_ICONV_ENABLED */
1706#ifdef LIBXML_ICU_ENABLED
1707    /* check whether icu can handle this */
1708    ucv_in = openIcuConverter(name, 1);
1709    ucv_out = openIcuConverter(name, 0);
1710    if (ucv_in != NULL && ucv_out != NULL) {
1711	    enc = (xmlCharEncodingHandlerPtr)
1712	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1713	    if (enc == NULL) {
1714                closeIcuConverter(ucv_in);
1715                closeIcuConverter(ucv_out);
1716		return(NULL);
1717	    }
1718	    enc->name = xmlMemStrdup(name);
1719	    enc->input = NULL;
1720	    enc->output = NULL;
1721	    enc->uconv_in = ucv_in;
1722	    enc->uconv_out = ucv_out;
1723#ifdef DEBUG_ENCODING
1724            xmlGenericError(xmlGenericErrorContext,
1725		    "Found ICU converter handler for encoding %s\n", name);
1726#endif
1727	    return enc;
1728    } else if (ucv_in != NULL || ucv_out != NULL) {
1729            closeIcuConverter(ucv_in);
1730            closeIcuConverter(ucv_out);
1731	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1732		    "ICU converter : problems with filters for '%s'\n", name);
1733    }
1734#endif /* LIBXML_ICU_ENABLED */
1735
1736#ifdef DEBUG_ENCODING
1737    xmlGenericError(xmlGenericErrorContext,
1738	    "No handler found for encoding %s\n", name);
1739#endif
1740
1741    /*
1742     * Fallback using the canonical names
1743     */
1744    alias = xmlParseCharEncoding(norig);
1745    if (alias != XML_CHAR_ENCODING_ERROR) {
1746        const char* canon;
1747        canon = xmlGetCharEncodingName(alias);
1748        if ((canon != NULL) && (strcmp(name, canon))) {
1749	    return(xmlFindCharEncodingHandler(canon));
1750        }
1751    }
1752
1753    /* If "none of the above", give up */
1754    return(NULL);
1755}
1756
1757/************************************************************************
1758 *									*
1759 *		ICONV based generic conversion functions		*
1760 *									*
1761 ************************************************************************/
1762
1763#ifdef LIBXML_ICONV_ENABLED
1764/**
1765 * xmlIconvWrapper:
1766 * @cd:		iconv converter data structure
1767 * @out:  a pointer to an array of bytes to store the result
1768 * @outlen:  the length of @out
1769 * @in:  a pointer to an array of ISO Latin 1 chars
1770 * @inlen:  the length of @in
1771 *
1772 * Returns 0 if success, or
1773 *     -1 by lack of space, or
1774 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1775 *        the result of transformation can't fit into the encoding we want), or
1776 *     -3 if there the last byte can't form a single output char.
1777 *
1778 * The value of @inlen after return is the number of octets consumed
1779 *     as the return value is positive, else unpredictable.
1780 * The value of @outlen after return is the number of ocetes consumed.
1781 */
1782static int
1783xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1784                const unsigned char *in, int *inlen) {
1785    size_t icv_inlen, icv_outlen;
1786    const char *icv_in = (const char *) in;
1787    char *icv_out = (char *) out;
1788    int ret;
1789
1790    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1791        if (outlen != NULL) *outlen = 0;
1792        return(-1);
1793    }
1794    icv_inlen = *inlen;
1795    icv_outlen = *outlen;
1796    ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1797    *inlen -= icv_inlen;
1798    *outlen -= icv_outlen;
1799    if ((icv_inlen != 0) || (ret == -1)) {
1800#ifdef EILSEQ
1801        if (errno == EILSEQ) {
1802            return -2;
1803        } else
1804#endif
1805#ifdef E2BIG
1806        if (errno == E2BIG) {
1807            return -1;
1808        } else
1809#endif
1810#ifdef EINVAL
1811        if (errno == EINVAL) {
1812            return -3;
1813        } else
1814#endif
1815        {
1816            return -3;
1817        }
1818    }
1819    return 0;
1820}
1821#endif /* LIBXML_ICONV_ENABLED */
1822
1823/************************************************************************
1824 *									*
1825 *		ICU based generic conversion functions	         	*
1826 *									*
1827 ************************************************************************/
1828
1829#ifdef LIBXML_ICU_ENABLED
1830/**
1831 * xmlUconvWrapper:
1832 * @cd: ICU uconverter data structure
1833 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1834 * @out:  a pointer to an array of bytes to store the result
1835 * @outlen:  the length of @out
1836 * @in:  a pointer to an array of ISO Latin 1 chars
1837 * @inlen:  the length of @in
1838 *
1839 * Returns 0 if success, or
1840 *     -1 by lack of space, or
1841 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1842 *        the result of transformation can't fit into the encoding we want), or
1843 *     -3 if there the last byte can't form a single output char.
1844 *
1845 * The value of @inlen after return is the number of octets consumed
1846 *     as the return value is positive, else unpredictable.
1847 * The value of @outlen after return is the number of ocetes consumed.
1848 */
1849static int
1850xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1851                const unsigned char *in, int *inlen) {
1852    const char *ucv_in = (const char *) in;
1853    char *ucv_out = (char *) out;
1854    UErrorCode err = U_ZERO_ERROR;
1855
1856    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1857        if (outlen != NULL) *outlen = 0;
1858        return(-1);
1859    }
1860
1861    /*
1862     * TODO(jungshik)
1863     * 1. is ucnv_convert(To|From)Algorithmic better?
1864     * 2. had we better use an explicit pivot buffer?
1865     * 3. error returned comes from 'fromUnicode' only even
1866     *    when toUnicode is true !
1867     */
1868    if (toUnicode) {
1869        /* encoding => UTF-16 => UTF-8 */
1870        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1871                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1872                       0, TRUE, &err);
1873    } else {
1874        /* UTF-8 => UTF-16 => encoding */
1875        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1876                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1877                       0, TRUE, &err);
1878    }
1879    *inlen = ucv_in - (const char*) in;
1880    *outlen = ucv_out - (char *) out;
1881    if (U_SUCCESS(err))
1882        return 0;
1883    if (err == U_BUFFER_OVERFLOW_ERROR)
1884        return -1;
1885    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1886        return -2;
1887    /* if (err == U_TRUNCATED_CHAR_FOUND) */
1888    return -3;
1889}
1890#endif /* LIBXML_ICU_ENABLED */
1891
1892/************************************************************************
1893 *									*
1894 *		The real API used by libxml for on-the-fly conversion	*
1895 *									*
1896 ************************************************************************/
1897int
1898xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1899                       xmlBufferPtr in, int len);
1900
1901/**
1902 * xmlCharEncFirstLineInt:
 * @handler:	char encoding transformation data structure
1904 * @out:  an xmlBuffer for the output.
1905 * @in:  an xmlBuffer for the input
1906 * @len:  number of bytes to convert for the first line, or -1
1907 *
1908 * Front-end for the encoding handler input function, but handle only
1909 * the very first line, i.e. limit itself to 45 chars.
1910 *
1911 * Returns the number of byte written if success, or
1912 *     -1 general error
1913 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1914 *        the result of transformation can't fit into the encoding we want), or
1915 */
1916int
1917xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1918                       xmlBufferPtr in, int len) {
1919    int ret = -2;
1920    int written;
1921    int toconv;
1922
1923    if (handler == NULL) return(-1);
1924    if (out == NULL) return(-1);
1925    if (in == NULL) return(-1);
1926
1927    /* calculate space available */
1928    written = out->size - out->use;
1929    toconv = in->use;
1930    /*
1931     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1932     * 45 chars should be sufficient to reach the end of the encoding
1933     * declaration without going too far inside the document content.
1934     * on UTF-16 this means 90bytes, on UCS4 this means 180
1935     * The actual value depending on guessed encoding is passed as @len
1936     * if provided
1937     */
1938    if (len >= 0) {
1939        if (toconv > len)
1940            toconv = len;
1941    } else {
1942        if (toconv > 180)
1943            toconv = 180;
1944    }
1945    if (toconv * 2 >= written) {
1946        xmlBufferGrow(out, toconv);
1947	written = out->size - out->use - 1;
1948    }
1949
1950    if (handler->input != NULL) {
1951	ret = handler->input(&out->content[out->use], &written,
1952	                     in->content, &toconv);
1953	xmlBufferShrink(in, toconv);
1954	out->use += written;
1955	out->content[out->use] = 0;
1956    }
1957#ifdef LIBXML_ICONV_ENABLED
1958    else if (handler->iconv_in != NULL) {
1959	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1960	                      &written, in->content, &toconv);
1961	xmlBufferShrink(in, toconv);
1962	out->use += written;
1963	out->content[out->use] = 0;
1964	if (ret == -1) ret = -3;
1965    }
1966#endif /* LIBXML_ICONV_ENABLED */
1967#ifdef LIBXML_ICU_ENABLED
1968    else if (handler->uconv_in != NULL) {
1969	ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
1970	                      &written, in->content, &toconv);
1971	xmlBufferShrink(in, toconv);
1972	out->use += written;
1973	out->content[out->use] = 0;
1974	if (ret == -1) ret = -3;
1975    }
1976#endif /* LIBXML_ICU_ENABLED */
1977#ifdef DEBUG_ENCODING
1978    switch (ret) {
1979        case 0:
1980	    xmlGenericError(xmlGenericErrorContext,
1981		    "converted %d bytes to %d bytes of input\n",
1982	            toconv, written);
1983	    break;
1984        case -1:
1985	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1986	            toconv, written, in->use);
1987	    break;
1988        case -2:
1989	    xmlGenericError(xmlGenericErrorContext,
1990		    "input conversion failed due to input error\n");
1991	    break;
1992        case -3:
1993	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1994	            toconv, written, in->use);
1995	    break;
1996	default:
1997	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1998    }
1999#endif /* DEBUG_ENCODING */
2000    /*
2001     * Ignore when input buffer is not on a boundary
2002     */
2003    if (ret == -3) ret = 0;
2004    if (ret == -1) ret = 0;
2005    return(ret);
2006}
2007
2008/**
2009 * xmlCharEncFirstLine:
2010 * @handler:	char enconding transformation data structure
2011 * @out:  an xmlBuffer for the output.
2012 * @in:  an xmlBuffer for the input
2013 *
2014 * Front-end for the encoding handler input function, but handle only
2015 * the very first line, i.e. limit itself to 45 chars.
2016 *
2017 * Returns the number of byte written if success, or
2018 *     -1 general error
2019 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2020 *        the result of transformation can't fit into the encoding we want), or
2021 */
2022int
2023xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2024                 xmlBufferPtr in) {
2025    return(xmlCharEncFirstLineInt(handler, out, in, -1));
2026}
2027
2028/**
2029 * xmlCharEncInFunc:
2030 * @handler:	char encoding transformation data structure
2031 * @out:  an xmlBuffer for the output.
2032 * @in:  an xmlBuffer for the input
2033 *
2034 * Generic front-end for the encoding handler input function
2035 *
2036 * Returns the number of byte written if success, or
2037 *     -1 general error
2038 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2039 *        the result of transformation can't fit into the encoding we want), or
2040 */
2041int
2042xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2043                 xmlBufferPtr in)
2044{
2045    int ret = -2;
2046    int written;
2047    int toconv;
2048
2049    if (handler == NULL)
2050        return (-1);
2051    if (out == NULL)
2052        return (-1);
2053    if (in == NULL)
2054        return (-1);
2055
2056    toconv = in->use;
2057    if (toconv == 0)
2058        return (0);
2059    written = out->size - out->use;
2060    if (toconv * 2 >= written) {
2061        xmlBufferGrow(out, out->size + toconv * 2);
2062        written = out->size - out->use - 1;
2063    }
2064    if (handler->input != NULL) {
2065        ret = handler->input(&out->content[out->use], &written,
2066                             in->content, &toconv);
2067        xmlBufferShrink(in, toconv);
2068        out->use += written;
2069        out->content[out->use] = 0;
2070    }
2071#ifdef LIBXML_ICONV_ENABLED
2072    else if (handler->iconv_in != NULL) {
2073        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2074                              &written, in->content, &toconv);
2075        xmlBufferShrink(in, toconv);
2076        out->use += written;
2077        out->content[out->use] = 0;
2078        if (ret == -1)
2079            ret = -3;
2080    }
2081#endif /* LIBXML_ICONV_ENABLED */
2082#ifdef LIBXML_ICU_ENABLED
2083    else if (handler->uconv_in != NULL) {
2084        ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
2085                              &written, in->content, &toconv);
2086        xmlBufferShrink(in, toconv);
2087        out->use += written;
2088        out->content[out->use] = 0;
2089        if (ret == -1)
2090            ret = -3;
2091    }
2092#endif /* LIBXML_ICU_ENABLED */
2093    switch (ret) {
2094        case 0:
2095#ifdef DEBUG_ENCODING
2096            xmlGenericError(xmlGenericErrorContext,
2097                            "converted %d bytes to %d bytes of input\n",
2098                            toconv, written);
2099#endif
2100            break;
2101        case -1:
2102#ifdef DEBUG_ENCODING
2103            xmlGenericError(xmlGenericErrorContext,
2104                         "converted %d bytes to %d bytes of input, %d left\n",
2105                            toconv, written, in->use);
2106#endif
2107            break;
2108        case -3:
2109#ifdef DEBUG_ENCODING
2110            xmlGenericError(xmlGenericErrorContext,
2111                        "converted %d bytes to %d bytes of input, %d left\n",
2112                            toconv, written, in->use);
2113#endif
2114            break;
2115        case -2: {
2116            char buf[50];
2117
2118	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2119		     in->content[0], in->content[1],
2120		     in->content[2], in->content[3]);
2121	    buf[49] = 0;
2122	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2123		    "input conversion failed due to input error, bytes %s\n",
2124		           buf);
2125        }
2126    }
2127    /*
2128     * Ignore when input buffer is not on a boundary
2129     */
2130    if (ret == -3)
2131        ret = 0;
2132    return (written? written : ret);
2133}
2134
2135/**
2136 * xmlCharEncOutFunc:
2137 * @handler:	char enconding transformation data structure
2138 * @out:  an xmlBuffer for the output.
2139 * @in:  an xmlBuffer for the input
2140 *
2141 * Generic front-end for the encoding handler output function
2142 * a first call with @in == NULL has to be made firs to initiate the
2143 * output in case of non-stateless encoding needing to initiate their
2144 * state or the output (like the BOM in UTF16).
2145 * In case of UTF8 sequence conversion errors for the given encoder,
2146 * the content will be automatically remapped to a CharRef sequence.
2147 *
2148 * Returns the number of byte written if success, or
2149 *     -1 general error
2150 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2151 *        the result of transformation can't fit into the encoding we want), or
2152 */
2153int
2154xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2155                  xmlBufferPtr in) {
2156    int ret = -2;
2157    int written;
2158    int writtentot = 0;
2159    int toconv;
2160    int output = 0;
2161
2162    if (handler == NULL) return(-1);
2163    if (out == NULL) return(-1);
2164
2165retry:
2166
2167    written = out->size - out->use;
2168
2169    if (written > 0)
2170	written--; /* Gennady: count '/0' */
2171
2172    /*
2173     * First specific handling of in = NULL, i.e. the initialization call
2174     */
2175    if (in == NULL) {
2176        toconv = 0;
2177	if (handler->output != NULL) {
2178	    ret = handler->output(&out->content[out->use], &written,
2179				  NULL, &toconv);
2180	    if (ret >= 0) { /* Gennady: check return value */
2181		out->use += written;
2182		out->content[out->use] = 0;
2183	    }
2184	}
2185#ifdef LIBXML_ICONV_ENABLED
2186	else if (handler->iconv_out != NULL) {
2187	    ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2188				  &written, NULL, &toconv);
2189	    out->use += written;
2190	    out->content[out->use] = 0;
2191	}
2192#endif /* LIBXML_ICONV_ENABLED */
2193#ifdef LIBXML_ICU_ENABLED
2194	else if (handler->uconv_out != NULL) {
2195	    ret = xmlUconvWrapper(handler->uconv_out, 0,
2196                              &out->content[out->use],
2197 				              &written, NULL, &toconv);
2198	    out->use += written;
2199	    out->content[out->use] = 0;
2200	}
2201#endif /* LIBXML_ICU_ENABLED */
2202#ifdef DEBUG_ENCODING
2203	xmlGenericError(xmlGenericErrorContext,
2204		"initialized encoder\n");
2205#endif
2206        return(0);
2207    }
2208
2209    /*
2210     * Conversion itself.
2211     */
2212    toconv = in->use;
2213    if (toconv == 0)
2214	return(0);
2215    if (toconv * 4 >= written) {
2216        xmlBufferGrow(out, toconv * 4);
2217	written = out->size - out->use - 1;
2218    }
2219    if (handler->output != NULL) {
2220	ret = handler->output(&out->content[out->use], &written,
2221	                      in->content, &toconv);
2222	if (written > 0) {
2223	    xmlBufferShrink(in, toconv);
2224	    out->use += written;
2225	    writtentot += written;
2226	}
2227	out->content[out->use] = 0;
2228    }
2229#ifdef LIBXML_ICONV_ENABLED
2230    else if (handler->iconv_out != NULL) {
2231	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2232	                      &written, in->content, &toconv);
2233	xmlBufferShrink(in, toconv);
2234	out->use += written;
2235	writtentot += written;
2236	out->content[out->use] = 0;
2237	if (ret == -1) {
2238	    if (written > 0) {
2239		/*
2240		 * Can be a limitation of iconv
2241		 */
2242		goto retry;
2243	    }
2244	    ret = -3;
2245	}
2246    }
2247#endif /* LIBXML_ICONV_ENABLED */
2248#ifdef LIBXML_ICU_ENABLED
2249    else if (handler->uconv_out != NULL) {
2250	ret = xmlUconvWrapper(handler->uconv_out, 0,
2251                              &out->content[out->use],
2252	                      &written, in->content, &toconv);
2253	xmlBufferShrink(in, toconv);
2254	out->use += written;
2255	writtentot += written;
2256	out->content[out->use] = 0;
2257	if (ret == -1) {
2258	    if (written > 0) {
2259		/*
2260		 * Can be a limitation of iconv
2261		 */
2262		goto retry;
2263	    }
2264	    ret = -3;
2265	}
2266    }
2267#endif /* LIBXML_ICU_ENABLED */
2268    else {
2269	xmlEncodingErr(XML_I18N_NO_OUTPUT,
2270		       "xmlCharEncOutFunc: no output function !\n", NULL);
2271	return(-1);
2272    }
2273
2274    if (ret >= 0) output += ret;
2275
2276    /*
2277     * Attempt to handle error cases
2278     */
2279    switch (ret) {
2280        case 0:
2281#ifdef DEBUG_ENCODING
2282	    xmlGenericError(xmlGenericErrorContext,
2283		    "converted %d bytes to %d bytes of output\n",
2284	            toconv, written);
2285#endif
2286	    break;
2287        case -1:
2288#ifdef DEBUG_ENCODING
2289	    xmlGenericError(xmlGenericErrorContext,
2290		    "output conversion failed by lack of space\n");
2291#endif
2292	    break;
2293        case -3:
2294#ifdef DEBUG_ENCODING
2295	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2296	            toconv, written, in->use);
2297#endif
2298	    break;
2299        case -2: {
2300	    int len = in->use;
2301	    const xmlChar *utf = (const xmlChar *) in->content;
2302	    int cur;
2303
2304	    cur = xmlGetUTF8Char(utf, &len);
2305	    if (cur > 0) {
2306		xmlChar charref[20];
2307
2308#ifdef DEBUG_ENCODING
2309		xmlGenericError(xmlGenericErrorContext,
2310			"handling output conversion error\n");
2311		xmlGenericError(xmlGenericErrorContext,
2312			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2313			in->content[0], in->content[1],
2314			in->content[2], in->content[3]);
2315#endif
2316		/*
2317		 * Removes the UTF8 sequence, and replace it by a charref
2318		 * and continue the transcoding phase, hoping the error
2319		 * did not mangle the encoder state.
2320		 */
2321		snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur);
2322		xmlBufferShrink(in, len);
2323		xmlBufferAddHead(in, charref, -1);
2324
2325		goto retry;
2326	    } else {
2327		char buf[50];
2328
2329		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2330			 in->content[0], in->content[1],
2331			 in->content[2], in->content[3]);
2332		buf[49] = 0;
2333		xmlEncodingErr(XML_I18N_CONV_FAILED,
2334		    "output conversion failed due to conv error, bytes %s\n",
2335			       buf);
2336		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2337		    in->content[0] = ' ';
2338	    }
2339	    break;
2340	}
2341    }
2342    return(ret);
2343}
2344
2345/**
2346 * xmlCharEncCloseFunc:
2347 * @handler:	char enconding transformation data structure
2348 *
2349 * Generic front-end for encoding handler close function
2350 *
2351 * Returns 0 if success, or -1 in case of error
2352 */
2353int
2354xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2355    int ret = 0;
2356    if (handler == NULL) return(-1);
2357    if (handler->name == NULL) return(-1);
2358#ifdef LIBXML_ICONV_ENABLED
2359    /*
2360     * Iconv handlers can be used only once, free the whole block.
2361     * and the associated icon resources.
2362     */
2363    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2364	if (handler->name != NULL)
2365	    xmlFree(handler->name);
2366	handler->name = NULL;
2367	if (handler->iconv_out != NULL) {
2368	    if (iconv_close(handler->iconv_out))
2369		ret = -1;
2370	    handler->iconv_out = NULL;
2371	}
2372	if (handler->iconv_in != NULL) {
2373	    if (iconv_close(handler->iconv_in))
2374		ret = -1;
2375	    handler->iconv_in = NULL;
2376	}
2377	xmlFree(handler);
2378    }
2379#endif /* LIBXML_ICONV_ENABLED */
2380#ifdef LIBXML_ICU_ENABLED
2381    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2382	if (handler->name != NULL)
2383	    xmlFree(handler->name);
2384	handler->name = NULL;
2385	if (handler->uconv_out != NULL) {
2386	    closeIcuConverter(handler->uconv_out);
2387	    handler->uconv_out = NULL;
2388	}
2389	if (handler->uconv_in != NULL) {
2390	    closeIcuConverter(handler->uconv_in);
2391	    handler->uconv_in = NULL;
2392	}
2393	xmlFree(handler);
2394    }
2395#endif
2396#ifdef DEBUG_ENCODING
2397    if (ret)
2398        xmlGenericError(xmlGenericErrorContext,
2399		"failed to close the encoding handler\n");
2400    else
2401        xmlGenericError(xmlGenericErrorContext,
2402		"closed the encoding handler\n");
2403#endif
2404
2405    return(ret);
2406}
2407
2408/**
2409 * xmlByteConsumed:
2410 * @ctxt: an XML parser context
2411 *
2412 * This function provides the current index of the parser relative
2413 * to the start of the current entity. This function is computed in
2414 * bytes from the beginning starting at zero and finishing at the
2415 * size in byte of the file if parsing a file. The function is
2416 * of constant cost if the input is UTF-8 but can be costly if run
2417 * on non-UTF-8 input.
2418 *
2419 * Returns the index in bytes from the beginning of the entity or -1
2420 *         in case the index could not be computed.
2421 */
2422long
2423xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2424    xmlParserInputPtr in;
2425
2426    if (ctxt == NULL) return(-1);
2427    in = ctxt->input;
2428    if (in == NULL)  return(-1);
2429    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2430        unsigned int unused = 0;
2431	xmlCharEncodingHandler * handler = in->buf->encoder;
2432        /*
2433	 * Encoding conversion, compute the number of unused original
2434	 * bytes from the input not consumed and substract that from
2435	 * the raw consumed value, this is not a cheap operation
2436	 */
2437        if (in->end - in->cur > 0) {
2438	    unsigned char convbuf[32000];
2439	    const unsigned char *cur = (const unsigned char *)in->cur;
2440	    int toconv = in->end - in->cur, written = 32000;
2441
2442	    int ret;
2443
2444	    if (handler->output != NULL) {
2445	        do {
2446		    toconv = in->end - cur;
2447		    written = 32000;
2448		    ret = handler->output(&convbuf[0], &written,
2449				      cur, &toconv);
2450		    if (ret == -1) return(-1);
2451		    unused += written;
2452		    cur += toconv;
2453		} while (ret == -2);
2454#ifdef LIBXML_ICONV_ENABLED
2455	    } else if (handler->iconv_out != NULL) {
2456	        do {
2457		    toconv = in->end - cur;
2458		    written = 32000;
2459		    ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2460	                      &written, cur, &toconv);
2461		    if (ret < 0) {
2462		        if (written > 0)
2463			    ret = -2;
2464			else
2465			    return(-1);
2466		    }
2467		    unused += written;
2468		    cur += toconv;
2469		} while (ret == -2);
2470#endif
2471#ifdef LIBXML_ICU_ENABLED
2472	    } else if (handler->uconv_out != NULL) {
2473	        do {
2474		    toconv = in->end - cur;
2475		    written = 32000;
2476		    ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
2477	                      &written, cur, &toconv);
2478		    if (ret < 0) {
2479		        if (written > 0)
2480			    ret = -2;
2481			else
2482			    return(-1);
2483		    }
2484		    unused += written;
2485		    cur += toconv;
2486		} while (ret == -2);
2487            } else {
2488	        /* could not find a converter */
2489	        return(-1);
2490	    }
2491	}
2492	if (in->buf->rawconsumed < unused)
2493	    return(-1);
2494	return(in->buf->rawconsumed - unused);
2495    }
2496    return(in->consumed + (in->cur - in->base));
2497}
2498#endif
2499
2500#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2501#ifdef LIBXML_ISO8859X_ENABLED
2502
2503/**
2504 * UTF8ToISO8859x:
2505 * @out:  a pointer to an array of bytes to store the result
2506 * @outlen:  the length of @out
2507 * @in:  a pointer to an array of UTF-8 chars
2508 * @inlen:  the length of @in
2509 * @xlattable: the 2-level transcoding table
2510 *
2511 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2512 * block of chars out.
2513 *
2514 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2515 * The value of @inlen after return is the number of octets consumed
2516 *     as the return value is positive, else unpredictable.
2517 * The value of @outlen after return is the number of ocetes consumed.
2518 */
2519static int
2520UTF8ToISO8859x(unsigned char* out, int *outlen,
2521              const unsigned char* in, int *inlen,
2522              unsigned char const *xlattable) {
2523    const unsigned char* outstart = out;
2524    const unsigned char* inend;
2525    const unsigned char* instart = in;
2526
2527    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2528        (xlattable == NULL))
2529	return(-1);
2530    if (in == NULL) {
2531        /*
2532        * initialization nothing to do
2533        */
2534        *outlen = 0;
2535        *inlen = 0;
2536        return(0);
2537    }
2538    inend = in + (*inlen);
2539    while (in < inend) {
2540        unsigned char d = *in++;
2541        if  (d < 0x80)  {
2542            *out++ = d;
2543        } else if (d < 0xC0) {
2544            /* trailing byte in leading position */
2545            *outlen = out - outstart;
2546            *inlen = in - instart - 1;
2547            return(-2);
2548        } else if (d < 0xE0) {
2549            unsigned char c;
2550            if (!(in < inend)) {
2551                /* trailing byte not in input buffer */
2552                *outlen = out - outstart;
2553                *inlen = in - instart - 1;
2554                return(-2);
2555            }
2556            c = *in++;
2557            if ((c & 0xC0) != 0x80) {
2558                /* not a trailing byte */
2559                *outlen = out - outstart;
2560                *inlen = in - instart - 2;
2561                return(-2);
2562            }
2563            c = c & 0x3F;
2564            d = d & 0x1F;
2565            d = xlattable [48 + c + xlattable [d] * 64];
2566            if (d == 0) {
2567                /* not in character set */
2568                *outlen = out - outstart;
2569                *inlen = in - instart - 2;
2570                return(-2);
2571            }
2572            *out++ = d;
2573        } else if (d < 0xF0) {
2574            unsigned char c1;
2575            unsigned char c2;
2576            if (!(in < inend - 1)) {
2577                /* trailing bytes not in input buffer */
2578                *outlen = out - outstart;
2579                *inlen = in - instart - 1;
2580                return(-2);
2581            }
2582            c1 = *in++;
2583            if ((c1 & 0xC0) != 0x80) {
2584                /* not a trailing byte (c1) */
2585                *outlen = out - outstart;
2586                *inlen = in - instart - 2;
2587                return(-2);
2588            }
2589            c2 = *in++;
2590            if ((c2 & 0xC0) != 0x80) {
2591                /* not a trailing byte (c2) */
2592                *outlen = out - outstart;
2593                *inlen = in - instart - 2;
2594                return(-2);
2595            }
2596            c1 = c1 & 0x3F;
2597            c2 = c2 & 0x3F;
2598	    d = d & 0x0F;
2599	    d = xlattable [48 + c2 + xlattable [48 + c1 +
2600	    		xlattable [32 + d] * 64] * 64];
2601            if (d == 0) {
2602                /* not in character set */
2603                *outlen = out - outstart;
2604                *inlen = in - instart - 3;
2605                return(-2);
2606            }
2607            *out++ = d;
2608        } else {
2609            /* cannot transcode >= U+010000 */
2610            *outlen = out - outstart;
2611            *inlen = in - instart - 1;
2612            return(-2);
2613        }
2614    }
2615    *outlen = out - outstart;
2616    *inlen = in - instart;
2617    return(*outlen);
2618}
2619
2620/**
2621 * ISO8859xToUTF8
2622 * @out:  a pointer to an array of bytes to store the result
2623 * @outlen:  the length of @out
2624 * @in:  a pointer to an array of ISO Latin 1 chars
2625 * @inlen:  the length of @in
2626 *
2627 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2628 * block of chars out.
2629 * Returns 0 if success, or -1 otherwise
2630 * The value of @inlen after return is the number of octets consumed
2631 * The value of @outlen after return is the number of ocetes produced.
2632 */
2633static int
2634ISO8859xToUTF8(unsigned char* out, int *outlen,
2635              const unsigned char* in, int *inlen,
2636              unsigned short const *unicodetable) {
2637    unsigned char* outstart = out;
2638    unsigned char* outend;
2639    const unsigned char* instart = in;
2640    const unsigned char* inend;
2641    const unsigned char* instop;
2642    unsigned int c;
2643
2644    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2645        (in == NULL) || (unicodetable == NULL))
2646	return(-1);
2647    outend = out + *outlen;
2648    inend = in + *inlen;
2649    instop = inend;
2650    c = *in;
2651    while (in < inend && out < outend - 1) {
2652        if (c >= 0x80) {
2653            c = unicodetable [c - 0x80];
2654            if (c == 0) {
2655                /* undefined code point */
2656                *outlen = out - outstart;
2657                *inlen = in - instart;
2658                return (-1);
2659            }
2660            if (c < 0x800) {
2661                *out++ = ((c >>  6) & 0x1F) | 0xC0;
2662                *out++ = (c & 0x3F) | 0x80;
2663            } else {
2664                *out++ = ((c >>  12) & 0x0F) | 0xE0;
2665                *out++ = ((c >>  6) & 0x3F) | 0x80;
2666                *out++ = (c & 0x3F) | 0x80;
2667            }
2668            ++in;
2669            c = *in;
2670        }
2671        if (instop - in > outend - out) instop = in + (outend - out);
2672        while (c < 0x80 && in < instop) {
2673            *out++ =  c;
2674            ++in;
2675            c = *in;
2676        }
2677    }
2678    if (in < inend && out < outend && c < 0x80) {
2679        *out++ =  c;
2680        ++in;
2681    }
2682    *outlen = out - outstart;
2683    *inlen = in - instart;
2684    return (*outlen);
2685}
2686
2687
2688/************************************************************************
2689 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2690 ************************************************************************/
2691
/*
 * ISO-8859-2 to Unicode: one code point per byte value 0x80..0xFF, the
 * entry for byte b being stored at index (b - 0x80).  The layout is the
 * one ISO8859xToUTF8() expects for its @unicodetable argument.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, /* 0xA0 */
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, /* 0xA8 */
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, /* 0xB0 */
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, /* 0xB8 */
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, /* 0xC0 */
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, /* 0xC8 */
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, /* 0xD0 */
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, /* 0xD8 */
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, /* 0xE0 */
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, /* 0xE8 */
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, /* 0xF0 */
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, /* 0xF8 */
};
2710
/*
 * Unicode to ISO-8859-2, in the 2-level layout read by UTF8ToISO8859x():
 * 48 index bytes giving a block number, then 6 blocks of 64 result
 * bytes, 0 standing for "no such character".
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    /* index bytes */
    0x00, 0x00, 0x01, 0x05, 0x02, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* block 0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* block 1 */
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0xa7, 0xa8, 0x00, 0x00, 0x00, 0x00, 0xad, 0x00, 0x00,
    0xb0, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* block 2 */
    0x00, 0x00, 0xc3, 0xe3, 0xa1, 0xb1, 0xc6, 0xe6, 0x00, 0x00, 0x00, 0x00, 0xc8, 0xe8, 0xcf, 0xef,
    0xd0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, 0xea, 0xcc, 0xec, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0xe5, 0x00, 0x00, 0xa5, 0xb5, 0x00,
    /* block 3 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa2, 0xff, 0x00, 0xb2, 0x00, 0xbd, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* block 4 */
    0x00, 0xa3, 0xb3, 0xd1, 0xf1, 0x00, 0x00, 0xd2, 0xf2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0xd5, 0xf5, 0x00, 0x00, 0xc0, 0xe0, 0x00, 0x00, 0xd8, 0xf8, 0xa6, 0xb6, 0x00, 0x00, 0xaa, 0xba,
    0xa9, 0xb9, 0xde, 0xfe, 0xab, 0xbb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd9, 0xf9,
    0xdb, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xac, 0xbc, 0xaf, 0xbf, 0xae, 0xbe, 0x00,
    /* block 5 */
    0x00, 0xc1, 0xc2, 0x00, 0xc4, 0x00, 0x00, 0xc7, 0x00, 0xc9, 0x00, 0xcb, 0x00, 0xcd, 0xce, 0x00,
    0x00, 0x00, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, 0x00, 0x00, 0xda, 0x00, 0xdc, 0xdd, 0x00, 0xdf,
    0x00, 0xe1, 0xe2, 0x00, 0xe4, 0x00, 0x00, 0xe7, 0x00, 0xe9, 0x00, 0xeb, 0x00, 0xed, 0xee, 0x00,
    0x00, 0x00, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, 0x00, 0x00, 0xfa, 0x00, 0xfc, 0xfd, 0x00, 0x00,
};
2740
/*
 * ISO-8859-3 to Unicode: one code point per byte value 0x80..0xFF, the
 * entry for byte b being stored at index (b - 0x80); 0x0000 marks a byte
 * without assigned character.  The layout is the one ISO8859xToUTF8()
 * expects for its @unicodetable argument.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, /* 0xA0 */
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, /* 0xA8 */
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, /* 0xB0 */
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, /* 0xB8 */
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, /* 0xC0 */
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, /* 0xC8 */
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, /* 0xD0 */
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, /* 0xD8 */
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, /* 0xE0 */
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, /* 0xE8 */
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, /* 0xF0 */
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, /* 0xF8 */
};
2759
/*
 * Unicode to ISO-8859-3, in the 2-level layout read by UTF8ToISO8859x():
 * 48 index bytes giving a block number, then 7 blocks of 64 result
 * bytes, 0 standing for "no such character".
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    /* index bytes */
    0x04, 0x00, 0x01, 0x06, 0x02, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* block 0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* block 1 */
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0x00, 0x00, 0xa3, 0xa4, 0x00, 0x00, 0xa7, 0xa8, 0x00, 0x00, 0x00, 0x00, 0xad, 0x00, 0x00,
    0xb0, 0x00, 0xb2, 0xb3, 0xb4, 0xb5, 0x00, 0xb7, 0xb8, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00,
    /* block 2 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xe6, 0xc5, 0xe5, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0xf8, 0xab, 0xbb,
    0xd5, 0xf5, 0x00, 0x00, 0xa6, 0xb6, 0xa1, 0xb1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0xa9, 0xb9, 0x00, 0x00, 0xac, 0xbc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* block 3 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa2, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* block 4 */
    0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* block 5 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xfe, 0xaa, 0xba,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdd, 0xfd, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaf, 0xbf, 0x00, 0x00, 0x00,
    /* block 6 */
    0xc0, 0xc1, 0xc2, 0x00, 0xc4, 0x00, 0x00, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0x00, 0xd1, 0xd2, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, 0x00, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf,
    0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0x00, 0x00, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0x00, 0xf1, 0xf2, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, 0x00, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0x00,
};
2793
/*
 * ISO-8859-4 to Unicode: one code point per byte value 0x80..0xFF, the
 * entry for byte b being stored at index (b - 0x80).  The layout is the
 * one ISO8859xToUTF8() expects for its @unicodetable argument.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, /* 0xA0 */
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, /* 0xA8 */
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, /* 0xB0 */
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, /* 0xB8 */
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, /* 0xC0 */
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, /* 0xC8 */
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, /* 0xD0 */
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, /* 0xD8 */
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, /* 0xE0 */
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, /* 0xE8 */
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, /* 0xF0 */
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, /* 0xF8 */
};
2812
/*
 * Reverse table paired with xmlunicodetable_ISO8859_4: 48 index bytes
 * followed by six 64-byte blocks (array size 48 + 6 * 64), written as one
 * string literal of 16 bytes per row.
 *
 * NOTE(review): the layout below is inferred from the data and from the
 * paired table; the lookup routine is not in this part of the file, so
 * confirm against it.
 *   - bytes 0..31 look indexed by the low 5 bits of a two-byte UTF-8 lead
 *     byte, bytes 32..47 by the low 4 bits of a three-byte lead byte;
 *     each holds a block number;
 *   - inside a block the low 6 bits of the next UTF-8 byte select the
 *     entry, which is the output byte for the last UTF-8 byte;
 *   - 0 appears to mean "no mapping" (block 0 is all zero).
 * Example consistent with the data: U+0100 (UTF-8 c4 80) -> index[4] = 2
 * -> block 2, offset 0 = 0xc0, and the paired table holds 0x0100 in the
 * slot for byte 0xc0.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2842
/*
 * ISO-8859-5 upper-half table: 128 16-bit values, eight per row.
 * Entries 0..31 are 0x0080..0x009f unchanged; most of the rest lie in
 * 0x0401..0x045f, with 0x00a0, 0x00ad, 0x2116 and 0x00a7 as the exceptions.
 *
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the routine that reads this table is not in this part of
 * the file -- confirm against it.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
2861
/*
 * Reverse table paired with xmlunicodetable_ISO8859_5: 48 index bytes
 * followed by six 64-byte blocks (array size 48 + 6 * 64), written as one
 * string literal of 16 bytes per row.
 *
 * NOTE(review): the layout below is inferred from the data and from the
 * paired table; the lookup routine is not in this part of the file, so
 * confirm against it.
 *   - bytes 0..31 look indexed by the low 5 bits of a two-byte UTF-8 lead
 *     byte, bytes 32..47 by the low 4 bits of a three-byte lead byte;
 *     each holds a block number;
 *   - inside a block the low 6 bits of the next UTF-8 byte select the
 *     entry: the output byte for the last UTF-8 byte, or another block
 *     number for the middle byte of a three-byte sequence;
 *   - 0 appears to mean "no mapping" (block 0 is all zero).
 * Example consistent with the data: U+2116 (UTF-8 e2 84 96) ->
 * index[32 + 2] = 4 -> block 4, offset 0x04 = 5 -> block 5,
 * offset 0x16 = 0xf0, and the paired table holds 0x2116 in the slot for
 * byte 0xf0.
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2891
/*
 * ISO-8859-6 upper-half table: 128 16-bit values, eight per row.
 * Entries 0..31 are 0x0080..0x009f unchanged; the populated remainder is
 * 0x00a0, 0x00a4, 0x00ad and values in 0x060c..0x0652. Many slots hold
 * 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i and 0x0000 marks a byte with no assigned character; the
 * routine that reads this table is not in this part of the file --
 * confirm against it.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
2910
/*
 * Reverse table paired with xmlunicodetable_ISO8859_6: 48 index bytes
 * followed by five 64-byte blocks (array size 48 + 5 * 64), written as one
 * string literal of 16 bytes per row.
 *
 * NOTE(review): the layout below is inferred from the data and from the
 * paired table; the lookup routine is not in this part of the file, so
 * confirm against it.
 *   - bytes 0..31 look indexed by the low 5 bits of a two-byte UTF-8 lead
 *     byte, bytes 32..47 by the low 4 bits of a three-byte lead byte;
 *     each holds a block number;
 *   - inside a block the low 6 bits of the next UTF-8 byte select the
 *     entry, which is the output byte for the last UTF-8 byte;
 *   - 0 appears to mean "no mapping" (block 0 is all zero).
 * Example consistent with the data: U+060C (UTF-8 d8 8c) -> index[24] = 3
 * -> block 3, offset 0x0c = 0xac, and the paired table holds 0x060c in the
 * slot for byte 0xac.
 *
 * NOTE(review): index[0] = 2 and block 2 contains a single non-zero byte,
 * 0xff at offset 0. The paired table holds 0x0000 in its last slot (byte
 * 0xff), so this looks like that placeholder carried into the reverse
 * table rather than a real mapping -- confirm it is harmless.
 */
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2936
/*
 * ISO-8859-7 upper-half table: 128 16-bit values, eight per row.
 * Entries 0..31 are 0x0080..0x009f unchanged; the rest combine Latin-1
 * range values, 0x2015/0x2018/0x2019 and values in 0x0384..0x03ce. A few
 * slots hold 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i and 0x0000 marks a byte with no assigned character; the
 * routine that reads this table is not in this part of the file --
 * confirm against it.
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
};
2955
/*
 * Reverse table paired with xmlunicodetable_ISO8859_7: 48 index bytes
 * followed by seven 64-byte blocks (array size 48 + 7 * 64), written as
 * one string literal of 16 bytes per row.
 *
 * NOTE(review): the layout below is inferred from the data and from the
 * paired table; the lookup routine is not in this part of the file, so
 * confirm against it.
 *   - bytes 0..31 look indexed by the low 5 bits of a two-byte UTF-8 lead
 *     byte, bytes 32..47 by the low 4 bits of a three-byte lead byte;
 *     each holds a block number;
 *   - inside a block the low 6 bits of the next UTF-8 byte select the
 *     entry: the output byte for the last UTF-8 byte, or another block
 *     number for the middle byte of a three-byte sequence;
 *   - 0 appears to mean "no mapping" (block 0 is all zero).
 * Example consistent with the data: U+2015 (UTF-8 e2 80 95) ->
 * index[32 + 2] = 2 -> block 2, offset 0 = 3 -> block 3,
 * offset 0x15 = 0xaf, and the paired table holds 0x2015 in the slot for
 * byte 0xaf.
 *
 * NOTE(review): index[0] = 4 and block 4 contains a single non-zero byte,
 * 0xff at offset 0. The paired table holds 0x0000 in its last slot (byte
 * 0xff), so this looks like that placeholder carried into the reverse
 * table rather than a real mapping -- confirm it is harmless.
 */
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2989
/*
 * ISO-8859-8 upper-half table: 128 16-bit values, eight per row.
 * Entries 0..31 are 0x0080..0x009f unchanged; the rest combine Latin-1
 * range values (with 0x00d7 and 0x00f7 in the 0xaa and 0xba slots),
 * 0x2017, 0x05d0..0x05ea and 0x200e/0x200f. Many slots hold 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i and 0x0000 marks a byte with no assigned character; the
 * routine that reads this table is not in this part of the file --
 * confirm against it.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
};
3008
/*
 * Reverse table paired with xmlunicodetable_ISO8859_8: 48 index bytes
 * followed by seven 64-byte blocks (array size 48 + 7 * 64), written as
 * one string literal of 16 bytes per row.
 *
 * NOTE(review): the layout below is inferred from the data and from the
 * paired table; the lookup routine is not in this part of the file, so
 * confirm against it.
 *   - bytes 0..31 look indexed by the low 5 bits of a two-byte UTF-8 lead
 *     byte, bytes 32..47 by the low 4 bits of a three-byte lead byte;
 *     each holds a block number;
 *   - inside a block the low 6 bits of the next UTF-8 byte select the
 *     entry: the output byte for the last UTF-8 byte, or another block
 *     number for the middle byte of a three-byte sequence;
 *   - 0 appears to mean "no mapping" (block 0 is all zero).
 * Example consistent with the data: U+05D0 (UTF-8 d7 90) -> index[23] = 6
 * -> block 6, offset 0x10 = 0xe0, and the paired table holds 0x05d0 in the
 * slot for byte 0xe0.
 *
 * NOTE(review): index[0] = 2 and block 2 contains a single non-zero byte,
 * 0xff at offset 0. The paired table holds 0x0000 in its last slot (byte
 * 0xff), so this looks like that placeholder carried into the reverse
 * table rather than a real mapping -- confirm it is harmless.
 */
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3042
/*
 * ISO-8859-9 upper-half table: 128 16-bit values, eight per row.
 * Every entry equals 0x0080 + its index except six: the slots for bytes
 * 0xd0, 0xdd, 0xde, 0xf0, 0xfd and 0xfe hold 0x011e, 0x0130, 0x015e,
 * 0x011f, 0x0131 and 0x015f.
 *
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the routine that reads this table is not in this part of
 * the file -- confirm against it.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
};
3061
/*
 * Reverse table paired with xmlunicodetable_ISO8859_9: 48 index bytes
 * followed by five 64-byte blocks (array size 48 + 5 * 64), written as one
 * string literal of 16 bytes per row.
 *
 * NOTE(review): the layout below is inferred from the data and from the
 * paired table; the lookup routine is not in this part of the file, so
 * confirm against it.
 *   - bytes 0..31 look indexed by the low 5 bits of a two-byte UTF-8 lead
 *     byte, bytes 32..47 by the low 4 bits of a three-byte lead byte
 *     (all zero here); each holds a block number;
 *   - inside a block the low 6 bits of the next UTF-8 byte select the
 *     entry, which is the output byte for the last UTF-8 byte;
 *   - 0 appears to mean "no mapping" (block 0 is all zero).
 * Example consistent with the data: U+011E (UTF-8 c4 9e) -> index[4] = 3
 * -> block 3, offset 0x1e = 0xd0, and the paired table holds 0x011e in the
 * slot for byte 0xd0.
 */
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3087
/*
 * ISO-8859-10 upper-half table: 128 16-bit values, eight per row.
 * Entries 0..31 are 0x0080..0x009f unchanged; the rest mix Latin-1 range
 * values with values in 0x0100..0x017e and a single 0x2015.
 *
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the routine that reads this table is not in this part of
 * the file -- confirm against it.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
3106
/*
 * Reverse table paired with xmlunicodetable_ISO8859_10: 48 index bytes
 * followed by seven 64-byte blocks (array size 48 + 7 * 64), written as
 * one string literal of 16 bytes per row.
 *
 * NOTE(review): the layout below is inferred from the data and from the
 * paired table; the lookup routine is not in this part of the file, so
 * confirm against it.
 *   - bytes 0..31 look indexed by the low 5 bits of a two-byte UTF-8 lead
 *     byte, bytes 32..47 by the low 4 bits of a three-byte lead byte;
 *     each holds a block number;
 *   - inside a block the low 6 bits of the next UTF-8 byte select the
 *     entry: the output byte for the last UTF-8 byte, or another block
 *     number for the middle byte of a three-byte sequence;
 *   - 0 appears to mean "no mapping" (block 0 is all zero).
 * Example consistent with the data: U+2015 (UTF-8 e2 80 95) ->
 * index[32 + 2] = 4 -> block 4, offset 0 = 5 -> block 5,
 * offset 0x15 = 0xbd, and the paired table holds 0x2015 in the slot for
 * byte 0xbd.
 */
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
3140
/*
 * ISO-8859-11 upper-half table: 128 16-bit values, eight per row.
 * Entries 0..31 are 0x0080..0x009f unchanged, entry 32 is 0x00a0, and the
 * populated remainder lies in 0x0e01..0x0e5b. Eight slots hold 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i and 0x0000 marks a byte with no assigned character; the
 * routine that reads this table is not in this part of the file --
 * confirm against it.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
3159
/*
 * Reverse table paired with xmlunicodetable_ISO8859_11: 48 index bytes
 * followed by six 64-byte blocks (array size 48 + 6 * 64), written as one
 * string literal of 16 bytes per row.
 *
 * NOTE(review): the layout below is inferred from the data and from the
 * paired table; the lookup routine is not in this part of the file, so
 * confirm against it.
 *   - bytes 0..31 look indexed by the low 5 bits of a two-byte UTF-8 lead
 *     byte, bytes 32..47 by the low 4 bits of a three-byte lead byte;
 *     each holds a block number;
 *   - inside a block the low 6 bits of the next UTF-8 byte select the
 *     entry: the output byte for the last UTF-8 byte, or another block
 *     number for the middle byte of a three-byte sequence;
 *   - 0 appears to mean "no mapping" (block 0 is all zero).
 * Example consistent with the data: U+0E01 (UTF-8 e0 b8 81) ->
 * index[32 + 0] = 2 -> block 2, offset 0x38 = 3 -> block 3,
 * offset 0x01 = 0xa1, and the paired table holds 0x0e01 in the slot for
 * byte 0xa1.
 *
 * NOTE(review): index[0] = 4 and block 4 contains a single non-zero byte,
 * 0xff at offset 0. The paired table holds 0x0000 in its last slot (byte
 * 0xff), so this looks like that placeholder carried into the reverse
 * table rather than a real mapping -- confirm it is harmless.
 */
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3189
/*
 * ISO-8859-13 upper-half table: 128 16-bit values, eight per row.
 * Entries 0..31 are 0x0080..0x009f unchanged; the rest mix Latin-1 range
 * values with values in 0x0100..0x017e and 0x2019/0x201c/0x201d/0x201e.
 *
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the routine that reads this table is not in this part of
 * the file -- confirm against it.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
3208
/*
 * Reverse table paired with xmlunicodetable_ISO8859_13: 48 index bytes
 * followed by seven 64-byte blocks (array size 48 + 7 * 64), written as
 * one string literal of 16 bytes per row.
 *
 * NOTE(review): the layout below is inferred from the data and from the
 * paired table; the lookup routine is not in this part of the file, so
 * confirm against it.
 *   - bytes 0..31 look indexed by the low 5 bits of a two-byte UTF-8 lead
 *     byte, bytes 32..47 by the low 4 bits of a three-byte lead byte;
 *     each holds a block number;
 *   - inside a block the low 6 bits of the next UTF-8 byte select the
 *     entry: the output byte for the last UTF-8 byte, or another block
 *     number for the middle byte of a three-byte sequence;
 *   - 0 appears to mean "no mapping" (block 0 is all zero).
 * Example consistent with the data: U+2019 (UTF-8 e2 80 99) ->
 * index[32 + 2] = 2 -> block 2, offset 0 = 3 -> block 3,
 * offset 0x19 = 0xff, and the paired table holds 0x2019 in the slot for
 * byte 0xff.
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3242
/*
 * ISO-8859-14 upper-half table: 128 16-bit values, eight per row.
 * Entries 0..31 are 0x0080..0x009f unchanged; the rest mix Latin-1 range
 * values with values in 0x010a..0x0178 and 0x1e02..0x1ef3.
 *
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the routine that reads this table is not in this part of
 * the file -- confirm against it.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3261
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table that UTF8ToISO8859_14 hands to UTF8ToISO8859x.  It is
 * initialised from 43 adjacent string literals of 16 bytes each, i.e.
 * exactly 48 + 10 * 64 = 688 bytes; the array is sized so that the
 * literal's terminating NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by ten 64-byte blocks, with 0 presumably meaning "no mapping" -- confirm
 * the exact lookup scheme against UTF8ToISO8859x.
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    /* bytes 0-47 */
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 0 (offset 48) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 1 (offset 48 + 1 * 64) */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 2 (offset 48 + 2 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    /* 64-byte block 3 (offset 48 + 3 * 64) */
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 4 (offset 48 + 4 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 5 (offset 48 + 5 * 64) */
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 6 (offset 48 + 6 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 7 (offset 48 + 7 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 8 (offset 48 + 8 * 64) */
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 9 (offset 48 + 9 * 64) */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3307
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry table of 16-bit values that ISO8859_15ToUTF8 hands to
 * ISO8859xToUTF8.  Most entries equal 0x80 + their index; the eight that
 * differ are 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153 and
 * 0x0178.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-15 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3326
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table that UTF8ToISO8859_15 hands to UTF8ToISO8859x.  It is
 * initialised from 27 adjacent string literals of 16 bytes each, i.e.
 * exactly 48 + 6 * 64 = 432 bytes; the array is sized so that the
 * literal's terminating NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by six 64-byte blocks, with 0 presumably meaning "no mapping" -- confirm
 * the exact lookup scheme against UTF8ToISO8859x.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    /* bytes 0-47 */
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 0 (offset 48) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 1 (offset 48 + 1 * 64) */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    /* 64-byte block 2 (offset 48 + 2 * 64) */
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 3 (offset 48 + 3 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 4 (offset 48 + 4 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    /* 64-byte block 5 (offset 48 + 5 * 64) */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3356
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values that ISO8859_16ToUTF8 hands to
 * ISO8859xToUTF8.  The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-16 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3375
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table that UTF8ToISO8859_16 hands to UTF8ToISO8859x.  It is
 * initialised from 39 adjacent string literals of 16 bytes each, i.e.
 * exactly 48 + 9 * 64 = 624 bytes; the array is sized so that the
 * literal's terminating NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by nine 64-byte blocks, with 0 presumably meaning "no mapping" -- confirm
 * the exact lookup scheme against UTF8ToISO8859x.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    /* bytes 0-47 */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 0 (offset 48) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 1 (offset 48 + 1 * 64) */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* 64-byte block 2 (offset 48 + 2 * 64) */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 3 (offset 48 + 3 * 64) */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* 64-byte block 4 (offset 48 + 4 * 64) */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 5 (offset 48 + 5 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 6 (offset 48 + 6 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 7 (offset 48 + 7 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 8 (offset 48 + 8 * 64) */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3417
3418
3419/*
3420 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3421 */
3422
3423static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3424    const unsigned char* in, int *inlen) {
3425    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3426}
3427static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3428    const unsigned char* in, int *inlen) {
3429    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3430}
3431
3432static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3433    const unsigned char* in, int *inlen) {
3434    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3435}
3436static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3437    const unsigned char* in, int *inlen) {
3438    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3439}
3440
3441static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3442    const unsigned char* in, int *inlen) {
3443    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3444}
3445static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3446    const unsigned char* in, int *inlen) {
3447    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3448}
3449
3450static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3451    const unsigned char* in, int *inlen) {
3452    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3453}
3454static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3455    const unsigned char* in, int *inlen) {
3456    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3457}
3458
3459static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3460    const unsigned char* in, int *inlen) {
3461    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3462}
3463static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3464    const unsigned char* in, int *inlen) {
3465    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3466}
3467
3468static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3469    const unsigned char* in, int *inlen) {
3470    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3471}
3472static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3473    const unsigned char* in, int *inlen) {
3474    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3475}
3476
3477static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3478    const unsigned char* in, int *inlen) {
3479    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3480}
3481static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3482    const unsigned char* in, int *inlen) {
3483    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3484}
3485
3486static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3487    const unsigned char* in, int *inlen) {
3488    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3489}
3490static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3491    const unsigned char* in, int *inlen) {
3492    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3493}
3494
3495static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3496    const unsigned char* in, int *inlen) {
3497    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3498}
3499static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3500    const unsigned char* in, int *inlen) {
3501    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3502}
3503
3504static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3505    const unsigned char* in, int *inlen) {
3506    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3507}
3508static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3509    const unsigned char* in, int *inlen) {
3510    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3511}
3512
3513static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3514    const unsigned char* in, int *inlen) {
3515    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3516}
3517static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3518    const unsigned char* in, int *inlen) {
3519    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3520}
3521
3522static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3523    const unsigned char* in, int *inlen) {
3524    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3525}
3526static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3527    const unsigned char* in, int *inlen) {
3528    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3529}
3530
3531static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3532    const unsigned char* in, int *inlen) {
3533    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3534}
3535static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3536    const unsigned char* in, int *inlen) {
3537    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3538}
3539
3540static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3541    const unsigned char* in, int *inlen) {
3542    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3543}
3544static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3545    const unsigned char* in, int *inlen) {
3546    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3547}
3548
3549static void
3550xmlRegisterCharEncodingHandlersISO8859x (void) {
3551    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3552    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3553    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3554    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3555    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3556    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3557    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3558    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3559    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3560    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3561    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3562    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3563    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3564    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3565}
3566
3567#endif
3568#endif
3569
3570#define bottom_encoding
3571#include "elfgcchack.h"
3572
3573