17eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
27eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   See the file COPYING for copying permission.
37eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel*/
47eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
57eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include <stddef.h>
67eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
77eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifdef COMPILED_FROM_DSP
87eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "winconfig.h"
97eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#elif defined(MACOS_CLASSIC)
107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "macconfig.h"
117eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#elif defined(__amigaos__)
127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "amigaconfig.h"
137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#elif defined(__WATCOMC__)
147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "watcomconfig.h"
157eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#else
167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifdef HAVE_EXPAT_CONFIG_H
177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include <expat_config.h>
187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif
197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif /* ndef COMPILED_FROM_DSP */
207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "expat_external.h"
227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "internal.h"
237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "xmltok.h"
247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "nametab.h"
257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Optional last entry of the first brace group in VTABLE1: with XML_DTD
   defined it appends the PREFIX()-named ignoreSectionTok (including the
   leading comma); without it the macro expands to nothing.
*/
#if defined(XML_DTD)
#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#else
#define IGNORE_SECTION_TOK_VTABLE /* as nothing */
#endif
317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Positional initializer fragments built from PREFIX()-named functions.
   VTABLE1 is used in this file as the leading part of the initializer
   for the ENCODING member of struct normal_encoding; it stops before
   the two converters.  VTABLE is VTABLE1 followed by the PREFIX()-named
   toUtf8 and toUtf16.
*/
#define VTABLE1 \
  { \
    PREFIX(prologTok), \
    PREFIX(contentTok), \
    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE \
  }, \
  { \
    PREFIX(attributeValueTok), \
    PREFIX(entityValueTok) \
  }, \
  PREFIX(sameName), \
  PREFIX(nameMatchesAscii), \
  PREFIX(nameLength), \
  PREFIX(skipS), \
  PREFIX(getAtts), \
  PREFIX(charRefNumber), \
  PREFIX(predefinedEntityName), \
  PREFIX(updatePosition), \
  PREFIX(isPublicId)

#define VTABLE \
  VTABLE1, \
  PREFIX(toUtf8), \
  PREFIX(toUtf16)
477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Nonzero when the bit for the character given by the byte pair
   (hi, lo) is set in namingBitmap.  pages[hi] selects a group of eight
   bitmap words, the top 3 bits of lo select the word inside that group
   and the low 5 bits of lo select the bit.  The mask is built from 1u
   because the bit index can be 31 and 1 << 31 overflows a signed int.
*/
#define UCS2_GET_NAMING(pages, hi, lo) \
   (namingBitmap[((pages)[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* A 2 byte UTF-8 representation splits the character's 11 bits between
   the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
   pages, 3 bits to add to that index and 5 bits to generate the mask.
   byte must point to unsigned char so that the shifts and masks see
   values in 0..255.  The mask uses 1u because the bit index can be 31
   and 1 << 31 overflows a signed int.
*/
#define UTF8_GET_NAMING2(pages, byte) \
    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                      + ((((byte)[0]) & 3) << 1) \
                      + ((((byte)[1]) >> 5) & 1)] \
         & (1u << (((byte)[1]) & 0x1F)))
607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* A 3 byte UTF-8 representation splits the character's 16 bits between
   the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
   into pages, 3 bits to add to that index and 5 bits to generate the
   mask.  byte must point to unsigned char.  The mask uses 1u because
   the bit index can be 31 and 1 << 31 overflows a signed int.
*/
#define UTF8_GET_NAMING3(pages, byte) \
  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                             + ((((byte)[1]) >> 2) & 0xF)] \
                       << 3) \
                      + ((((byte)[1]) & 3) << 1) \
                      + ((((byte)[2]) >> 5) & 1)] \
         & (1u << (((byte)[2]) & 0x1F)))
737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Length-dispatching form of the naming lookup: n == 2 and n == 3 go to
   the 2 and 3 byte macros (with p viewed as unsigned bytes); every
   other n evaluates to 0.
*/
#define UTF8_GET_NAMING(pages, p, n) \
  ((n) == 2 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
   : (n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
   : 0)
807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel/* Detection of invalid UTF-8 sequences is based on Table 3.1B
827eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
837eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   with the additional restriction of not allowing the Unicode
847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   Implementation details:
867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel     (A & 0x80) == 0     means A < 0x80
877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   and
887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel     (A & 0xC0) == 0xC0  means A > 0xBF
897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel*/
907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Nonzero when the two bytes at p are not a valid 2 byte sequence: the
   lead byte is below 0xC2, or the second byte is outside 0x80..0xBF.
   p must point to unsigned char.  The argument is fully parenthesized
   so that pointer expressions such as buf + 1 are dereferenced as a
   whole.
*/
#define UTF8_INVALID2(p) \
  (*(p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Nonzero when the three bytes at p are not an accepted 3 byte
   sequence.  Checks, in order:
     - third byte: must be 0x80..0xBF, and after EF,BF only up to 0xBD
       (rejects EF,BF,BE and EF,BF,BF);
     - second byte: after E0 it must be 0xA0..0xBF, after ED it must be
       0x80..0x9F, otherwise 0x80..0xBF.
   p must point to unsigned char.  The argument is fully parenthesized
   so that pointer expressions such as buf + 1 are dereferenced as a
   whole.
*/
#define UTF8_INVALID3(p) \
  (((p)[2] & 0x80) == 0 \
  || \
  (*(p) == 0xEF && (p)[1] == 0xBF \
    ? \
    (p)[2] > 0xBD \
    : \
    ((p)[2] & 0xC0) == 0xC0) \
  || \
  (*(p) == 0xE0 \
    ? \
    ((p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0) \
    : \
    (((p)[1] & 0x80) == 0 \
     || \
     (*(p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))))
1107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Nonzero when the four bytes at p are not an accepted 4 byte sequence.
   The third and fourth bytes must be 0x80..0xBF; the second byte must
   be 0x90..0xBF after F0, 0x80..0x8F after F4, and 0x80..0xBF
   otherwise.  p must point to unsigned char.  The argument is fully
   parenthesized so that pointer expressions such as buf + 1 are
   dereferenced as a whole.
*/
#define UTF8_INVALID4(p) \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
  || \
  ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
  || \
  (*(p) == 0xF0 \
    ? \
    ((p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0) \
    : \
    (((p)[1] & 0x80) == 0 \
     || \
     (*(p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))))
1237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
1247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
1257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielisNever(const ENCODING *enc, const char *p)
1267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
1277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return 0;
1287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
1297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
1307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
1317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielutf8_isName2(const ENCODING *enc, const char *p)
1327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
1337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
1347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
1357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
1367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
1377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielutf8_isName3(const ENCODING *enc, const char *p)
1387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
1397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
1407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
1417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* The 4 byte name test is an alias for isNever, so it always answers 0. */
#define utf8_isName4 isNever
1437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
1447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
1457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielutf8_isNmstrt2(const ENCODING *enc, const char *p)
1467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
1477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
1487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
1497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
1507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
1517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielutf8_isNmstrt3(const ENCODING *enc, const char *p)
1527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
1537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
1547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
1557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* The 4 byte name-start test is an alias for isNever, so it always
   answers 0.
*/
#define utf8_isNmstrt4 isNever
1577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
1587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
1597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielutf8_isInvalid2(const ENCODING *enc, const char *p)
1607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
1617eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return UTF8_INVALID2((const unsigned char *)p);
1627eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
1637eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
1647eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
1657eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielutf8_isInvalid3(const ENCODING *enc, const char *p)
1667eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
1677eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return UTF8_INVALID3((const unsigned char *)p);
1687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
1697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
1707eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
1717eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielutf8_isInvalid4(const ENCODING *enc, const char *p)
1727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
1737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return UTF8_INVALID4((const unsigned char *)p);
1747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
1757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
1767eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstruct normal_encoding {
1777eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ENCODING enc;
1787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  unsigned char type[256];
1797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifdef XML_MIN_SIZE
1807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
1817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
1827eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
1837eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
1847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRCALL *charMatches)(const ENCODING *, const char *, int);
1857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif /* XML_MIN_SIZE */
1867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isName2)(const ENCODING *, const char *);
1877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isName3)(const ENCODING *, const char *);
1887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isName4)(const ENCODING *, const char *);
1897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
1907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
1917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
1927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
1937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
1947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
1957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel};
1967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Views an ENCODING pointer as a read-only struct normal_encoding. */
#define AS_NORMAL_ENCODING(enc)   ((const struct normal_encoding *) (enc))
1987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Initializers for the XML_MIN_SIZE-only members of struct
   normal_encoding, named by pasting the prefix E onto each member name.
   The expansion ends with a comma so that NORMAL_VTABLE can follow it
   directly.  Without XML_MIN_SIZE those members do not exist and the
   macro expands to nothing.
*/
#ifndef XML_MIN_SIZE

#define STANDARD_VTABLE(E) /* as nothing */

#else

#define STANDARD_VTABLE(E) \
 E ## byteType, \
 E ## isNameMin, \
 E ## isNmstrtMin, \
 E ## byteToAscii, \
 E ## charMatches,

#endif
2137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Initializers for the nine multi-byte predicates of struct
   normal_encoding, in member order, named by pasting the prefix E onto
   each member name.
*/
#define NORMAL_VTABLE(E) \
 E ## isName2,    E ## isName3,    E ## isName4, \
 E ## isNmstrt2,  E ## isNmstrt3,  E ## isNmstrt4, \
 E ## isInvalid2, E ## isInvalid3, E ## isInvalid4
2247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
2257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int FASTCALL checkCharRefNumber(int);
2267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
2277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "xmltok_impl.h"
2287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "ascii.h"
2297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* In XML_MIN_SIZE builds the sb_ minimum-width name tests named by
   STANDARD_VTABLE(sb_) are aliases for isNever.
*/
#if defined(XML_MIN_SIZE)
#define sb_isNameMin isNever
#define sb_isNmstrtMin isNever
#endif
2347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Minimum bytes per character: read from the encoding in XML_MIN_SIZE
   builds, otherwise the constant 1.
*/
#ifndef XML_MIN_SIZE
#define MINBPC(enc) 1
#else
#define MINBPC(enc) ((enc)->minBytesPerChar)
#endif
2417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Type code of the byte at p, taken from the type table of the
   struct normal_encoding that enc points to.  The byte is converted to
   unsigned char first so that values above 0x7F index the table
   correctly when char is signed.  The table is only read, so the cast
   keeps the pointer const, as AS_NORMAL_ENCODING does.
*/
#define SB_BYTE_TYPE(enc, p) \
  (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
2447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* BYTE_TYPE(enc, p): type code of the byte at p.  Normally a direct
   table lookup; in XML_MIN_SIZE builds it goes through the encoding's
   byteType pointer, for which sb_byteType is the table-lookup
   implementation.
*/
#ifndef XML_MIN_SIZE
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#else
#define BYTE_TYPE(enc, p) \
 (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
static int PTRFASTCALL
sb_byteType(const ENCODING *enc, const char *p)
{
  return SB_BYTE_TYPE(enc, p);
}
#endif
2567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* BYTE_TO_ASCII(enc, p): the byte at p as a character value.  Normally
   a plain dereference; in XML_MIN_SIZE builds it goes through the
   encoding's byteToAscii pointer, for which sb_byteToAscii returns *p.
*/
#ifndef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) (*(p))
#else
#define BYTE_TO_ASCII(enc, p) \
 (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
static int PTRFASTCALL
sb_byteToAscii(const ENCODING *enc, const char *p)
{
  (void)enc;
  return *p;
}
#endif
2687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Multi-byte character tests: n is pasted onto the member name, so
   IS_NAME_CHAR(enc, p, 2) calls the encoding's isName2, and likewise
   for the isNmstrt and isInvalid families.
*/
#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))
2757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Name tests for minimum-width characters: the constant 0 normally; in
   XML_MIN_SIZE builds a call through the encoding's isNameMin and
   isNmstrtMin pointers.
*/
#ifndef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#else
#define IS_NAME_CHAR_MINBPC(enc, p) (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
#endif
2857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* CHAR_MATCHES(enc, p, c): nonzero when the byte at p equals c, where c
   is an ASCII character.  In XML_MIN_SIZE builds the comparison goes
   through the encoding's charMatches pointer, for which sb_charMatches
   is the plain comparison.  In the macro form c is parenthesized so
   that an expression argument (for example a conditional) is compared
   as a whole.
*/
#ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \
 (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
static int PTRCALL
sb_charMatches(const ENCODING *enc, const char *p, int c)
{
  (void)enc;
  return *p == c;
}
#else
#define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#endif
2987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
2997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define PREFIX(ident) normal_ ## ident
3007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define XML_TOK_IMPL_C
3017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "xmltok_impl.c"
3027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#undef XML_TOK_IMPL_C
3037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Drop the per-encoding macro definitions that were set up for the
   xmltok_impl.c include above.
*/
#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES

#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
3137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* UTF8_cvalN is the value of the masked first byte of an N byte
   sequence.
*/
enum {
  UTF8_cval1 = 0x00,  /* 1 byte sequence */
  UTF8_cval2 = 0xC0,  /* 2 byte sequence */
  UTF8_cval3 = 0xE0,  /* 3 byte sequence */
  UTF8_cval4 = 0xF0   /* 4 byte sequence */
};
3207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
3217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic void PTRCALL
3227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielutf8_toUtf8(const ENCODING *enc,
3237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel            const char **fromP, const char *fromLim,
3247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel            char **toP, const char *toLim)
3257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
3267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  char *to;
3277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const char *from;
3287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (fromLim - *fromP > toLim - *toP) {
3297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* Avoid copying partial characters. */
3307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
3317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
3327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        break;
3337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
3347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  for (to = *toP, from = *fromP; from != fromLim; from++, to++)
3357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *to = *from;
3367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  *fromP = from;
3377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  *toP = to;
3387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
3397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
3407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic void PTRCALL
3417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielutf8_toUtf16(const ENCODING *enc,
3427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel             const char **fromP, const char *fromLim,
3437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel             unsigned short **toP, const unsigned short *toLim)
3447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
3457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  unsigned short *to = *toP;
3467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const char *from = *fromP;
3477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  while (from != fromLim && to != toLim) {
3487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
3497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case BT_LEAD2:
3507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
3517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      from += 2;
3527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
3537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case BT_LEAD3:
3547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *to++ = (unsigned short)(((from[0] & 0xf) << 12)
3557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                               | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
3567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      from += 3;
3577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
3587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case BT_LEAD4:
3597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      {
3607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        unsigned long n;
3617eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        if (to + 1 == toLim)
3627eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          goto after;
3637eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
3647eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel            | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
3657eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        n -= 0x10000;
3667eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        to[0] = (unsigned short)((n >> 10) | 0xD800);
3677eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
3687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        to += 2;
3697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        from += 4;
3707eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      }
3717eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
3727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    default:
3737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *to++ = *from++;
3747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
3757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
3767eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
3777eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielafter:
3787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  *fromP = from;
3797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  *toP = to;
3807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
3817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
#ifdef XML_NS
/* UTF-8 encoding table that only exists when XML_NS is defined.  The
   byte type table is asciitab.h followed by utf8tab.h, with BT_COLON
   left as whatever xmltok_impl.h defines it to be.
*/
static const struct normal_encoding utf8_encoding_ns = {
  /* enc: normal_ functions, the UTF-8 converters, then the remaining
     ENCODING members (declared in xmltok.h). */
  {
    VTABLE1,
    utf8_toUtf8,
    utf8_toUtf16,
    1,
    1,
    0
  },
  /* type[256] */
  {
#include "asciitab.h"
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_)
  NORMAL_VTABLE(utf8_)
};
#endif
3927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* UTF-8 encoding descriptor that is always compiled.
   BT_COLON is defined as BT_NMSTRT only while asciitab.h is included, so
   every table entry that asciitab.h spells BT_COLON becomes BT_NMSTRT
   here.  The rest of the table comes from utf8tab.h. */
static const struct normal_encoding utf8_encoding = {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
4037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
#ifdef XML_NS

/* "Internal" UTF-8 descriptor, compiled only when XML_NS is defined.
   Same initializer as utf8_encoding_ns except that the first part of the
   byte-type table comes from iasciitab.h instead of asciitab.h.
   NOTE(review): how iasciitab.h differs from asciitab.h is not visible
   here -- check that header. */
static const struct normal_encoding internal_utf8_encoding_ns = {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#include "iasciitab.h"
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};

#endif
4167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* "Internal" UTF-8 descriptor that is always compiled.
   The table is iasciitab.h (included with BT_COLON temporarily defined as
   BT_NMSTRT) followed by utf8tab.h. */
static const struct normal_encoding internal_utf8_encoding = {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
4277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
4287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic void PTRCALL
4297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniellatin1_toUtf8(const ENCODING *enc,
4307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel              const char **fromP, const char *fromLim,
4317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel              char **toP, const char *toLim)
4327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
4337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  for (;;) {
4347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    unsigned char c;
4357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (*fromP == fromLim)
4367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
4377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    c = (unsigned char)**fromP;
4387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (c & 0x80) {
4397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (toLim - *toP < 2)
4407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        break;
4417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
4427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *(*toP)++ = (char)((c & 0x3f) | 0x80);
4437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      (*fromP)++;
4447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
4457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    else {
4467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (*toP == toLim)
4477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        break;
4487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *(*toP)++ = *(*fromP)++;
4497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
4507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
4517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
4527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
4537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic void PTRCALL
4547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniellatin1_toUtf16(const ENCODING *enc,
4557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               const char **fromP, const char *fromLim,
4567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               unsigned short **toP, const unsigned short *toLim)
4577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
4587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  while (*fromP != fromLim && *toP != toLim)
4597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *(*toP)++ = (unsigned char)*(*fromP)++;
4607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
4617eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
#ifdef XML_NS

/* Latin-1 encoding descriptor, compiled only when XML_NS is defined.
   Uses the latin1_ converters; the byte-type table is asciitab.h followed
   by latin1tab.h, with BT_COLON left as asciitab.h spells it.  Only
   STANDARD_VTABLE(sb_) is supplied (no NORMAL_VTABLE part, unlike the
   UTF-8 descriptors). */
static const struct normal_encoding latin1_encoding_ns = {
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(sb_)
};

#endif
4747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Latin-1 encoding descriptor that is always compiled.
   Same as latin1_encoding_ns except that BT_COLON is defined as BT_NMSTRT
   while asciitab.h is included. */
static const struct normal_encoding latin1_encoding = {
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(sb_)
};
4857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
4867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic void PTRCALL
4877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielascii_toUtf8(const ENCODING *enc,
4887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel             const char **fromP, const char *fromLim,
4897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel             char **toP, const char *toLim)
4907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
4917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  while (*fromP != fromLim && *toP != toLim)
4927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *(*toP)++ = *(*fromP)++;
4937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
4947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
#ifdef XML_NS

/* US-ASCII encoding descriptor, compiled only when XML_NS is defined.
   Converts to UTF-8 with ascii_toUtf8 and to UTF-16 with latin1_toUtf16.
   Only asciitab.h contributes explicit table entries. */
static const struct normal_encoding ascii_encoding_ns = {
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  {
#include "asciitab.h"
/* BT_NONXML == 0: entries not listed above are zero-initialized and
   therefore classify as BT_NONXML */
  },
  STANDARD_VTABLE(sb_)
};

#endif
5077eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* US-ASCII encoding descriptor that is always compiled.
   Same as ascii_encoding_ns except that BT_COLON is defined as BT_NMSTRT
   while asciitab.h is included. */
static const struct normal_encoding ascii_encoding = {
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
/* BT_NONXML == 0: entries not listed above are zero-initialized and
   therefore classify as BT_NONXML */
  },
  STANDARD_VTABLE(sb_)
};
5187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
5197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
5207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielunicode_byte_type(char hi, char lo)
5217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
5227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  switch ((unsigned char)hi) {
5237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0xD8: case 0xD9: case 0xDA: case 0xDB:
5247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return BT_LEAD4;
5257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0xDC: case 0xDD: case 0xDE: case 0xDF:
5267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return BT_TRAIL;
5277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0xFF:
5287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    switch ((unsigned char)lo) {
5297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0xFF:
5307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0xFE:
5317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return BT_NONXML;
5327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
5337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    break;
5347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
5357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return BT_NONASCII;
5367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
5377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* DEFINE_UTF16_TO_UTF8(E) defines the converter E##toUtf8, which re-encodes
   16-bit code units as UTF-8.  The units are read through whichever
   GET_LO/GET_HI macros are in effect where the macro is expanded.

   enc      - unused.
   fromP    - in/out: input cursor; on return it points at the first code
              unit that was not converted.
   fromLim  - end of input.
   toP      - in/out: output cursor, advanced past every byte written.
   toLim    - end of the output buffer.

   Conversion stops early, with *fromP left on the unconverted unit, when
   the output buffer cannot hold the whole UTF-8 sequence for the next
   character, or when a high surrogate is not followed by another complete
   unit inside the input.  A trailing odd byte of input is never read:
   the input length is rounded down to a whole number of units so that the
   two-byte stride cannot step over fromLim. */
#define DEFINE_UTF16_TO_UTF8(E) \
static void  PTRCALL \
E ## toUtf8(const ENCODING *enc, \
            const char **fromP, const char *fromLim, \
            char **toP, const char *toLim) \
{ \
  const char *from = *fromP; \
  /* shrink to an even byte count so the loop is guaranteed to end */ \
  fromLim = from + (((fromLim - from) >> 1) << 1); \
  for (; from < fromLim; from += 2) { \
    int plane; \
    unsigned char lo2; \
    unsigned char lo = GET_LO(from); \
    unsigned char hi = GET_HI(from); \
    switch (hi) { \
    case 0: \
      if (lo < 0x80) { \
        if (*toP == toLim) { \
          *fromP = from; \
          return; \
        } \
        *(*toP)++ = lo; \
        break; \
      } \
      /* fall through */ \
    case 0x1: case 0x2: case 0x3: \
    case 0x4: case 0x5: case 0x6: case 0x7: \
      /* 0x0080..0x07FF: two output bytes */ \
      if (toLim -  *toP < 2) { \
        *fromP = from; \
        return; \
      } \
      *(*toP)++ = ((lo >> 6) | (hi << 2) |  UTF8_cval2); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
      /* high surrogate: consumes two units, produces four bytes */ \
      if (toLim -  *toP < 4) { \
        *fromP = from; \
        return; \
      } \
      /* the low surrogate must lie inside the input before it is read */ \
      if (fromLim - from < 4) { \
        *fromP = from; \
        return; \
      } \
      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
      *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
      *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
      from += 2; \
      lo2 = GET_LO(from); \
      *(*toP)++ = (((lo & 0x3) << 4) \
                   | ((GET_HI(from) & 0x3) << 2) \
                   | (lo2 >> 6) \
                   | 0x80); \
      *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
      break; \
    default: \
      if (toLim -  *toP < 3)  { \
        *fromP = from; \
        return; \
      } \
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
      *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
      *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    } \
  } \
  *fromP = from; \
}
6007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* DEFINE_UTF16_TO_UTF16(E) defines the converter E##toUtf16, which copies
   16-bit code units from a byte buffer into an array of unsigned short,
   assembling each unit with the GET_HI/GET_LO macros in effect where the
   macro is expanded.

   enc      - unused.
   fromP    - in/out: input cursor, advanced two bytes per unit copied.
   fromLim  - end of input.
   toP      - in/out: output cursor, advanced one element per unit copied.
   toLim    - end of the output array.

   The input length is first rounded down to a whole number of units, so a
   trailing odd byte is never read and the two-byte stride cannot step over
   fromLim.  If the input is longer than the remaining output space and its
   last unit has a high byte in 0xD8..0xDF, that last unit is left
   unconverted. */
#define DEFINE_UTF16_TO_UTF16(E) \
static void  PTRCALL \
E ## toUtf16(const ENCODING *enc, \
             const char **fromP, const char *fromLim, \
             unsigned short **toP, const unsigned short *toLim) \
{ \
  /* shrink to an even byte count so the loop is guaranteed to end */ \
  fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); \
  /* Avoid copying first half only of surrogate */ \
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
    fromLim -= 2; \
  for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
}
6147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Little-endian byte accessors: byte 0 of a code unit is the low byte and
   byte 1 the high byte.  They exist only while the little2_ converters
   are instantiated and are #undef'd immediately afterwards. */
#define SET2(ptr, ch) \
  (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])

/* Defines little2_toUtf8 and little2_toUtf16. */
DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)

#undef SET2
#undef GET_LO
#undef GET_HI
6267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Big-endian byte accessors: byte 0 of a code unit is the high byte and
   byte 1 the low byte.  They exist only while the big2_ converters are
   instantiated and are #undef'd immediately afterwards. */
#define SET2(ptr, ch) \
  (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
#define GET_HI(ptr) ((unsigned char)(ptr)[0])

/* Defines big2_toUtf8 and big2_toUtf16. */
DEFINE_UTF16_TO_UTF8(big2_)
DEFINE_UTF16_TO_UTF16(big2_)

#undef SET2
#undef GET_LO
#undef GET_HI
6387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Per-character helpers for little-endian two-byte input: (p)[0] is the
   low byte of the code unit and (p)[1] the high byte.  Every macro
   parameter is parenthesized so that compound expressions can be passed
   for p and c. */

/* Byte type of the unit at p: looked up in enc's type table when the high
   byte is zero, otherwise computed by unicode_byte_type(hi, lo). */
#define LITTLE2_BYTE_TYPE(enc, p) \
 ((p)[1] == 0 \
  ? ((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
  : unicode_byte_type((p)[1], (p)[0]))
/* The low byte when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
/* Non-zero when the unit at p has a zero high byte and low byte == c. */
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == (c))
/* Naming-table lookups with (high byte, low byte) of the unit at p. */
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
6497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
6507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifdef XML_MIN_SIZE
6517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Function form of LITTLE2_BYTE_TYPE (this copy sits in the XML_MIN_SIZE
   branch): returns the byte type of the two-byte unit at p. */
static int PTRFASTCALL
little2_byteType(const ENCODING *enc, const char *p)
{
  return LITTLE2_BYTE_TYPE(enc, p);
}
6577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Function form of LITTLE2_BYTE_TO_ASCII: returns p[0] when p[1] is zero,
   otherwise -1. */
static int PTRFASTCALL
little2_byteToAscii(const ENCODING *enc, const char *p)
{
  return LITTLE2_BYTE_TO_ASCII(enc, p);
}
6637eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Function form of LITTLE2_CHAR_MATCHES: non-zero when p[1] is zero and
   p[0] equals c. */
static int PTRCALL
little2_charMatches(const ENCODING *enc, const char *p, int c)
{
  return LITTLE2_CHAR_MATCHES(enc, p, c);
}
6697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Function form of LITTLE2_IS_NAME_CHAR_MINBPC: looks the unit at p up in
   namePages via UCS2_GET_NAMING. */
static int PTRFASTCALL
little2_isNameMin(const ENCODING *enc, const char *p)
{
  return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
}
6757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Function form of LITTLE2_IS_NMSTRT_CHAR_MINBPC: looks the unit at p up
   in nmstrtPages via UCS2_GET_NAMING. */
static int PTRFASTCALL
little2_isNmstrtMin(const ENCODING *enc, const char *p)
{
  return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
}
6817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Rebind VTABLE so that encoding initializers written as "{ VTABLE, ... }"
   use the little2_ converters. */
#undef VTABLE
#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
6847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
6857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#else /* not XML_MIN_SIZE */
6867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Non-XML_MIN_SIZE build: configure the per-encoding macros for
   little-endian two-byte input, include xmltok_impl.c with them in effect
   (PREFIX(x) expands to little2_x), then remove the macros again. */
#undef PREFIX
#define PREFIX(ident) little2_ ## ident
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
/* The n-byte name tests are constant 0 for this encoding; only the
   MINBPC variants do real lookups. */
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR(enc, p, n) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)

#define XML_TOK_IMPL_C
#include "xmltok_impl.c"
#undef XML_TOK_IMPL_C

#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
/* IS_INVALID_CHAR is not defined in this section; undefining it here is a
   no-op unless an earlier part of the file left it defined. */
#undef IS_INVALID_CHAR
7127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
7137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif /* not XML_MIN_SIZE */
7147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
#ifdef XML_NS

/* Little-endian two-byte encoding descriptor, compiled only when XML_NS
   is defined.  The last value of the first initializer group is 1 when
   BYTEORDER is 1234 and 0 otherwise.
   NOTE(review): that value presumably marks "input already matches the
   host's UTF-16 layout" -- confirm against struct encoding in xmltok.h. */
static const struct normal_encoding little2_encoding_ns = {
  { VTABLE, 2, 0,
#if BYTEORDER == 1234
    1
#else
    0
#endif
  },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};

#endif
7337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Little-endian two-byte encoding descriptor that is always compiled.
   Same as little2_encoding_ns except that BT_COLON is defined as
   BT_NMSTRT while asciitab.h is included. */
static const struct normal_encoding little2_encoding = {
  { VTABLE, 2, 0,
#if BYTEORDER == 1234
    1
#else
    0
#endif
  },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};
7507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* The "internal" little-endian descriptors are compiled for every
   BYTEORDER except 4321.  They differ from little2_encoding[_ns] in using
   iasciitab.h for the first part of the table and in hard-coding the last
   value of the first initializer group to 1. */
#if BYTEORDER != 4321

#ifdef XML_NS

/* Variant compiled only when XML_NS is defined: BT_COLON is not
   remapped. */
static const struct normal_encoding internal_little2_encoding_ns = {
  { VTABLE, 2, 0, 1 },
  {
#include "iasciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};

#endif

/* Variant that is always compiled (within the BYTEORDER guard): BT_COLON
   is defined as BT_NMSTRT while iasciitab.h is included. */
static const struct normal_encoding internal_little2_encoding = {
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};

#endif
7787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
7797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Per-character helpers for big-endian two-byte input: (p)[0] is the high
   byte of the code unit and (p)[1] the low byte.  Every macro parameter is
   parenthesized so that compound expressions can be passed for p and c. */

/* Byte type of the unit at p: looked up in enc's type table when the high
   byte is zero, otherwise computed by unicode_byte_type(hi, lo). */
#define BIG2_BYTE_TYPE(enc, p) \
 ((p)[0] == 0 \
  ? ((const struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
  : unicode_byte_type((p)[0], (p)[1]))
/* The low byte when the high byte is zero, otherwise -1. */
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
/* Non-zero when the unit at p has a zero high byte and low byte == c. */
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == (c))
/* Naming-table lookups with (high byte, low byte) of the unit at p. */
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
7907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
7917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifdef XML_MIN_SIZE
7927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
7937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
7947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielbig2_byteType(const ENCODING *enc, const char *p)
7957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
7967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return BIG2_BYTE_TYPE(enc, p);
7977eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
7987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
7997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
8007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielbig2_byteToAscii(const ENCODING *enc, const char *p)
8017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
8027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return BIG2_BYTE_TO_ASCII(enc, p);
8037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
8047eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
8057eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRCALL
8067eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielbig2_charMatches(const ENCODING *enc, const char *p, int c)
8077eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
8087eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return BIG2_CHAR_MATCHES(enc, p, c);
8097eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
8107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
8117eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
8127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielbig2_isNameMin(const ENCODING *enc, const char *p)
8137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
8147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
8157eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
8167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
8177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
8187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielbig2_isNmstrtMin(const ENCODING *enc, const char *p)
8197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
8207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
8217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
8227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* XML_MIN_SIZE build: rebind VTABLE so that the big2 tables defined after
   this point expand to VTABLE1 followed by the big2_toUtf8 and
   big2_toUtf16 entries. */
#undef VTABLE
#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
8257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
8267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#else /* not XML_MIN_SIZE */
8277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Non-XML_MIN_SIZE build: include "xmltok_impl.c" once with the hook
   macros below bound to the BIG2_* helpers.  PREFIX() pastes big2_ in
   front of an identifier.
   NOTE(review): the included file is not visible here; it is presumably
   written against PREFIX/MINBPC/BYTE_TYPE/... -- confirm there. */
#undef PREFIX
#define PREFIX(ident) big2_ ## ident
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
/* The n-argument forms are constant 0 here; only the _MINBPC forms map to
   real lookups. */
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR(enc, p, n) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)

#define XML_TOK_IMPL_C
#include "xmltok_impl.c"
#undef XML_TOK_IMPL_C

/* Drop the hook macros again.  IS_INVALID_CHAR is not defined in this
   section; #undef of a name that is not a macro has no effect. */
#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
8537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
8547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif /* not XML_MIN_SIZE */
8557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
8567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifdef XML_NS
8577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* big2 encoding table that is only compiled when XML_NS is defined.

   The byte-type map is "asciitab.h" followed by "latin1tab.h", included
   without redefining BT_COLON.  The last integer of the first
   initializer group is 1 when BYTEORDER == 4321 and 0 otherwise.
   Function entries come from STANDARD_VTABLE(big2_).

   NOTE(review): the integers after VTABLE presumably fill
   minBytesPerChar / isUtf8 / isUtf16 in ENCODING -- confirm in xmltok.h.
*/
static const struct normal_encoding big2_encoding_ns = {
  { VTABLE, 2, 0,
#if BYTEORDER == 4321
  1
#else
  0
#endif
  },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};
8727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
8737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif
8747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* big2 encoding table compiled unconditionally.

   The byte-type map is "asciitab.h" followed by "latin1tab.h".  BT_COLON
   is defined as BT_NMSTRT only while "asciitab.h" is being included, so
   any BT_COLON entry in that header expands to BT_NMSTRT in this table.
   The last integer of the first initializer group is 1 when
   BYTEORDER == 4321 and 0 otherwise.  Function entries come from
   STANDARD_VTABLE(big2_).

   NOTE(review): the integers after VTABLE presumably fill
   minBytesPerChar / isUtf8 / isUtf16 in ENCODING -- confirm in xmltok.h.
*/
static const struct normal_encoding big2_encoding = {
  { VTABLE, 2, 0,
#if BYTEORDER == 4321
  1
#else
  0
#endif
  },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};
8917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
8927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#if BYTEORDER != 1234
8937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
8947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifdef XML_NS
8957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Internal big2 encoding table; compiled only when BYTEORDER != 1234 and
   XML_NS is defined.

   The byte-type map is "iasciitab.h" followed by "latin1tab.h", included
   without redefining BT_COLON.  Function entries come from
   STANDARD_VTABLE(big2_).

   NOTE(review): the integers after VTABLE presumably fill
   minBytesPerChar / isUtf8 / isUtf16 in ENCODING -- confirm in xmltok.h.
*/
static const struct normal_encoding internal_big2_encoding_ns = {
  { VTABLE, 2, 0, 1 },
  {
#include "iasciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};
9047eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
9057eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif
9067eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Internal big2 encoding table; compiled only when BYTEORDER != 1234.

   The byte-type map is "iasciitab.h" followed by "latin1tab.h".  BT_COLON
   is defined as BT_NMSTRT only while "iasciitab.h" is being included, so
   any BT_COLON entry in that header expands to BT_NMSTRT in this table.
   Function entries come from STANDARD_VTABLE(big2_).

   NOTE(review): the integers after VTABLE presumably fill
   minBytesPerChar / isUtf8 / isUtf16 in ENCODING -- confirm in xmltok.h.
*/
static const struct normal_encoding internal_big2_encoding = {
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};
9177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
9187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif
9197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
9207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#undef PREFIX
9217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
9227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int FASTCALL
9237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstreqci(const char *s1, const char *s2)
9247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
9257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  for (;;) {
9267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    char c1 = *s1++;
9277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    char c2 = *s2++;
9287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (ASCII_a <= c1 && c1 <= ASCII_z)
9297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      c1 += ASCII_A - ASCII_a;
9307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (ASCII_a <= c2 && c2 <= ASCII_z)
9317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      c2 += ASCII_A - ASCII_a;
9327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (c1 != c2)
9337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return 0;
9347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (!c1)
9357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
9367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
9377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return 1;
9387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
9397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
9407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic void PTRCALL
9417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielinitUpdatePosition(const ENCODING *enc, const char *ptr,
9427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                   const char *end, POSITION *pos)
9437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
9447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
9457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
9467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
9477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int
9487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltoAscii(const ENCODING *enc, const char *ptr, const char *end)
9497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
9507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  char buf[1];
9517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  char *p = buf;
9527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
9537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (p == buf)
9547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return -1;
9557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  else
9567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return buf[0];
9577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
9587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
9597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int FASTCALL
9607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielisSpace(int c)
9617eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
9627eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  switch (c) {
9637eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0x20:
9647eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0xD:
9657eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0xA:
9667eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0x9:
9677eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 1;
9687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
9697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return 0;
9707eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
9717eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
9727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel/* Return 1 if there's just optional white space or there's an S
9737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   followed by name=val.
9747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel*/
9757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int
9767eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielparsePseudoAttribute(const ENCODING *enc,
9777eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                     const char *ptr,
9787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                     const char *end,
9797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                     const char **namePtr,
9807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                     const char **nameEndPtr,
9817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                     const char **valPtr,
9827eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                     const char **nextTokPtr)
9837eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
9847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int c;
9857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  char open;
9867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (ptr == end) {
9877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *namePtr = NULL;
9887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 1;
9897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
9907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (!isSpace(toAscii(enc, ptr, end))) {
9917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *nextTokPtr = ptr;
9927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
9937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
9947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  do {
9957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    ptr += enc->minBytesPerChar;
9967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  } while (isSpace(toAscii(enc, ptr, end)));
9977eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (ptr == end) {
9987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *namePtr = NULL;
9997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 1;
10007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
10017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  *namePtr = ptr;
10027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  for (;;) {
10037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    c = toAscii(enc, ptr, end);
10047eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (c == -1) {
10057eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *nextTokPtr = ptr;
10067eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return 0;
10077eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
10087eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (c == ASCII_EQUALS) {
10097eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *nameEndPtr = ptr;
10107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
10117eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
10127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (isSpace(c)) {
10137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *nameEndPtr = ptr;
10147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      do {
10157eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        ptr += enc->minBytesPerChar;
10167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      } while (isSpace(c = toAscii(enc, ptr, end)));
10177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (c != ASCII_EQUALS) {
10187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        *nextTokPtr = ptr;
10197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        return 0;
10207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      }
10217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
10227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
10237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    ptr += enc->minBytesPerChar;
10247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
10257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (ptr == *namePtr) {
10267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *nextTokPtr = ptr;
10277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
10287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
10297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ptr += enc->minBytesPerChar;
10307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  c = toAscii(enc, ptr, end);
10317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  while (isSpace(c)) {
10327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    ptr += enc->minBytesPerChar;
10337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    c = toAscii(enc, ptr, end);
10347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
10357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (c != ASCII_QUOT && c != ASCII_APOS) {
10367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *nextTokPtr = ptr;
10377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
10387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
10397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  open = (char)c;
10407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ptr += enc->minBytesPerChar;
10417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  *valPtr = ptr;
10427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  for (;; ptr += enc->minBytesPerChar) {
10437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    c = toAscii(enc, ptr, end);
10447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (c == open)
10457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
10467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (!(ASCII_a <= c && c <= ASCII_z)
10477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        && !(ASCII_A <= c && c <= ASCII_Z)
10487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        && !(ASCII_0 <= c && c <= ASCII_9)
10497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        && c != ASCII_PERIOD
10507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        && c != ASCII_MINUS
10517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        && c != ASCII_UNDERSCORE) {
10527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *nextTokPtr = ptr;
10537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return 0;
10547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
10557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
10567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  *nextTokPtr = ptr + enc->minBytesPerChar;
10577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return 1;
10587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
10597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* NUL-terminated keyword strings, spelled with the ASCII_* character
   constants instead of string literals.  doParseXmlDecl passes them to
   XmlNameMatchesAscii to recognise pseudo-attribute names and the two
   legal values of "standalone". */

/* "version" */
static const char KW_version[] = {
  ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'
};

/* "encoding" */
static const char KW_encoding[] = {
  ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0'
};

/* "standalone" */
static const char KW_standalone[] = {
  ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o,
  ASCII_n, ASCII_e, '\0'
};

/* "yes" */
static const char KW_yes[] = {
  ASCII_y, ASCII_e, ASCII_s,  '\0'
};

/* "no" */
static const char KW_no[] = {
  ASCII_n, ASCII_o,  '\0'
};
10807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
10817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int
10827eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieldoParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
10837eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                                                 const char *,
10847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                                                 const char *),
10857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               int isGeneralTextEntity,
10867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               const ENCODING *enc,
10877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               const char *ptr,
10887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               const char *end,
10897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               const char **badPtr,
10907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               const char **versionPtr,
10917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               const char **versionEndPtr,
10927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               const char **encodingName,
10937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               const ENCODING **encoding,
10947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               int *standalone)
10957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
10967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const char *val = NULL;
10977eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const char *name = NULL;
10987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const char *nameEnd = NULL;
10997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ptr += 5 * enc->minBytesPerChar;
11007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  end -= 2 * enc->minBytesPerChar;
11017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
11027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      || !name) {
11037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *badPtr = ptr;
11047eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
11057eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
11067eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
11077eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (!isGeneralTextEntity) {
11087eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *badPtr = name;
11097eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return 0;
11107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
11117eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
11127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  else {
11137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (versionPtr)
11147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *versionPtr = val;
11157eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (versionEndPtr)
11167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *versionEndPtr = ptr;
11177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
11187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *badPtr = ptr;
11197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return 0;
11207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
11217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (!name) {
11227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (isGeneralTextEntity) {
11237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        /* a TextDecl must have an EncodingDecl */
11247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        *badPtr = ptr;
11257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        return 0;
11267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      }
11277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return 1;
11287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
11297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
11307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
11317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    int c = toAscii(enc, val, end);
11327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) {
11337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *badPtr = val;
11347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return 0;
11357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
11367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (encodingName)
11377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *encodingName = val;
11387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (encoding)
11397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
11407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
11417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *badPtr = ptr;
11427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return 0;
11437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
11447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (!name)
11457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return 1;
11467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
11477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
11487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      || isGeneralTextEntity) {
11497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *badPtr = name;
11507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
11517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
11527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
11537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (standalone)
11547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *standalone = 1;
11557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
11567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
11577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (standalone)
11587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *standalone = 0;
11597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
11607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  else {
11617eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *badPtr = val;
11627eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
11637eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
11647eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  while (isSpace(toAscii(enc, ptr, end)))
11657eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    ptr += enc->minBytesPerChar;
11667eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (ptr != end) {
11677eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *badPtr = ptr;
11687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
11697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
11707eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return 1;
11717eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
11727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
11737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int FASTCALL
11747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielcheckCharRefNumber(int result)
11757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
11767eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  switch (result >> 8) {
11777eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0xD8: case 0xD9: case 0xDA: case 0xDB:
11787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0xDC: case 0xDD: case 0xDE: case 0xDF:
11797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return -1;
11807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0:
11817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (latin1_encoding.type[result] == BT_NONXML)
11827eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return -1;
11837eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    break;
11847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  case 0xFF:
11857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (result == 0xFFFE || result == 0xFFFF)
11867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return -1;
11877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    break;
11887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
11897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return result;
11907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
11917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
11927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielint FASTCALL
11937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielXmlUtf8Encode(int c, char *buf)
11947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
11957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  enum {
11967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* minN is minimum legal resulting value for N byte sequence */
11977eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    min2 = 0x80,
11987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    min3 = 0x800,
11997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    min4 = 0x10000
12007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  };
12017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
12027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (c < 0)
12037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
12047eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (c < min2) {
12057eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[0] = (char)(c | UTF8_cval1);
12067eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 1;
12077eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
12087eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (c < min3) {
12097eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[0] = (char)((c >> 6) | UTF8_cval2);
12107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[1] = (char)((c & 0x3f) | 0x80);
12117eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 2;
12127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
12137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (c < min4) {
12147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[0] = (char)((c >> 12) | UTF8_cval3);
12157eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
12167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[2] = (char)((c & 0x3f) | 0x80);
12177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 3;
12187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
12197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (c < 0x110000) {
12207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[0] = (char)((c >> 18) | UTF8_cval4);
12217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
12227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
12237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[3] = (char)((c & 0x3f) | 0x80);
12247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 4;
12257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
12267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return 0;
12277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
12287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
12297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielint FASTCALL
12307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielXmlUtf16Encode(int charNum, unsigned short *buf)
12317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
12327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (charNum < 0)
12337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
12347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (charNum < 0x10000) {
12357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[0] = (unsigned short)charNum;
12367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 1;
12377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
12387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (charNum < 0x110000) {
12397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    charNum -= 0x10000;
12407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
12417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
12427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 2;
12437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
12447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return 0;
12457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
12467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
12477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstruct unknown_encoding {
12487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  struct normal_encoding normal;
12497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  CONVERTER convert;
12507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  void *userData;
12517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  unsigned short utf16[256];
12527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  char utf8[256][4];
12537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel};
12547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
12557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define AS_UNKNOWN_ENCODING(enc)  ((const struct unknown_encoding *) (enc))
12567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
12577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielint
12587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielXmlSizeOfUnknownEncoding(void)
12597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
12607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return sizeof(struct unknown_encoding);
12617eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
12627eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
12637eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
12647eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielunknown_isName(const ENCODING *enc, const char *p)
12657eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
12667eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
12677eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int c = uenc->convert(uenc->userData, p);
12687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (c & ~0xFFFF)
12697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
12707eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
12717eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
12727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
12737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
12747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielunknown_isNmstrt(const ENCODING *enc, const char *p)
12757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
12767eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
12777eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int c = uenc->convert(uenc->userData, p);
12787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (c & ~0xFFFF)
12797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return 0;
12807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
12817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
12827eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
12837eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int PTRFASTCALL
12847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielunknown_isInvalid(const ENCODING *enc, const char *p)
12857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
12867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
12877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int c = uenc->convert(uenc->userData, p);
12887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
12897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
12907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
12917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic void PTRCALL
12927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielunknown_toUtf8(const ENCODING *enc,
12937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               const char **fromP, const char *fromLim,
12947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel               char **toP, const char *toLim)
12957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
12967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
12977eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  char buf[XML_UTF8_ENCODE_MAX];
12987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  for (;;) {
12997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    const char *utf8;
13007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    int n;
13017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (*fromP == fromLim)
13027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
13037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    utf8 = uenc->utf8[(unsigned char)**fromP];
13047eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    n = *utf8++;
13057eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (n == 0) {
13067eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      int c = uenc->convert(uenc->userData, *fromP);
13077eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      n = XmlUtf8Encode(c, buf);
13087eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (n > toLim - *toP)
13097eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        break;
13107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      utf8 = buf;
13117eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
13127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                 - (BT_LEAD2 - 2));
13137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
13147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    else {
13157eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (n > toLim - *toP)
13167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        break;
13177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      (*fromP)++;
13187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
13197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    do {
13207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *(*toP)++ = *utf8++;
13217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    } while (--n != 0);
13227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
13237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
13247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
13257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic void PTRCALL
13267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielunknown_toUtf16(const ENCODING *enc,
13277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                const char **fromP, const char *fromLim,
13287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                unsigned short **toP, const unsigned short *toLim)
13297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
13307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
13317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  while (*fromP != fromLim && *toP != toLim) {
13327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    unsigned short c = uenc->utf16[(unsigned char)**fromP];
13337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (c == 0) {
13347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      c = (unsigned short)
13357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          uenc->convert(uenc->userData, *fromP);
13367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
13377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                 - (BT_LEAD2 - 2));
13387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
13397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    else
13407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      (*fromP)++;
13417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    *(*toP)++ = c;
13427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
13437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
13447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
13457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielENCODING *
13467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielXmlInitUnknownEncoding(void *mem,
13477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                       int *table,
13487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                       CONVERTER convert,
13497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                       void *userData)
13507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
13517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int i;
13527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  struct unknown_encoding *e = (struct unknown_encoding *)mem;
13537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
13547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
13557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  for (i = 0; i < 128; i++)
13567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (latin1_encoding.type[i] != BT_OTHER
13577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        && latin1_encoding.type[i] != BT_NONXML
13587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        && table[i] != i)
13597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return 0;
13607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  for (i = 0; i < 256; i++) {
13617eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    int c = table[i];
13627eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (c == -1) {
13637eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->normal.type[i] = BT_MALFORM;
13647eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      /* This shouldn't really get used. */
13657eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf16[i] = 0xFFFF;
13667eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf8[i][0] = 1;
13677eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf8[i][1] = 0;
13687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
13697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    else if (c < 0) {
13707eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (c < -4)
13717eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        return 0;
13727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
13737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf8[i][0] = 0;
13747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf16[i] = 0;
13757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
13767eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    else if (c < 0x80) {
13777eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (latin1_encoding.type[c] != BT_OTHER
13787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          && latin1_encoding.type[c] != BT_NONXML
13797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          && c != i)
13807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        return 0;
13817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->normal.type[i] = latin1_encoding.type[c];
13827eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf8[i][0] = 1;
13837eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf8[i][1] = (char)c;
13847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
13857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
13867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    else if (checkCharRefNumber(c) < 0) {
13877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->normal.type[i] = BT_NONXML;
13887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      /* This shouldn't really get used. */
13897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf16[i] = 0xFFFF;
13907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf8[i][0] = 1;
13917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf8[i][1] = 0;
13927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
13937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    else {
13947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (c > 0xFFFF)
13957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        return 0;
13967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
13977eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        e->normal.type[i] = BT_NMSTRT;
13987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
13997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        e->normal.type[i] = BT_NAME;
14007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      else
14017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        e->normal.type[i] = BT_OTHER;
14027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
14037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      e->utf16[i] = (unsigned short)c;
14047eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
14057eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
14067eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  e->userData = userData;
14077eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  e->convert = convert;
14087eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (convert) {
14097eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    e->normal.isName2 = unknown_isName;
14107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    e->normal.isName3 = unknown_isName;
14117eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    e->normal.isName4 = unknown_isName;
14127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    e->normal.isNmstrt2 = unknown_isNmstrt;
14137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    e->normal.isNmstrt3 = unknown_isNmstrt;
14147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    e->normal.isNmstrt4 = unknown_isNmstrt;
14157eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    e->normal.isInvalid2 = unknown_isInvalid;
14167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    e->normal.isInvalid3 = unknown_isInvalid;
14177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    e->normal.isInvalid4 = unknown_isInvalid;
14187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
14197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  e->normal.enc.utf8Convert = unknown_toUtf8;
14207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  e->normal.enc.utf16Convert = unknown_toUtf16;
14217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return &(e->normal.enc);
14227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
14237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Indices of the built-in encodings.  If this enumeration is changed,
   getEncodingIndex and the encodings tables must be changed to match.
*/
enum {
  UNKNOWN_ENC    = -1,
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC   = 1,
  UTF_8_ENC      = 2,
  UTF_16_ENC     = 3,
  UTF_16BE_ENC   = 4,
  UTF_16LE_ENC   = 5,
  /* must match encodingNames up to here */
  NO_ENC         = 6
};
14377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
14387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic const char KW_ISO_8859_1[] = {
14397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9,
14407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ASCII_MINUS, ASCII_1, '\0'
14417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel};
14427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic const char KW_US_ASCII[] = {
14437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
14447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  '\0'
14457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel};
14467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic const char KW_UTF_8[] =  {
14477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'
14487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel};
14497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic const char KW_UTF_16[] = {
14507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'
14517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel};
14527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic const char KW_UTF_16BE[] = {
14537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E,
14547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  '\0'
14557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel};
14567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic const char KW_UTF_16LE[] = {
14577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E,
14587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  '\0'
14597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel};
14607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
14617eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int FASTCALL
14627eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielgetEncodingIndex(const char *name)
14637eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
14647eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  static const char * const encodingNames[] = {
14657eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    KW_ISO_8859_1,
14667eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    KW_US_ASCII,
14677eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    KW_UTF_8,
14687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    KW_UTF_16,
14697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    KW_UTF_16BE,
14707eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    KW_UTF_16LE,
14717eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  };
14727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  int i;
14737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (name == NULL)
14747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return NO_ENC;
14757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
14767eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (streqci(name, encodingNames[i]))
14777eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return i;
14787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return UNKNOWN_ENC;
14797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
14807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.

   INIT_ENC_INDEX reads that index back as an int.
   SET_INIT_ENC_INDEX stores it, narrowed to char; the argument is
   parenthesised so that the cast applies to the whole expression
   passed in, not just to its first operand.
*/

#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
14877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
14887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel/* This is what detects the encoding.  encodingTable maps from
14897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
14907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   the external (protocol) specified encoding; state is
14917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   XML_CONTENT_STATE if we're parsing an external text entity, and
14927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel   XML_PROLOG_STATE otherwise.
14937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel*/
14947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
14957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
14967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielstatic int
14977eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielinitScan(const ENCODING * const *encodingTable,
14987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel         const INIT_ENCODING *enc,
14997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel         int state,
15007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel         const char *ptr,
15017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel         const char *end,
15027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel         const char **nextTokPtr)
15037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
15047eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  const ENCODING **encPtr;
15057eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
15067eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (ptr == end)
15077eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    return XML_TOK_NONE;
15087eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  encPtr = enc->encPtr;
15097eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (ptr + 1 == end) {
15107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* only a single byte available for auto-detection */
15117eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifndef XML_DTD /* FIXME */
15127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* a well-formed document entity must have more than one byte */
15137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    if (state != XML_CONTENT_STATE)
15147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return XML_TOK_PARTIAL;
15157eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif
15167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* so we're parsing an external text entity... */
15177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* if UTF-16 was externally specified, then we need at least 2 bytes */
15187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    switch (INIT_ENC_INDEX(enc)) {
15197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case UTF_16_ENC:
15207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case UTF_16LE_ENC:
15217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case UTF_16BE_ENC:
15227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return XML_TOK_PARTIAL;
15237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
15247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    switch ((unsigned char)*ptr) {
15257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0xFE:
15267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0xFF:
15277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0xEF: /* possibly first byte of UTF-8 BOM */
15287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
15297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          && state == XML_CONTENT_STATE)
15307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        break;
15317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      /* fall through */
15327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0x00:
15337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0x3C:
15347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return XML_TOK_PARTIAL;
15357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
15367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
15377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  else {
15387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
15397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0xFEFF:
15407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
15417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          && state == XML_CONTENT_STATE)
15427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        break;
15437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *nextTokPtr = ptr + 2;
15447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *encPtr = encodingTable[UTF_16BE_ENC];
15457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return XML_TOK_BOM;
15467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* 00 3C is handled in the default case */
15477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0x3C00:
15487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
15497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel           || INIT_ENC_INDEX(enc) == UTF_16_ENC)
15507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          && state == XML_CONTENT_STATE)
15517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        break;
15527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *encPtr = encodingTable[UTF_16LE_ENC];
15537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
15547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0xFFFE:
15557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
15567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          && state == XML_CONTENT_STATE)
15577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        break;
15587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *nextTokPtr = ptr + 2;
15597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      *encPtr = encodingTable[UTF_16LE_ENC];
15607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      return XML_TOK_BOM;
15617eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    case 0xEFBB:
15627eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      /* Maybe a UTF-8 BOM (EF BB BF) */
15637eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      /* If there's an explicitly specified (external) encoding
15647eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel         of ISO-8859-1 or some flavour of UTF-16
15657eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel         and this is an external text entity,
15667eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel         don't look for the BOM,
15677eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel         because it might be a legal data.
15687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      */
15697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (state == XML_CONTENT_STATE) {
15707eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        int e = INIT_ENC_INDEX(enc);
15717eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC
15727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel            || e == UTF_16LE_ENC || e == UTF_16_ENC)
15737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          break;
15747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      }
15757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (ptr + 2 == end)
15767eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        return XML_TOK_PARTIAL;
15777eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if ((unsigned char)ptr[2] == 0xBF) {
15787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        *nextTokPtr = ptr + 3;
15797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        *encPtr = encodingTable[UTF_8_ENC];
15807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        return XML_TOK_BOM;
15817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      }
15827eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
15837eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    default:
15847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      if (ptr[0] == '\0') {
15857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        /* 0 isn't a legal data character. Furthermore a document
15867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel           entity can only start with ASCII characters.  So the only
15877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel           way this can fail to be big-endian UTF-16 is if it is an
15887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel           external parsed general entity that's labelled as
15897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel           UTF-16LE.
15907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        */
15917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
15927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          break;
15937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        *encPtr = encodingTable[UTF_16BE_ENC];
15947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
15957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      }
15967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      else if (ptr[1] == '\0') {
15977eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        /* We could recover here in the case:
15987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel            - parsing an external entity
15997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel            - second byte is 0
16007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel            - no externally specified encoding
16017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel            - no encoding declaration
16027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel           by assuming UTF-16LE.  But we don't, because this would mean when
16037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel           presented just with a single byte, we couldn't reliably determine
16047eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel           whether we needed further bytes.
16057eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        */
16067eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        if (state == XML_CONTENT_STATE)
16077eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel          break;
16087eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        *encPtr = encodingTable[UTF_16LE_ENC];
16097eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
16107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      }
16117eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel      break;
16127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    }
16137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  }
16147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
16157eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
16167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
16177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
16187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
16197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define NS(x) x
16207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define ns(x) x
16217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define XML_TOK_NS_C
16227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "xmltok_ns.c"
16237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#undef XML_TOK_NS_C
16247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#undef NS
16257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#undef ns
16267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
16277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifdef XML_NS
16287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
16297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define NS(x) x ## NS
16307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define ns(x) x ## _ns
16317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
16327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define XML_TOK_NS_C
16337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "xmltok_ns.c"
16347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#undef XML_TOK_NS_C
16357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
16367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#undef NS
16377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#undef ns
16387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
16397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielENCODING *
16407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanielXmlInitUnknownEncodingNS(void *mem,
16417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                         int *table,
16427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                         CONVERTER convert,
16437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel                         void *userData)
16447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel{
16457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
16467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  if (enc)
16477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
16487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel  return enc;
16497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel}
16507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
16517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif /* XML_NS */
1652