11320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
21320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   See the file COPYING for copying permission.
31320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci*/
41320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
51320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include <stddef.h>
61320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
71320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef COMPILED_FROM_DSP
81320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "winconfig.h"
91320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#elif defined(MACOS_CLASSIC)
101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "macconfig.h"
111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#elif defined(__amigaos__)
121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "amigaconfig.h"
131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#elif defined(__WATCOMC__)
141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "watcomconfig.h"
151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else
161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef HAVE_EXPAT_CONFIG_H
171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include <expat_config.h>
181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif /* ndef COMPILED_FROM_DSP */
201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "expat_external.h"
221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "internal.h"
231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "xmltok.h"
241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "nametab.h"
251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Optional extra scanner slot: the ignore-section tokenizer is appended to
   the first scanner group only when XML_DTD is defined.
*/
#ifndef XML_DTD
#define IGNORE_SECTION_TOK_VTABLE /* as nothing */
#else
#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#endif

/* Initializer fragment naming the tokenizer entry points of one encoding
   family.  PREFIX is not defined at this point; it has to be defined by
   the time VTABLE1 / VTABLE are expanded, and it decides which concrete
   functions end up in the table.
*/
#define VTABLE1 \
  { \
    PREFIX(prologTok), \
    PREFIX(contentTok), \
    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE \
  }, \
  { \
    PREFIX(attributeValueTok), \
    PREFIX(entityValueTok) \
  }, \
  PREFIX(sameName), \
  PREFIX(nameMatchesAscii), \
  PREFIX(nameLength), \
  PREFIX(skipS), \
  PREFIX(getAtts), \
  PREFIX(charRefNumber), \
  PREFIX(predefinedEntityName), \
  PREFIX(updatePosition), \
  PREFIX(isPublicId)

/* VTABLE1 followed by the two converters that share the same prefix. */
#define VTABLE \
  VTABLE1, \
  PREFIX(toUtf8), \
  PREFIX(toUtf16)
471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Nonzero when the character with high byte hi and low byte lo is flagged
   in namingBitmap.  pages[hi] selects a group of eight bitmap words, the
   top three bits of lo pick the word inside that group and the bottom five
   bits of lo pick the bit.  The mask is built from an unsigned 1 so that
   selecting bit 31 never shifts into the sign bit of an int.
*/
#define UCS2_GET_NAMING(pages, hi, lo) \
   (namingBitmap[((pages)[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))

/* A 2 byte UTF-8 representation splits the character's 11 bits between
   the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
   pages, 3 bits to add to that index and 5 bits to generate the mask.
   byte must point at unsigned char data.
*/
#define UTF8_GET_NAMING2(pages, byte) \
    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                      + ((((byte)[0]) & 3) << 1) \
                      + ((((byte)[1]) >> 5) & 1)] \
         & (1u << (((byte)[1]) & 0x1F)))

/* A 3 byte UTF-8 representation splits the character's 16 bits between
   the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
   into pages, 3 bits to add to that index and 5 bits to generate the
   mask.  byte must point at unsigned char data.
*/
#define UTF8_GET_NAMING3(pages, byte) \
  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                             + ((((byte)[1]) >> 2) & 0xF)] \
                       << 3) \
                      + ((((byte)[1]) & 3) << 1) \
                      + ((((byte)[2]) >> 5) & 1)] \
         & (1u << (((byte)[2]) & 0x1F)))

/* Dispatches on the sequence length n: 2 and 3 byte sequences are looked
   up in the bitmap, every other length yields 0.
*/
#define UTF8_GET_NAMING(pages, p, n) \
  ((n) == 2 \
  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
  : ((n) == 3 \
     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
     : 0))
801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
   of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
   with the additional restriction of not allowing the Unicode
   code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
   Implementation details:
     (A & 0x80) == 0     means A < 0x80
   and
     (A & 0xC0) == 0xC0  means A > 0xBF

   Each macro yields nonzero when the sequence starting at p is invalid.
   p must point at unsigned char data.  Every use of p is parenthesized,
   so an expression such as buf + 2 may be passed directly.
*/

/* Two bytes: lead byte below 0xC2 (overlong) or a second byte that is
   not a continuation byte.
*/
#define UTF8_INVALID2(p) \
  ((*(p)) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)

/* Three bytes: bad continuation bytes, U+FFFE / U+FFFF, overlong forms
   after 0xE0 and the range excluded after 0xED.
*/
#define UTF8_INVALID3(p) \
  (((p)[2] & 0x80) == 0 \
  || \
  ((*(p)) == 0xEF && (p)[1] == 0xBF \
    ? \
    (p)[2] > 0xBD \
    : \
    ((p)[2] & 0xC0) == 0xC0) \
  || \
  ((*(p)) == 0xE0 \
    ? \
    (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
    : \
    ((p)[1] & 0x80) == 0 \
    || \
    ((*(p)) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))

/* Four bytes: bad continuation bytes, overlong forms after 0xF0 and
   second bytes above 0x8F after 0xF4.
*/
#define UTF8_INVALID4(p) \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
  || \
  ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
  || \
  ((*(p)) == 0xF0 \
    ? \
    (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
    : \
    ((p)[1] & 0x80) == 0 \
    || \
    ((*(p)) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
1231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
1251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciisNever(const ENCODING *enc, const char *p)
1261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
1271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return 0;
1281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
1291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
1311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isName2(const ENCODING *enc, const char *p)
1321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
1331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
1341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
1351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
1371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isName3(const ENCODING *enc, const char *p)
1381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
1391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
1401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
1411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define utf8_isName4 isNever
1431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
1451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isNmstrt2(const ENCODING *enc, const char *p)
1461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
1471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
1481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
1491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
1511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isNmstrt3(const ENCODING *enc, const char *p)
1521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
1531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
1541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
1551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define utf8_isNmstrt4 isNever
1571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
1591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isInvalid2(const ENCODING *enc, const char *p)
1601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
1611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return UTF8_INVALID2((const unsigned char *)p);
1621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
1631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
1651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isInvalid3(const ENCODING *enc, const char *p)
1661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
1671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return UTF8_INVALID3((const unsigned char *)p);
1681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
1691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
1711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isInvalid4(const ENCODING *enc, const char *p)
1721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
1731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return UTF8_INVALID4((const unsigned char *)p);
1741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
1751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistruct normal_encoding {
1771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ENCODING enc;
1781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  unsigned char type[256];
1791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE
1801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
1811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
1821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
1831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
1841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRCALL *charMatches)(const ENCODING *, const char *, int);
1851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif /* XML_MIN_SIZE */
1861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isName2)(const ENCODING *, const char *);
1871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isName3)(const ENCODING *, const char *);
1881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isName4)(const ENCODING *, const char *);
1891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
1901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
1911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
1921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
1931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
1941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
1951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci};
1961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define AS_NORMAL_ENCODING(enc)   ((const struct normal_encoding *) (enc))
1981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* STANDARD_VTABLE(E) names the five E-prefixed accessors that exist only in
   XML_MIN_SIZE builds (note the trailing comma, so it can be written
   directly in front of NORMAL_VTABLE); in other builds it expands to
   nothing.
*/
#ifndef XML_MIN_SIZE

#define STANDARD_VTABLE(E) /* as nothing */

#else

#define STANDARD_VTABLE(E) \
 E ## byteType, \
 E ## isNameMin, \
 E ## isNmstrtMin, \
 E ## byteToAscii, \
 E ## charMatches,

#endif

/* NORMAL_VTABLE(E) names the nine E-prefixed multi-byte predicates in the
   order in which struct normal_encoding declares them.
*/
#define NORMAL_VTABLE(E) \
 E ## isName2,   E ## isName3,   E ## isName4, \
 E ## isNmstrt2, E ## isNmstrt3, E ## isNmstrt4, \
 E ## isInvalid2, E ## isInvalid3, E ## isInvalid4
2241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int FASTCALL checkCharRefNumber(int);
2261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "xmltok_impl.h"
2281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "ascii.h"
2291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
#ifdef XML_MIN_SIZE
/* In XML_MIN_SIZE builds the single-byte "Min" predicates never match and
   the minimum bytes per character is read from the encoding itself.
*/
#define sb_isNameMin isNever
#define sb_isNmstrtMin isNever
#define MINBPC(enc) ((enc)->minBytesPerChar)
#else
/* minimum bytes per character */
#define MINBPC(enc) 1
#endif
2411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Byte-type code of the byte at p, read from the type table of the
   normal_encoding that enc points into.  The lookup is read-only, so the
   cast keeps the const qualifier of enc instead of discarding it.
*/
#define SB_BYTE_TYPE(enc, p) \
  (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])

#ifdef XML_MIN_SIZE
/* Function form of SB_BYTE_TYPE, for storage in the byteType slot. */
static int PTRFASTCALL
sb_byteType(const ENCODING *enc, const char *p)
{
  return SB_BYTE_TYPE(enc, p);
}
/* XML_MIN_SIZE builds go through the encoding's byteType pointer. */
#define BYTE_TYPE(enc, p) \
 (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
#else
/* Other builds index the table directly. */
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif
2561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* BYTE_TO_ASCII(enc, p) yields the byte at p as a character value. */
#ifndef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) (*(p))
#else
/* XML_MIN_SIZE builds dispatch through the encoding's byteToAscii slot;
   sb_byteToAscii is the single-byte implementation for that slot.
*/
static int PTRFASTCALL
sb_byteToAscii(const ENCODING *enc, const char *p)
{
  return *p;
}
#define BYTE_TO_ASCII(enc, p) \
 (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
#endif
2681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Multi-byte predicates: n (2, 3 or 4) is pasted onto the slot name, so the
   call goes to isName<n> / isNmstrt<n> / isInvalid<n> of the encoding.
*/
#define IS_INVALID_CHAR(enc, p, n) \
 (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))
#define IS_NAME_CHAR(enc, p, n) \
 (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) \
 (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))

/* Minimum-width predicates: constant 0 unless XML_MIN_SIZE routes them
   through the encoding's isNameMin / isNmstrtMin slots.
*/
#ifndef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#else
#define IS_NAME_CHAR_MINBPC(enc, p) \
 (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
 (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
#endif
2851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* CHAR_MATCHES(enc, p, c) is nonzero when the byte at p equals c. */
#ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \
 (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
/* Single-byte implementation for the charMatches slot. */
static int PTRCALL
sb_charMatches(const ENCODING *enc, const char *p, int c)
{
  return *p == c;
}
#else
/* c is an ASCII character.  It is parenthesized so that an expression
   argument (for example a conditional) is compared as a whole.
*/
#define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#endif
2981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define PREFIX(ident) normal_ ## ident
3001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define XML_TOK_IMPL_C
3011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "xmltok_impl.c"
3021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef XML_TOK_IMPL_C
3031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef MINBPC
3051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BYTE_TYPE
3061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BYTE_TO_ASCII
3071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef CHAR_MATCHES
3081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NAME_CHAR
3091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NAME_CHAR_MINBPC
3101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NMSTRT_CHAR
3111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NMSTRT_CHAR_MINBPC
3121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_INVALID_CHAR
3131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* UTF8_cvalN is the value of the masked first byte of an N byte UTF-8
   sequence.
*/
enum {
  UTF8_cval1 = 0x00,
  UTF8_cval2 = 0xC0,
  UTF8_cval3 = 0xE0,
  UTF8_cval4 = 0xF0
};
3201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL
3221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_toUtf8(const ENCODING *enc,
3231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci            const char **fromP, const char *fromLim,
3241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci            char **toP, const char *toLim)
3251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
3261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  char *to;
3271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const char *from;
3281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (fromLim - *fromP > toLim - *toP) {
3291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    /* Avoid copying partial characters. */
3301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
3311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
3321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        break;
3331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
3341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (to = *toP, from = *fromP; from != fromLim; from++, to++)
3351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *to = *from;
3361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  *fromP = from;
3371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  *toP = to;
3381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
3391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL
3411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_toUtf16(const ENCODING *enc,
3421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci             const char **fromP, const char *fromLim,
3431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci             unsigned short **toP, const unsigned short *toLim)
3441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
3451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  unsigned short *to = *toP;
3461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const char *from = *fromP;
3471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  while (from != fromLim && to != toLim) {
3481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
3491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case BT_LEAD2:
3501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
3511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      from += 2;
3521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
3531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case BT_LEAD3:
3541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *to++ = (unsigned short)(((from[0] & 0xf) << 12)
3551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                               | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
3561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      from += 3;
3571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
3581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case BT_LEAD4:
3591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      {
3601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        unsigned long n;
3611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        if (to + 1 == toLim)
3621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          goto after;
3631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
3641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci            | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
3651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        n -= 0x10000;
3661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        to[0] = (unsigned short)((n >> 10) | 0xD800);
3671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
3681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        to += 2;
3691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        from += 4;
3701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      }
3711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
3721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    default:
3731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *to++ = *from++;
3741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
3751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
3761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
3771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciafter:
3781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  *fromP = from;
3791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  *toP = to;
3801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
3811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
#ifdef XML_NS
/* UTF-8 encoding table for XML_NS builds.  The byte-type table is the
   concatenation of asciitab.h and utf8tab.h; the trailing 1, 1, 0 fill the
   ENCODING members that follow the two converters (declared in xmltok.h).
*/
static const struct normal_encoding utf8_encoding_ns = {
  {
    VTABLE1,
    utf8_toUtf8,
    utf8_toUtf16,
    1, 1, 0
  },
  {
#include "asciitab.h"
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_)
  NORMAL_VTABLE(utf8_)
};
#endif
3921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* UTF-8 encoding descriptor used outside the XML_NS-only variants.
   BT_COLON is defined as BT_NMSTRT only while asciitab.h is included, so
   every BT_COLON entry of that table is emitted here as BT_NMSTRT; the
   definition is removed again before utf8tab.h is included.
   NOTE(review): the trailing "1, 1, 0" presumably initialise
   minBytesPerChar / isUtf8 / isUtf16 of ENCODING - confirm in xmltok.h. */
static const struct normal_encoding utf8_encoding = {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
4031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
4041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS
4051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* "Internal" UTF-8 encoding descriptor for the XML_NS build.  Identical in
   shape to utf8_encoding_ns except that the first part of the byte-type
   table comes from iasciitab.h instead of asciitab.h.
   NOTE(review): how iasciitab.h differs from asciitab.h is not visible
   here - check those headers before relying on the distinction. */
static const struct normal_encoding internal_utf8_encoding_ns = {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#include "iasciitab.h"
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
4141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
4151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
4161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* "Internal" UTF-8 encoding descriptor with BT_COLON remapped.  The table
   is iasciitab.h (included while BT_COLON is defined as BT_NMSTRT, so colon
   entries become BT_NMSTRT) followed by utf8tab.h.
   NOTE(review): how iasciitab.h differs from asciitab.h is not visible
   here - check those headers before relying on the distinction. */
static const struct normal_encoding internal_utf8_encoding = {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
4271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
4281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL
4291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccilatin1_toUtf8(const ENCODING *enc,
4301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci              const char **fromP, const char *fromLim,
4311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci              char **toP, const char *toLim)
4321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
4331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (;;) {
4341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    unsigned char c;
4351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (*fromP == fromLim)
4361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
4371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    c = (unsigned char)**fromP;
4381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (c & 0x80) {
4391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (toLim - *toP < 2)
4401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        break;
4411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
4421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *(*toP)++ = (char)((c & 0x3f) | 0x80);
4431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      (*fromP)++;
4441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
4451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    else {
4461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (*toP == toLim)
4471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        break;
4481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *(*toP)++ = *(*fromP)++;
4491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
4501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
4511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
4521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
4531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL
4541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccilatin1_toUtf16(const ENCODING *enc,
4551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const char **fromP, const char *fromLim,
4561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               unsigned short **toP, const unsigned short *toLim)
4571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
4581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  while (*fromP != fromLim && *toP != toLim)
4591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *(*toP)++ = (unsigned char)*(*fromP)++;
4601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
4611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
4621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS
4631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Latin-1 encoding descriptor for the XML_NS build (defined inside an
   #ifdef XML_NS section).  Uses the latin1_ converters and a byte-type table
   made of asciitab.h followed by latin1tab.h; only STANDARD_VTABLE(sb_) is
   supplied after the table.
   NOTE(review): the trailing "1, 0, 0" presumably initialise
   minBytesPerChar / isUtf8 / isUtf16 of ENCODING - confirm in xmltok.h. */
static const struct normal_encoding latin1_encoding_ns = {
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(sb_)
};
4721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
4731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
4741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Latin-1 encoding descriptor with BT_COLON remapped: asciitab.h is
   included while BT_COLON is defined as BT_NMSTRT (so colon entries become
   BT_NMSTRT), then latin1tab.h supplies the rest of the table.
   NOTE(review): the trailing "1, 0, 0" presumably initialise
   minBytesPerChar / isUtf8 / isUtf16 of ENCODING - confirm in xmltok.h. */
static const struct normal_encoding latin1_encoding = {
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(sb_)
};
4851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
4861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL
4871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciascii_toUtf8(const ENCODING *enc,
4881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci             const char **fromP, const char *fromLim,
4891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci             char **toP, const char *toLim)
4901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
4911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  while (*fromP != fromLim && *toP != toLim)
4921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *(*toP)++ = *(*fromP)++;
4931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
4941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
4951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS
4961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* ASCII encoding descriptor for the XML_NS build (defined inside an
   #ifdef XML_NS section).  Converts with ascii_toUtf8 and latin1_toUtf16.
   Only asciitab.h contributes explicit byte-type entries; any remaining
   table entries get C's default zero initialisation, which per the comment
   below is the value of BT_NONXML. */
static const struct normal_encoding ascii_encoding_ns = {
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  {
#include "asciitab.h"
/* BT_NONXML == 0 */
  },
  STANDARD_VTABLE(sb_)
};
5051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
5061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
5071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* ASCII encoding descriptor with BT_COLON remapped: asciitab.h is included
   while BT_COLON is defined as BT_NMSTRT, so colon entries become
   BT_NMSTRT.  Converts with ascii_toUtf8 and latin1_toUtf16.  Table entries
   not supplied by asciitab.h get C's default zero initialisation, which per
   the comment below is the value of BT_NONXML. */
static const struct normal_encoding ascii_encoding = {
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
/* BT_NONXML == 0 */
  },
  STANDARD_VTABLE(sb_)
};
5181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
5191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
5201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunicode_byte_type(char hi, char lo)
5211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
5221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  switch ((unsigned char)hi) {
5231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0xD8: case 0xD9: case 0xDA: case 0xDB:
5241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return BT_LEAD4;
5251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0xDC: case 0xDD: case 0xDE: case 0xDF:
5261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return BT_TRAIL;
5271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0xFF:
5281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    switch ((unsigned char)lo) {
5291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0xFF:
5301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0xFE:
5311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return BT_NONXML;
5321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
5331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    break;
5341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
5351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return BT_NONASCII;
5361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
5371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Generates the function E##toUtf8, which converts 2-byte units read from
   *fromP (up to fromLim) into UTF-8 written at *toP (up to toLim).

   GET_LO and GET_HI must be defined where the macro is expanded; they select
   the low and high byte of the unit at a given address.  The high byte picks
   the output length: 1 byte (hi == 0, lo < 0x80), 2 bytes (hi 0..7),
   4 bytes (hi 0xD8..0xDB, combined with the following unit), otherwise
   3 bytes.  Before each character the remaining output space is checked; if
   it is too small, *fromP is set to the unit that did not fit and the
   function returns.  *toP is advanced in place as bytes are written.

   NOTE(review): the loop ends only when from == fromLim exactly, and the
   0xD8..0xDB branch reads the unit at from + 2 without comparing it against
   fromLim.  Both rely on the caller passing an even-length range that does
   not end on such a unit - confirm that callers guarantee this. */
#define DEFINE_UTF16_TO_UTF8(E) \
static void  PTRCALL \
E ## toUtf8(const ENCODING *enc, \
            const char **fromP, const char *fromLim, \
            char **toP, const char *toLim) \
{ \
  const char *from; \
  for (from = *fromP; from != fromLim; from += 2) { \
    int plane; \
    unsigned char lo2; \
    unsigned char lo = GET_LO(from); \
    unsigned char hi = GET_HI(from); \
    switch (hi) { \
    case 0: \
      /* values below 0x80 are emitted as a single byte */ \
      if (lo < 0x80) { \
        if (*toP == toLim) { \
          *fromP = from; \
          return; \
        } \
        *(*toP)++ = lo; \
        break; \
      } \
      /* fall through */ \
    case 0x1: case 0x2: case 0x3: \
    case 0x4: case 0x5: case 0x6: case 0x7: \
      /* two output bytes: 3 bits of hi + top 2 bits of lo, then low 6 bits */ \
      if (toLim -  *toP < 2) { \
        *fromP = from; \
        return; \
      } \
      *(*toP)++ = ((lo >> 6) | (hi << 2) |  UTF8_cval2); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    default: \
      if (toLim -  *toP < 3)  { \
        *fromP = from; \
        return; \
      } \
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
      *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
      *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
      /* this unit plus the next one produce four output bytes */ \
      if (toLim -  *toP < 4) { \
        *fromP = from; \
        return; \
      } \
      /* 4 bits taken from hi/lo, plus one */ \
      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
      *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
      *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
      /* step to the second unit; the loop increment then skips past it */ \
      from += 2; \
      lo2 = GET_LO(from); \
      *(*toP)++ = (((lo & 0x3) << 4) \
                   | ((GET_HI(from) & 0x3) << 2) \
                   | (lo2 >> 6) \
                   | 0x80); \
      *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
      break; \
    } \
  } \
  *fromP = from; \
}
6001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Generates the function E##toUtf16, which copies 2-byte units from *fromP
   (up to fromLim) into unsigned shorts at *toP (up to toLim), composing each
   value as (high byte << 8) | low byte via GET_HI / GET_LO, which must be
   defined where the macro is expanded.

   If the input holds more than the output can take and the high byte of the
   last input unit is in 0xD8..0xDF, that last unit is excluded from the
   range first.  Copying stops when either limit is reached; *fromP and *toP
   are advanced past what was consumed and produced. */
#define DEFINE_UTF16_TO_UTF16(E) \
static void  PTRCALL \
E ## toUtf16(const ENCODING *enc, \
             const char **fromP, const char *fromLim, \
             unsigned short **toP, const unsigned short *toLim) \
{ \
  const char *src = *fromP; \
  unsigned short *dst = *toP; \
  /* Avoid copying first half only of surrogate */ \
  if (fromLim - src > ((toLim - dst) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
    fromLim -= 2; \
  while (src != fromLim && dst != toLim) { \
    *dst++ = (GET_HI(src) << 8) | GET_LO(src); \
    src += 2; \
  } \
  *fromP = src; \
  *toP = dst; \
}
6141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Little-endian byte accessors: byte 0 of a unit is the low byte and byte 1
   the high byte.  SET2 stores a 16-bit value in that order.  With these in
   effect the two DEFINE_ macros are expanded to create little2_toUtf8 and
   little2_toUtf16, after which the accessors are removed again so they can
   be redefined. */
#define SET2(ptr, ch) \
  (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])

DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)

#undef SET2
#undef GET_LO
#undef GET_HI
6261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Big-endian byte accessors: byte 0 of a unit is the high byte and byte 1
   the low byte.  SET2 stores a 16-bit value in that order.  With these in
   effect the two DEFINE_ macros are expanded to create big2_toUtf8 and
   big2_toUtf16, after which the accessors are removed again. */
#define SET2(ptr, ch) \
  (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
#define GET_HI(ptr) ((unsigned char)(ptr)[0])

DEFINE_UTF16_TO_UTF8(big2_)
DEFINE_UTF16_TO_UTF16(big2_)

#undef SET2
#undef GET_LO
#undef GET_HI
6381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Classification helpers for little-endian 2-byte units, where p[0] is the
   low byte and p[1] the high byte.  Every macro argument is parenthesized in
   the expansion so that compound expressions (e.g. "ptr + 2" for p, or a
   conditional expression for c) expand with the intended precedence.

   LITTLE2_BYTE_TYPE      - if the high byte is 0, the entry for the low byte
                            in the encoding's type table; otherwise the result
                            of unicode_byte_type(high, low).
   LITTLE2_BYTE_TO_ASCII  - the low byte if the high byte is 0, else -1.
   LITTLE2_CHAR_MATCHES   - non-zero iff the high byte is 0 and the low byte
                            equals c.
   LITTLE2_IS_NAME_CHAR_MINBPC / LITTLE2_IS_NMSTRT_CHAR_MINBPC
                          - UCS2_GET_NAMING lookup in namePages / nmstrtPages
                            with (high byte, low byte). */
#define LITTLE2_BYTE_TYPE(enc, p) \
 ((p)[1] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
  : unicode_byte_type((p)[1], (p)[0]))
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == (c))
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
6491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
6501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE
6511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Function form of LITTLE2_BYTE_TYPE, compiled only when XML_MIN_SIZE is
   defined: returns the byte type of the little-endian 2-byte unit at p. */
static int PTRFASTCALL
little2_byteType(const ENCODING *enc, const char *p)
{
  return LITTLE2_BYTE_TYPE(enc, p);
}
6571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Function form of LITTLE2_BYTE_TO_ASCII (XML_MIN_SIZE builds): returns the
   low byte of the unit at p when its high byte is 0, otherwise -1. */
static int PTRFASTCALL
little2_byteToAscii(const ENCODING *enc, const char *p)
{
  return LITTLE2_BYTE_TO_ASCII(enc, p);
}
6631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Function form of LITTLE2_CHAR_MATCHES (XML_MIN_SIZE builds): non-zero iff
   the unit at p has high byte 0 and low byte equal to c. */
static int PTRCALL
little2_charMatches(const ENCODING *enc, const char *p, int c)
{
  return LITTLE2_CHAR_MATCHES(enc, p, c);
}
6691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Function form of LITTLE2_IS_NAME_CHAR_MINBPC (XML_MIN_SIZE builds):
   returns the namePages lookup result for the unit at p. */
static int PTRFASTCALL
little2_isNameMin(const ENCODING *enc, const char *p)
{
  return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
}
6751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Function form of LITTLE2_IS_NMSTRT_CHAR_MINBPC (XML_MIN_SIZE builds):
   returns the nmstrtPages lookup result for the unit at p. */
static int PTRFASTCALL
little2_isNmstrtMin(const ENCODING *enc, const char *p)
{
  return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
}
6811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* XML_MIN_SIZE builds: redefine VTABLE so the little2 encoding tables below
   pick up VTABLE1 followed by the little2 converters. */
#undef VTABLE
#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
6841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
6851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else /* not XML_MIN_SIZE */
6861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Non-XML_MIN_SIZE builds: set up the parameter macros for little-endian
   2-byte units and include xmltok_impl.c with them in effect.
   NOTE(review): xmltok_impl.c is not visible here; it presumably uses
   PREFIX() to emit the little2_-prefixed tokenizer functions - confirm. */
#undef PREFIX
#define PREFIX(ident) little2_ ## ident
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
/* The n-byte name checks are constant false for this encoding; only the
   MINBPC variants perform a lookup. */
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR(enc, p, n) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)

#define XML_TOK_IMPL_C
#include "xmltok_impl.c"
#undef XML_TOK_IMPL_C

/* Remove the parameter macros so they can be redefined later.
   IS_INVALID_CHAR is not defined in this section; undefining a macro that
   is not defined is permitted and has no effect. */
#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
7121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif /* not XML_MIN_SIZE */
7141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS
7161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Little-endian 2-byte encoding descriptor for the XML_NS build (defined
   inside an #ifdef XML_NS section).  The last scalar initialiser is 1 when
   BYTEORDER == 1234 and 0 otherwise.  The byte-type table is asciitab.h
   followed by latin1tab.h.
   NOTE(review): "2, 0, <flag>" presumably initialise minBytesPerChar /
   isUtf8 / isUtf16 of ENCODING - confirm in xmltok.h. */
static const struct normal_encoding little2_encoding_ns = {
  { VTABLE, 2, 0,
#if BYTEORDER == 1234
    1
#else
    0
#endif
  },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};
7311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
7331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Little-endian 2-byte encoding descriptor with BT_COLON remapped:
   asciitab.h is included while BT_COLON is defined as BT_NMSTRT, then
   latin1tab.h supplies the rest of the table.  The last scalar initialiser
   is 1 when BYTEORDER == 1234 and 0 otherwise.
   NOTE(review): "2, 0, <flag>" presumably initialise minBytesPerChar /
   isUtf8 / isUtf16 of ENCODING - confirm in xmltok.h. */
static const struct normal_encoding little2_encoding = {
  { VTABLE, 2, 0,
#if BYTEORDER == 1234
    1
#else
    0
#endif
  },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};
7501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#if BYTEORDER != 4321
7521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS
7541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* "Internal" little-endian 2-byte encoding descriptor for the XML_NS build.
   Compiled only when BYTEORDER != 4321 and XML_NS is defined.  The last
   scalar initialiser is always 1, and the first part of the byte-type table
   comes from iasciitab.h instead of asciitab.h.
   NOTE(review): how iasciitab.h differs from asciitab.h is not visible
   here - check those headers before relying on the distinction. */
static const struct normal_encoding internal_little2_encoding_ns = {
  { VTABLE, 2, 0, 1 },
  {
#include "iasciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};
7631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
7651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* "Internal" little-endian 2-byte encoding descriptor with BT_COLON
   remapped.  Compiled only when BYTEORDER != 4321.  iasciitab.h is included
   while BT_COLON is defined as BT_NMSTRT, then latin1tab.h supplies the
   rest of the table; the last scalar initialiser is always 1.
   NOTE(review): how iasciitab.h differs from asciitab.h is not visible
   here - check those headers before relying on the distinction. */
static const struct normal_encoding internal_little2_encoding = {
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};
7761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
7781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Classification helpers for big-endian 2-byte units, where p[0] is the
   high byte and p[1] the low byte.  Every macro argument is parenthesized in
   the expansion so that compound expressions (e.g. "ptr + 2" for p, or a
   conditional expression for c) expand with the intended precedence.

   BIG2_BYTE_TYPE      - if the high byte is 0, the entry for the low byte in
                         the encoding's type table; otherwise the result of
                         unicode_byte_type(high, low).
   BIG2_BYTE_TO_ASCII  - the low byte if the high byte is 0, else -1.
   BIG2_CHAR_MATCHES   - non-zero iff the high byte is 0 and the low byte
                         equals c.
   BIG2_IS_NAME_CHAR_MINBPC / BIG2_IS_NMSTRT_CHAR_MINBPC
                       - UCS2_GET_NAMING lookup in namePages / nmstrtPages
                         with (high byte, low byte). */
#define BIG2_BYTE_TYPE(enc, p) \
 ((p)[0] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
  : unicode_byte_type((p)[0], (p)[1]))
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == (c))
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
7901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE
7921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
7941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibig2_byteType(const ENCODING *enc, const char *p)
7951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
7961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return BIG2_BYTE_TYPE(enc, p);
7971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
7981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
7991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
8001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibig2_byteToAscii(const ENCODING *enc, const char *p)
8011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
8021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return BIG2_BYTE_TO_ASCII(enc, p);
8031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
8041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
8051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRCALL
8061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibig2_charMatches(const ENCODING *enc, const char *p, int c)
8071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
8081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return BIG2_CHAR_MATCHES(enc, p, c);
8091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
8101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
8111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
8121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibig2_isNameMin(const ENCODING *enc, const char *p)
8131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
8141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
8151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
8161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
8171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
8181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibig2_isNmstrtMin(const ENCODING *enc, const char *p)
8191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
8201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
8211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
8221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Re-point VTABLE at the big2_ converters: the VTABLE1 entries followed by
   big2_toUtf8 and big2_toUtf16.  The encoding tables defined after this
   point pick up this definition. */
#undef VTABLE
#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
8251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
8261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else /* not XML_MIN_SIZE */
8271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Instantiate the generic tokenizer in xmltok_impl.c for the big2_ family.
   PREFIX() pastes "big2_" onto each generated identifier, and the
   per-encoding hooks below are mapped onto the BIG2_* macros.  The order
   (define hooks, include, undefine hooks) must be kept. */
#undef PREFIX
#define PREFIX(ident) big2_ ## ident
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
/* The n-byte name tests are constant 0 here; only the *_MINBPC forms do a
   real lookup for this encoding. */
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR(enc, p, n) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)

#define XML_TOK_IMPL_C
#include "xmltok_impl.c"
#undef XML_TOK_IMPL_C

/* Tear the hooks down again so later code does not see them. */
#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
/* NOTE(review): IS_INVALID_CHAR is not defined in this block; the #undef is
   harmless and presumably kept for symmetry with other instantiations --
   confirm. */
#undef IS_INVALID_CHAR
8531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
8541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif /* not XML_MIN_SIZE */
8551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
8561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS
8571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Namespace-aware (XML_NS) big2_ encoding.  The byte-type table is
   asciitab.h followed by latin1tab.h with no BT_COLON remapping.  The last
   scalar in the leading initializer is 1 only when BYTEORDER == 4321,
   otherwise 0.
   NOTE(review): "2, 0, <flag>" presumably fill minBytesPerChar, isUtf8 and
   isUtf16 of ENCODING -- confirm against xmltok.h. */
static const struct normal_encoding big2_encoding_ns = {
  { VTABLE, 2, 0,
#if BYTEORDER == 4321
  1
#else
  0
#endif
  },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};
8721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
8731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
8741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Non-namespace big2_ encoding.  Same layout as the _ns variant, except that
   BT_COLON is redefined as BT_NMSTRT while asciitab.h is expanded, so every
   BT_COLON entry of that table becomes BT_NMSTRT.  The last scalar in the
   leading initializer is 1 only when BYTEORDER == 4321, otherwise 0. */
static const struct normal_encoding big2_encoding = {
  { VTABLE, 2, 0,
#if BYTEORDER == 4321
  1
#else
  0
#endif
  },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};
8911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
8921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#if BYTEORDER != 1234
8931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
8941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS
8951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Namespace-aware internal big2_ encoding; only compiled when
   BYTEORDER != 1234 and XML_NS is defined.  It uses iasciitab.h (rather than
   asciitab.h) followed by latin1tab.h, and its last leading scalar is fixed
   at 1 instead of depending on BYTEORDER. */
static const struct normal_encoding internal_big2_encoding_ns = {
  { VTABLE, 2, 0, 1 },
  {
#include "iasciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};
9041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
9051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
9061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Non-namespace internal big2_ encoding; only compiled when
   BYTEORDER != 1234.  BT_COLON is redefined as BT_NMSTRT while iasciitab.h
   is expanded, so every BT_COLON entry of that table becomes BT_NMSTRT. */
static const struct normal_encoding internal_big2_encoding = {
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};
9171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
9181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
9191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Drop the identifier-pasting macro; the helpers that follow are written
   out by hand and do not use it. */
#undef PREFIX
9211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
9221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int FASTCALL
9231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistreqci(const char *s1, const char *s2)
9241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
9251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (;;) {
9261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    char c1 = *s1++;
9271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    char c2 = *s2++;
9281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (ASCII_a <= c1 && c1 <= ASCII_z)
9291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      c1 += ASCII_A - ASCII_a;
9301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (ASCII_a <= c2 && c2 <= ASCII_z)
9311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      c2 += ASCII_A - ASCII_a;
9321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (c1 != c2)
9331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return 0;
9341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (!c1)
9351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
9361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
9371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return 1;
9381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
9391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
9401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL
9411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciinitUpdatePosition(const ENCODING *enc, const char *ptr,
9421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                   const char *end, POSITION *pos)
9431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
9441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
9451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
9461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
9471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int
9481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TuccitoAscii(const ENCODING *enc, const char *ptr, const char *end)
9491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
9501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  char buf[1];
9511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  char *p = buf;
9521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
9531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (p == buf)
9541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return -1;
9551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  else
9561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return buf[0];
9571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
9581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
9591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int FASTCALL
9601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciisSpace(int c)
9611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
9621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  switch (c) {
9631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0x20:
9641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0xD:
9651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0xA:
9661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0x9:
9671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 1;
9681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
9691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return 0;
9701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
9711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
9721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* Return 1 if there's just optional white space or there's an S
9731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   followed by name=val.
9741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci*/
9751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int
9761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciparsePseudoAttribute(const ENCODING *enc,
9771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                     const char *ptr,
9781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                     const char *end,
9791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                     const char **namePtr,
9801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                     const char **nameEndPtr,
9811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                     const char **valPtr,
9821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                     const char **nextTokPtr)
9831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
9841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int c;
9851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  char open;
9861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (ptr == end) {
9871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *namePtr = NULL;
9881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 1;
9891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
9901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (!isSpace(toAscii(enc, ptr, end))) {
9911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *nextTokPtr = ptr;
9921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
9931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
9941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  do {
9951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    ptr += enc->minBytesPerChar;
9961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  } while (isSpace(toAscii(enc, ptr, end)));
9971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (ptr == end) {
9981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *namePtr = NULL;
9991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 1;
10001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
10011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  *namePtr = ptr;
10021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (;;) {
10031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    c = toAscii(enc, ptr, end);
10041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (c == -1) {
10051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *nextTokPtr = ptr;
10061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return 0;
10071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
10081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (c == ASCII_EQUALS) {
10091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *nameEndPtr = ptr;
10101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
10111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
10121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (isSpace(c)) {
10131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *nameEndPtr = ptr;
10141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      do {
10151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        ptr += enc->minBytesPerChar;
10161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      } while (isSpace(c = toAscii(enc, ptr, end)));
10171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (c != ASCII_EQUALS) {
10181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        *nextTokPtr = ptr;
10191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        return 0;
10201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      }
10211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
10221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
10231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    ptr += enc->minBytesPerChar;
10241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
10251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (ptr == *namePtr) {
10261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *nextTokPtr = ptr;
10271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
10281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
10291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ptr += enc->minBytesPerChar;
10301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  c = toAscii(enc, ptr, end);
10311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  while (isSpace(c)) {
10321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    ptr += enc->minBytesPerChar;
10331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    c = toAscii(enc, ptr, end);
10341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
10351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (c != ASCII_QUOT && c != ASCII_APOS) {
10361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *nextTokPtr = ptr;
10371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
10381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
10391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  open = (char)c;
10401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ptr += enc->minBytesPerChar;
10411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  *valPtr = ptr;
10421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (;; ptr += enc->minBytesPerChar) {
10431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    c = toAscii(enc, ptr, end);
10441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (c == open)
10451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
10461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (!(ASCII_a <= c && c <= ASCII_z)
10471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        && !(ASCII_A <= c && c <= ASCII_Z)
10481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        && !(ASCII_0 <= c && c <= ASCII_9)
10491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        && c != ASCII_PERIOD
10501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        && c != ASCII_MINUS
10511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        && c != ASCII_UNDERSCORE) {
10521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *nextTokPtr = ptr;
10531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return 0;
10541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
10551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
10561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  *nextTokPtr = ptr + enc->minBytesPerChar;
10571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return 1;
10581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
10591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Keyword strings compared against pseudo-attribute names and values by
   doParseXmlDecl().  Each is a NUL-terminated array spelled with ASCII_*
   constants instead of character literals (presumably so the bytes stay
   ASCII regardless of the execution character set -- confirm). */

/* Pseudo-attribute name "version". */
static const char KW_version[] = {
  ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'
};

/* Pseudo-attribute name "encoding". */
static const char KW_encoding[] = {
  ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0'
};

/* Pseudo-attribute name "standalone". */
static const char KW_standalone[] = {
  ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o,
  ASCII_n, ASCII_e, '\0'
};

/* Accepted value "yes" for standalone. */
static const char KW_yes[] = {
  ASCII_y, ASCII_e, ASCII_s,  '\0'
};

/* Accepted value "no" for standalone. */
static const char KW_no[] = {
  ASCII_n, ASCII_o,  '\0'
};
10801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
10811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int
10821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TuccidoParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
10831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                                                 const char *,
10841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                                                 const char *),
10851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               int isGeneralTextEntity,
10861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const ENCODING *enc,
10871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const char *ptr,
10881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const char *end,
10891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const char **badPtr,
10901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const char **versionPtr,
10911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const char **versionEndPtr,
10921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const char **encodingName,
10931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const ENCODING **encoding,
10941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               int *standalone)
10951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
10961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const char *val = NULL;
10971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const char *name = NULL;
10981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const char *nameEnd = NULL;
10991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ptr += 5 * enc->minBytesPerChar;
11001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  end -= 2 * enc->minBytesPerChar;
11011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
11021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      || !name) {
11031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *badPtr = ptr;
11041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
11051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
11061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
11071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (!isGeneralTextEntity) {
11081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *badPtr = name;
11091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return 0;
11101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
11111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
11121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  else {
11131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (versionPtr)
11141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *versionPtr = val;
11151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (versionEndPtr)
11161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *versionEndPtr = ptr;
11171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
11181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *badPtr = ptr;
11191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return 0;
11201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
11211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (!name) {
11221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (isGeneralTextEntity) {
11231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        /* a TextDecl must have an EncodingDecl */
11241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        *badPtr = ptr;
11251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        return 0;
11261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      }
11271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return 1;
11281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
11291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
11301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
11311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    int c = toAscii(enc, val, end);
11321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) {
11331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *badPtr = val;
11341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return 0;
11351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
11361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (encodingName)
11371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *encodingName = val;
11381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (encoding)
11391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
11401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
11411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *badPtr = ptr;
11421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return 0;
11431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
11441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (!name)
11451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return 1;
11461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
11471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
11481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      || isGeneralTextEntity) {
11491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *badPtr = name;
11501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
11511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
11521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
11531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (standalone)
11541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *standalone = 1;
11551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
11561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
11571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (standalone)
11581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *standalone = 0;
11591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
11601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  else {
11611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *badPtr = val;
11621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
11631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
11641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  while (isSpace(toAscii(enc, ptr, end)))
11651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    ptr += enc->minBytesPerChar;
11661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (ptr != end) {
11671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *badPtr = ptr;
11681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
11691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
11701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return 1;
11711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
11721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
11731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int FASTCALL
11741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TuccicheckCharRefNumber(int result)
11751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
11761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  switch (result >> 8) {
11771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0xD8: case 0xD9: case 0xDA: case 0xDB:
11781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0xDC: case 0xDD: case 0xDE: case 0xDF:
11791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return -1;
11801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0:
11811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (latin1_encoding.type[result] == BT_NONXML)
11821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return -1;
11831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    break;
11841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  case 0xFF:
11851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (result == 0xFFFE || result == 0xFFFF)
11861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return -1;
11871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    break;
11881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
11891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return result;
11901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
11911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
11921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciint FASTCALL
11931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciXmlUtf8Encode(int c, char *buf)
11941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
11951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  enum {
11961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    /* minN is minimum legal resulting value for N byte sequence */
11971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    min2 = 0x80,
11981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    min3 = 0x800,
11991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    min4 = 0x10000
12001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  };
12011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
12021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (c < 0)
12031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
12041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (c < min2) {
12051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[0] = (char)(c | UTF8_cval1);
12061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 1;
12071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
12081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (c < min3) {
12091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[0] = (char)((c >> 6) | UTF8_cval2);
12101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[1] = (char)((c & 0x3f) | 0x80);
12111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 2;
12121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
12131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (c < min4) {
12141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[0] = (char)((c >> 12) | UTF8_cval3);
12151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
12161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[2] = (char)((c & 0x3f) | 0x80);
12171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 3;
12181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
12191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (c < 0x110000) {
12201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[0] = (char)((c >> 18) | UTF8_cval4);
12211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
12221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
12231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[3] = (char)((c & 0x3f) | 0x80);
12241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 4;
12251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
12261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return 0;
12271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
12281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
12291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciint FASTCALL
12301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciXmlUtf16Encode(int charNum, unsigned short *buf)
12311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
12321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (charNum < 0)
12331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
12341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (charNum < 0x10000) {
12351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[0] = (unsigned short)charNum;
12361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 1;
12371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
12381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (charNum < 0x110000) {
12391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    charNum -= 0x10000;
12401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
12411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
12421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 2;
12431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
12441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return 0;
12451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
12461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
12471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistruct unknown_encoding {
12481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  struct normal_encoding normal;
12491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  CONVERTER convert;
12501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  void *userData;
12511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  unsigned short utf16[256];
12521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  char utf8[256][4];
12531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci};
12541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
12551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define AS_UNKNOWN_ENCODING(enc)  ((const struct unknown_encoding *) (enc))
12561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
12571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciint
12581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciXmlSizeOfUnknownEncoding(void)
12591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
12601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return sizeof(struct unknown_encoding);
12611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
12621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
12631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
12641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunknown_isName(const ENCODING *enc, const char *p)
12651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
12661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
12671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int c = uenc->convert(uenc->userData, p);
12681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (c & ~0xFFFF)
12691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
12701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
12711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
12721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
12731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
12741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunknown_isNmstrt(const ENCODING *enc, const char *p)
12751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
12761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
12771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int c = uenc->convert(uenc->userData, p);
12781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (c & ~0xFFFF)
12791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return 0;
12801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
12811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
12821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
12831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL
12841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunknown_isInvalid(const ENCODING *enc, const char *p)
12851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
12861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
12871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int c = uenc->convert(uenc->userData, p);
12881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
12891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
12901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
12911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL
12921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunknown_toUtf8(const ENCODING *enc,
12931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const char **fromP, const char *fromLim,
12941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               char **toP, const char *toLim)
12951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
12961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
12971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  char buf[XML_UTF8_ENCODE_MAX];
12981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (;;) {
12991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    const char *utf8;
13001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    int n;
13011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (*fromP == fromLim)
13021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
13031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    utf8 = uenc->utf8[(unsigned char)**fromP];
13041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    n = *utf8++;
13051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (n == 0) {
13061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      int c = uenc->convert(uenc->userData, *fromP);
13071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      n = XmlUtf8Encode(c, buf);
13081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (n > toLim - *toP)
13091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        break;
13101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      utf8 = buf;
13111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
13121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                 - (BT_LEAD2 - 2));
13131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
13141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    else {
13151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (n > toLim - *toP)
13161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        break;
13171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      (*fromP)++;
13181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
13191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    do {
13201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *(*toP)++ = *utf8++;
13211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    } while (--n != 0);
13221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
13231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
13241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
13251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL
13261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunknown_toUtf16(const ENCODING *enc,
13271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                const char **fromP, const char *fromLim,
13281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                unsigned short **toP, const unsigned short *toLim)
13291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
13301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
13311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  while (*fromP != fromLim && *toP != toLim) {
13321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    unsigned short c = uenc->utf16[(unsigned char)**fromP];
13331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (c == 0) {
13341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      c = (unsigned short)
13351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          uenc->convert(uenc->userData, *fromP);
13361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
13371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                 - (BT_LEAD2 - 2));
13381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
13391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    else
13401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      (*fromP)++;
13411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    *(*toP)++ = c;
13421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
13431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
13441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
13451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciENCODING *
13461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciXmlInitUnknownEncoding(void *mem,
13471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                       int *table,
13481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                       CONVERTER convert,
13491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                       void *userData)
13501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
13511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int i;
13521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  struct unknown_encoding *e = (struct unknown_encoding *)mem;
13531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
13541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
13551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (i = 0; i < 128; i++)
13561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (latin1_encoding.type[i] != BT_OTHER
13571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        && latin1_encoding.type[i] != BT_NONXML
13581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        && table[i] != i)
13591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return 0;
13601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (i = 0; i < 256; i++) {
13611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    int c = table[i];
13621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (c == -1) {
13631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->normal.type[i] = BT_MALFORM;
13641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      /* This shouldn't really get used. */
13651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf16[i] = 0xFFFF;
13661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf8[i][0] = 1;
13671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf8[i][1] = 0;
13681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
13691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    else if (c < 0) {
13701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (c < -4)
13711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        return 0;
13721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
13731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf8[i][0] = 0;
13741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf16[i] = 0;
13751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
13761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    else if (c < 0x80) {
13771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (latin1_encoding.type[c] != BT_OTHER
13781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          && latin1_encoding.type[c] != BT_NONXML
13791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          && c != i)
13801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        return 0;
13811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->normal.type[i] = latin1_encoding.type[c];
13821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf8[i][0] = 1;
13831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf8[i][1] = (char)c;
13841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
13851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
13861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    else if (checkCharRefNumber(c) < 0) {
13871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->normal.type[i] = BT_NONXML;
13881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      /* This shouldn't really get used. */
13891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf16[i] = 0xFFFF;
13901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf8[i][0] = 1;
13911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf8[i][1] = 0;
13921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
13931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    else {
13941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (c > 0xFFFF)
13951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        return 0;
13961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
13971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        e->normal.type[i] = BT_NMSTRT;
13981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
13991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        e->normal.type[i] = BT_NAME;
14001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      else
14011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        e->normal.type[i] = BT_OTHER;
14021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
14031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      e->utf16[i] = (unsigned short)c;
14041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
14051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
14061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  e->userData = userData;
14071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  e->convert = convert;
14081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (convert) {
14091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    e->normal.isName2 = unknown_isName;
14101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    e->normal.isName3 = unknown_isName;
14111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    e->normal.isName4 = unknown_isName;
14121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    e->normal.isNmstrt2 = unknown_isNmstrt;
14131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    e->normal.isNmstrt3 = unknown_isNmstrt;
14141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    e->normal.isNmstrt4 = unknown_isNmstrt;
14151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    e->normal.isInvalid2 = unknown_isInvalid;
14161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    e->normal.isInvalid3 = unknown_isInvalid;
14171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    e->normal.isInvalid4 = unknown_isInvalid;
14181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
14191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  e->normal.enc.utf8Convert = unknown_toUtf8;
14201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  e->normal.enc.utf16Convert = unknown_toUtf16;
14211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return &(e->normal.enc);
14221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
14231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* Indices of the built-in encodings.  If this enumeration is changed,
   getEncodingIndex and encodings must also be changed. */
enum {
  UNKNOWN_ENC    = -1,
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC   = 1,
  UTF_8_ENC      = 2,
  UTF_16_ENC     = 3,
  UTF_16BE_ENC   = 4,
  UTF_16LE_ENC   = 5,
  /* must match encodingNames up to here */
  NO_ENC         = 6
};
14371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
14381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_ISO_8859_1[] = {
14391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9,
14401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ASCII_MINUS, ASCII_1, '\0'
14411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci};
14421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_US_ASCII[] = {
14431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
14441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  '\0'
14451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci};
14461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_UTF_8[] =  {
14471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'
14481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci};
14491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_UTF_16[] = {
14501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'
14511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci};
14521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_UTF_16BE[] = {
14531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E,
14541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  '\0'
14551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci};
14561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_UTF_16LE[] = {
14571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E,
14581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  '\0'
14591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci};
14601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
14611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int FASTCALL
14621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TuccigetEncodingIndex(const char *name)
14631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
14641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  static const char * const encodingNames[] = {
14651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    KW_ISO_8859_1,
14661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    KW_US_ASCII,
14671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    KW_UTF_8,
14681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    KW_UTF_16,
14691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    KW_UTF_16BE,
14701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    KW_UTF_16LE,
14711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  };
14721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int i;
14731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (name == NULL)
14741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return NO_ENC;
14751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
14761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (streqci(name, encodingNames[i]))
14771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return i;
14781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return UNKNOWN_ENC;
14791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
14801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.
*/

/* Read the stored encoding index back as an int. */
#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
/* Store an encoding index.  The index argument is parenthesized so that
   the cast applies to the whole expression supplied by the caller rather
   than only to its first operand. */
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
14871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
14881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* This is what detects the encoding.  encodingTable maps from
14891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
14901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   the external (protocol) specified encoding; state is
14911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   XML_CONTENT_STATE if we're parsing an external text entity, and
14921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   XML_PROLOG_STATE otherwise.
14931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci*/
14941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
14951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
14961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int
14971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciinitScan(const ENCODING * const *encodingTable,
14981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci         const INIT_ENCODING *enc,
14991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci         int state,
15001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci         const char *ptr,
15011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci         const char *end,
15021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci         const char **nextTokPtr)
15031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
15041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const ENCODING **encPtr;
15051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
15061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (ptr == end)
15071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return XML_TOK_NONE;
15081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  encPtr = enc->encPtr;
15091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (ptr + 1 == end) {
15101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    /* only a single byte available for auto-detection */
15111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifndef XML_DTD /* FIXME */
15121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    /* a well-formed document entity must have more than one byte */
15131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (state != XML_CONTENT_STATE)
15141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return XML_TOK_PARTIAL;
15151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif
15161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    /* so we're parsing an external text entity... */
15171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    /* if UTF-16 was externally specified, then we need at least 2 bytes */
15181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    switch (INIT_ENC_INDEX(enc)) {
15191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case UTF_16_ENC:
15201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case UTF_16LE_ENC:
15211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case UTF_16BE_ENC:
15221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return XML_TOK_PARTIAL;
15231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
15241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    switch ((unsigned char)*ptr) {
15251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0xFE:
15261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0xFF:
15271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0xEF: /* possibly first byte of UTF-8 BOM */
15281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
15291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          && state == XML_CONTENT_STATE)
15301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        break;
15311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      /* fall through */
15321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0x00:
15331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0x3C:
15341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return XML_TOK_PARTIAL;
15351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
15361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
15371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  else {
15381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
15391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0xFEFF:
15401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
15411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          && state == XML_CONTENT_STATE)
15421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        break;
15431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *nextTokPtr = ptr + 2;
15441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *encPtr = encodingTable[UTF_16BE_ENC];
15451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return XML_TOK_BOM;
15461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    /* 00 3C is handled in the default case */
15471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0x3C00:
15481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
15491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci           || INIT_ENC_INDEX(enc) == UTF_16_ENC)
15501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          && state == XML_CONTENT_STATE)
15511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        break;
15521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *encPtr = encodingTable[UTF_16LE_ENC];
15531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
15541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0xFFFE:
15551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
15561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          && state == XML_CONTENT_STATE)
15571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        break;
15581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *nextTokPtr = ptr + 2;
15591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      *encPtr = encodingTable[UTF_16LE_ENC];
15601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return XML_TOK_BOM;
15611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    case 0xEFBB:
15621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      /* Maybe a UTF-8 BOM (EF BB BF) */
15631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      /* If there's an explicitly specified (external) encoding
15641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci         of ISO-8859-1 or some flavour of UTF-16
15651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci         and this is an external text entity,
15661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci         don't look for the BOM,
         because it might be legal data.
15681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      */
15691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (state == XML_CONTENT_STATE) {
15701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        int e = INIT_ENC_INDEX(enc);
15711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC
15721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci            || e == UTF_16LE_ENC || e == UTF_16_ENC)
15731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          break;
15741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      }
15751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (ptr + 2 == end)
15761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        return XML_TOK_PARTIAL;
15771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if ((unsigned char)ptr[2] == 0xBF) {
15781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        *nextTokPtr = ptr + 3;
15791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        *encPtr = encodingTable[UTF_8_ENC];
15801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        return XML_TOK_BOM;
15811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      }
15821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
15831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    default:
15841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (ptr[0] == '\0') {
15851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        /* 0 isn't a legal data character. Furthermore a document
15861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci           entity can only start with ASCII characters.  So the only
           way this can fail to be big-endian UTF-16 is if it's an
15881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci           external parsed general entity that's labelled as
15891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci           UTF-16LE.
15901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        */
15911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
15921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          break;
15931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        *encPtr = encodingTable[UTF_16BE_ENC];
15941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
15951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      }
15961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      else if (ptr[1] == '\0') {
15971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        /* We could recover here in the case:
15981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci            - parsing an external entity
15991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci            - second byte is 0
16001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci            - no externally specified encoding
16011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci            - no encoding declaration
16021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci           by assuming UTF-16LE.  But we don't, because this would mean when
16031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci           presented just with a single byte, we couldn't reliably determine
16041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci           whether we needed further bytes.
16051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        */
16061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        if (state == XML_CONTENT_STATE)
16071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          break;
16081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        *encPtr = encodingTable[UTF_16LE_ENC];
16091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
16101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      }
16111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      break;
16121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
16131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
16141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
16151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
16161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
16171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
16181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
/* First inclusion of xmltok_ns.c.  Here NS(x) and ns(x) both expand to
   their argument unchanged, so any identifier the included file spells
   through these macros keeps its plain name.  XML_TOK_NS_C is defined
   only for the duration of the #include (presumably so xmltok_ns.c can
   tell it is being included from this file -- confirm in that file).
*/
#define NS(x) x
#define ns(x) x
#define XML_TOK_NS_C
#include "xmltok_ns.c"
#undef XML_TOK_NS_C
/* Remove the helper macros so they do not leak into the rest of the
   file and are free to be redefined.
*/
#undef NS
#undef ns
16261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
#ifdef XML_NS

/* Second inclusion of xmltok_ns.c, performed only when XML_NS is
   defined.  This time NS(x) pastes the suffix "NS" onto its argument
   and ns(x) pastes "_ns", so identifiers spelled through the macros
   differ from those produced by the first inclusion.
*/
#define NS(x) x ## NS
#define ns(x) x ## _ns

/* As before, XML_TOK_NS_C is defined only around the #include. */
#define XML_TOK_NS_C
#include "xmltok_ns.c"
#undef XML_TOK_NS_C

/* Remove the helper macros again once the inclusion is done. */
#undef NS
#undef ns
16381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
16391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciENCODING *
16401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciXmlInitUnknownEncodingNS(void *mem,
16411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                         int *table,
16421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                         CONVERTER convert,
16431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                         void *userData)
16441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{
16451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
16461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (enc)
16471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
16481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return enc;
16491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
16501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
16511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif /* XML_NS */
1652