11320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 21320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci See the file COPYING for copying permission. 31320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci*/ 41320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 51320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include <stddef.h> 61320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 71320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef COMPILED_FROM_DSP 81320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "winconfig.h" 91320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#elif defined(MACOS_CLASSIC) 101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "macconfig.h" 111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#elif defined(__amigaos__) 121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "amigaconfig.h" 131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#elif defined(__WATCOMC__) 141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "watcomconfig.h" 151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef HAVE_EXPAT_CONFIG_H 171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include <expat_config.h> 181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif /* ndef COMPILED_FROM_DSP */ 201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "expat_external.h" 221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "internal.h" 231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "xmltok.h" 241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "nametab.h" 251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_DTD 271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) 281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IGNORE_SECTION_TOK_VTABLE /* as nothing */ 301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define VTABLE1 \ 331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { PREFIX(prologTok), PREFIX(contentTok), \ 341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ 351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ 361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci PREFIX(sameName), \ 371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci PREFIX(nameMatchesAscii), \ 381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci PREFIX(nameLength), \ 391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci PREFIX(skipS), \ 401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci PREFIX(getAtts), \ 411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci PREFIX(charRefNumber), \ 421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci PREFIX(predefinedEntityName), \ 431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci PREFIX(updatePosition), \ 441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci PREFIX(isPublicId) 451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) 471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define UCS2_GET_NAMING(pages, hi, lo) \ 491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) 501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* A 2 byte UTF-8 representation splits the characters 11 bits between 521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci the bottom 5 and 6 bits of the bytes. We need 8 bits to index into 531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci pages, 3 bits to add to that index and 5 bits to generate the mask. 541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci*/ 551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define UTF8_GET_NAMING2(pages, byte) \ 561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ 571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci + ((((byte)[0]) & 3) << 1) \ 581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci + ((((byte)[1]) >> 5) & 1)] \ 591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci & (1 << (((byte)[1]) & 0x1F))) 601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* A 3 byte UTF-8 representation splits the characters 16 bits between 621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index 631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci into pages, 3 bits to add to that index and 5 bits to generate the 641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci mask. 651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci*/ 661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define UTF8_GET_NAMING3(pages, byte) \ 671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ 681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci + ((((byte)[1]) >> 2) & 0xF)] \ 691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci << 3) \ 701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci + ((((byte)[1]) & 3) << 1) \ 711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci + ((((byte)[2]) >> 5) & 1)] \ 721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci & (1 << (((byte)[2]) & 0x1F))) 731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define UTF8_GET_NAMING(pages, p, n) \ 751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((n) == 2 \ 761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ 771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci : ((n) == 3 \ 781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ 791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci : 0)) 801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* Detection of invalid UTF-8 sequences is based on Table 3.1B 821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ 831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci with the additional restriction of not allowing the Unicode 841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE). 851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci Implementation details: 861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (A & 0x80) == 0 means A < 0x80 871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci and 881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (A & 0xC0) == 0xC0 means A > 0xBF 891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci*/ 901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define UTF8_INVALID2(p) \ 921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0) 931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define UTF8_INVALID3(p) \ 951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (((p)[2] & 0x80) == 0 \ 961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci || \ 971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((*p) == 0xEF && (p)[1] == 0xBF \ 981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ? \ 991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (p)[2] > 0xBD \ 1001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci : \ 1011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((p)[2] & 0xC0) == 0xC0) \ 1021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci || \ 1031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((*p) == 0xE0 \ 1041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ? \ 1051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ 1061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci : \ 1071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((p)[1] & 0x80) == 0 \ 1081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci || \ 1091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) 1101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define UTF8_INVALID4(p) \ 1121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \ 1131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci || \ 1141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \ 1151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci || \ 1161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((*p) == 0xF0 \ 1171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ? \ 1181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ 1191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci : \ 1201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((p)[1] & 0x80) == 0 \ 1211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci || \ 1221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) 1231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 1251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciisNever(const ENCODING *enc, const char *p) 1261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 1271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 1281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 1291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 1311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isName2(const ENCODING *enc, const char *p) 1321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 1331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); 1341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 1351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 1371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isName3(const ENCODING *enc, const char *p) 1381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 1391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); 1401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 1411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define utf8_isName4 isNever 1431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 1451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isNmstrt2(const ENCODING *enc, const char *p) 1461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 1471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); 1481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 1491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 1511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isNmstrt3(const ENCODING *enc, const char *p) 1521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 1531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); 1541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 1551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define utf8_isNmstrt4 isNever 1571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 1591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isInvalid2(const ENCODING *enc, const char *p) 1601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 1611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return UTF8_INVALID2((const unsigned char *)p); 1621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 1631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 1651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isInvalid3(const ENCODING *enc, const char *p) 1661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 1671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return UTF8_INVALID3((const unsigned char *)p); 1681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 1691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 1711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_isInvalid4(const ENCODING *enc, const char *p) 1721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 1731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return UTF8_INVALID4((const unsigned char *)p); 1741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 1751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistruct normal_encoding { 1771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ENCODING enc; 1781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned char type[256]; 1791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE 1801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *byteType)(const ENCODING *, const char *); 1811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); 1821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *); 1831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *); 1841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRCALL *charMatches)(const ENCODING *, const char *, int); 1851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif /* XML_MIN_SIZE */ 1861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isName2)(const ENCODING *, const char *); 1871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isName3)(const ENCODING *, const char *); 1881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isName4)(const ENCODING *, const char *); 1891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *); 1901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *); 1911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *); 1921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *); 1931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *); 1941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *); 1951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 1961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc)) 1981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE 2001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define STANDARD_VTABLE(E) \ 2021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## byteType, \ 2031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isNameMin, \ 2041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isNmstrtMin, \ 2051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## byteToAscii, \ 2061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## charMatches, 2071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 2091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define STANDARD_VTABLE(E) /* as nothing */ 2111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 2131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define NORMAL_VTABLE(E) \ 2151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isName2, \ 2161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isName3, \ 2171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isName4, \ 2181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isNmstrt2, \ 2191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isNmstrt3, \ 2201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isNmstrt4, \ 2211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isInvalid2, \ 2221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isInvalid3, \ 2231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci E ## isInvalid4 2241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int FASTCALL checkCharRefNumber(int); 2261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "xmltok_impl.h" 2281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "ascii.h" 2291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE 2311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define sb_isNameMin isNever 2321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define sb_isNmstrtMin isNever 2331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 2341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE 2361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define MINBPC(enc) ((enc)->minBytesPerChar) 2371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 2381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* minimum bytes per character */ 2391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define MINBPC(enc) 1 2401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 2411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define SB_BYTE_TYPE(enc, p) \ 2431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) 2441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE 2461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 2471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccisb_byteType(const ENCODING *enc, const char *p) 2481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 2491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return SB_BYTE_TYPE(enc, p); 2501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 2511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BYTE_TYPE(enc, p) \ 2521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (AS_NORMAL_ENCODING(enc)->byteType(enc, p)) 2531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 2541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) 2551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 2561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE 2581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BYTE_TO_ASCII(enc, p) \ 2591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p)) 2601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 2611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccisb_byteToAscii(const ENCODING *enc, const char *p) 2621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 2631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return *p; 2641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 2651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 2661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BYTE_TO_ASCII(enc, p) (*(p)) 2671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 2681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NAME_CHAR(enc, p, n) \ 2701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p)) 2711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NMSTRT_CHAR(enc, p, n) \ 2721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p)) 2731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_INVALID_CHAR(enc, p, n) \ 2741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p)) 2751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE 2771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NAME_CHAR_MINBPC(enc, p) \ 2781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) 2791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NMSTRT_CHAR_MINBPC(enc, p) \ 2801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p)) 2811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 2821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NAME_CHAR_MINBPC(enc, p) (0) 2831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) 2841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 2851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE 2871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define CHAR_MATCHES(enc, p, c) \ 2881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c)) 2891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRCALL 2901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccisb_charMatches(const ENCODING *enc, const char *p, int c) 2911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 2921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return *p == c; 2931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 2941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 2951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* c is an ASCII character */ 2961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define CHAR_MATCHES(enc, p, c) (*(p) == c) 2971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 2981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 2991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define PREFIX(ident) normal_ ## ident 3001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define XML_TOK_IMPL_C 3011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "xmltok_impl.c" 3021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef XML_TOK_IMPL_C 3031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 3041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef MINBPC 3051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BYTE_TYPE 3061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BYTE_TO_ASCII 3071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef CHAR_MATCHES 3081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NAME_CHAR 3091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NAME_CHAR_MINBPC 3101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NMSTRT_CHAR 3111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NMSTRT_CHAR_MINBPC 3121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_INVALID_CHAR 3131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 3141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccienum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ 3151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UTF8_cval1 = 0x00, 3161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UTF8_cval2 = 0xc0, 3171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UTF8_cval3 = 0xe0, 3181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UTF8_cval4 = 0xf0 3191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 3201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 3211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL 3221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_toUtf8(const ENCODING *enc, 3231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **fromP, const char *fromLim, 3241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char **toP, const char *toLim) 3251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 3261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char *to; 3271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *from; 3281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (fromLim - *fromP > toLim - *toP) { 3291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* Avoid copying partial characters. */ 3301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) 3311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) 3321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 3331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 3341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (to = *toP, from = *fromP; from != fromLim; from++, to++) 3351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *to = *from; 3361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *fromP = from; 3371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *toP = to; 3381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 3391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 3401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL 3411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciutf8_toUtf16(const ENCODING *enc, 3421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **fromP, const char *fromLim, 3431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned short **toP, const unsigned short *toLim) 3441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 3451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned short *to = *toP; 3461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *from = *fromP; 3471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci while (from != fromLim && to != toLim) { 3481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { 3491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case BT_LEAD2: 3501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); 3511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci from += 2; 3521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 3531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case BT_LEAD3: 3541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *to++ = (unsigned short)(((from[0] & 0xf) << 12) 3551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); 3561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci from += 3; 3571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 3581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case BT_LEAD4: 3591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 3601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned long n; 3611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (to + 1 == toLim) 3621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci goto after; 3631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) 3641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); 3651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci n -= 0x10000; 3661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci to[0] = (unsigned short)((n >> 10) | 0xD800); 3671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); 3681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci to += 2; 3691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci from += 4; 3701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 3711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 3721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci default: 3731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *to++ = *from++; 3741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 3751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 3761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 3771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciafter: 3781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *fromP = from; 3791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *toP = to; 3801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 3811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 3821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS 3831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding utf8_encoding_ns = { 3841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 3851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 3861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "asciitab.h" 3871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "utf8tab.h" 3881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 3891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 3901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 3911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 3921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 3931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding utf8_encoding = { 3941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 3951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 3961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BT_COLON BT_NMSTRT 3971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "asciitab.h" 3981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BT_COLON 3991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "utf8tab.h" 4001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 4011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 4021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 4031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS 4051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding internal_utf8_encoding_ns = { 4071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 4081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 4091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "iasciitab.h" 4101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "utf8tab.h" 4111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 4121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 4131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 4141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 4161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding internal_utf8_encoding = { 4181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 4191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 4201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BT_COLON BT_NMSTRT 4211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "iasciitab.h" 4221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BT_COLON 4231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "utf8tab.h" 4241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 4251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 4261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 4271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL 4291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccilatin1_toUtf8(const ENCODING *enc, 4301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **fromP, const char *fromLim, 4311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char **toP, const char *toLim) 4321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 4331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (;;) { 4341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned char c; 4351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (*fromP == fromLim) 4361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 4371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci c = (unsigned char)**fromP; 4381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c & 0x80) { 4391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (toLim - *toP < 2) 4401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 4411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = (char)((c >> 6) | UTF8_cval2); 4421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = (char)((c & 0x3f) | 0x80); 4431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (*fromP)++; 4441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 4451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else { 4461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (*toP == toLim) 4471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 4481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = *(*fromP)++; 4491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 4501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 4511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 4521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL 4541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccilatin1_toUtf16(const ENCODING *enc, 4551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **fromP, const char *fromLim, 4561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned short **toP, const unsigned short *toLim) 4571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 4581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci while (*fromP != fromLim && *toP != toLim) 4591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = (unsigned char)*(*fromP)++; 4601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 4611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS 4631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding latin1_encoding_ns = { 4651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, 4661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 4671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "asciitab.h" 4681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "latin1tab.h" 4691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 4701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(sb_) 4711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 4721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 4741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding latin1_encoding = { 4761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, 4771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 4781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BT_COLON BT_NMSTRT 4791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "asciitab.h" 4801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BT_COLON 4811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "latin1tab.h" 4821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 4831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(sb_) 4841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 4851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL 4871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciascii_toUtf8(const ENCODING *enc, 4881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **fromP, const char *fromLim, 4891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char **toP, const char *toLim) 4901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 4911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci while (*fromP != fromLim && *toP != toLim) 4921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = *(*fromP)++; 4931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 4941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS 4961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 4971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding ascii_encoding_ns = { 4981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, 4991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 5001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "asciitab.h" 5011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* BT_NONXML == 0 */ 5021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 5031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(sb_) 5041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 5051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 5061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 5071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 5081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding ascii_encoding = { 5091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, 5101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 5111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BT_COLON BT_NMSTRT 5121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "asciitab.h" 5131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BT_COLON 5141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* BT_NONXML == 0 */ 5151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 5161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(sb_) 5171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 5181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 5191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 5201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunicode_byte_type(char hi, char lo) 5211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 5221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci switch ((unsigned char)hi) { 5231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xD8: case 0xD9: case 0xDA: case 0xDB: 5241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return BT_LEAD4; 5251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xDC: case 0xDD: case 0xDE: case 0xDF: 5261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return BT_TRAIL; 5271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xFF: 5281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci switch ((unsigned char)lo) { 5291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xFF: 5301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xFE: 5311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return BT_NONXML; 5321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 5331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 5341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 5351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return BT_NONASCII; 5361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 5371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 5381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define DEFINE_UTF16_TO_UTF8(E) \ 5391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL \ 5401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciE ## toUtf8(const ENCODING *enc, \ 5411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **fromP, const char *fromLim, \ 5421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char **toP, const char *toLim) \ 5431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ \ 5441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *from; \ 5451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (from = *fromP; from != fromLim; from += 2) { \ 5461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int plane; \ 5471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned char lo2; \ 5481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned char lo = GET_LO(from); \ 5491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned char hi = GET_HI(from); \ 5501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci switch (hi) { \ 5511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0: \ 5521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (lo < 0x80) { \ 5531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (*toP == toLim) { \ 5541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *fromP = from; \ 5551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return; \ 5561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } \ 5571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = lo; \ 5581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; \ 5591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } \ 5601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* fall through */ \ 5611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0x1: case 0x2: case 0x3: \ 5621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0x4: case 0x5: case 0x6: case 0x7: \ 5631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (toLim - *toP < 2) { \ 5641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *fromP = from; \ 5651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return; \ 5661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } \ 5671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ 5681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = ((lo & 0x3f) | 0x80); \ 5691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; \ 5701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci default: \ 5711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (toLim - *toP < 3) { \ 5721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *fromP = from; \ 5731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return; \ 5741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } \ 5751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ 5761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ 5771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ 5781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = ((lo & 0x3f) | 0x80); \ 5791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; \ 5801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ 5811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (toLim - *toP < 4) { \ 5821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *fromP = from; \ 5831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return; \ 5841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } \ 5851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ 5861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ 5871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ 5881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci from += 2; \ 5891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci lo2 = GET_LO(from); \ 5901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = (((lo & 0x3) << 4) \ 5911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci | ((GET_HI(from) & 0x3) << 2) \ 5921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci | (lo2 >> 6) \ 5931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci | 0x80); \ 5941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ 5951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; \ 5961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } \ 5971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } \ 5981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *fromP = from; \ 5991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 6001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define DEFINE_UTF16_TO_UTF16(E) \ 6021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL \ 6031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciE ## toUtf16(const ENCODING *enc, \ 6041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **fromP, const char *fromLim, \ 6051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned short **toP, const unsigned short *toLim) \ 6061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ \ 6071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* Avoid copying first half only of surrogate */ \ 6081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (fromLim - *fromP > ((toLim - *toP) << 1) \ 6091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ 6101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci fromLim -= 2; \ 6111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ 6121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ 6131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 6141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define SET2(ptr, ch) \ 6161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) 6171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define GET_LO(ptr) ((unsigned char)(ptr)[0]) 6181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define GET_HI(ptr) ((unsigned char)(ptr)[1]) 6191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciDEFINE_UTF16_TO_UTF8(little2_) 6211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciDEFINE_UTF16_TO_UTF16(little2_) 6221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef SET2 6241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef GET_LO 6251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef GET_HI 6261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define SET2(ptr, ch) \ 6281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF))) 6291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define GET_LO(ptr) ((unsigned char)(ptr)[1]) 6301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define GET_HI(ptr) ((unsigned char)(ptr)[0]) 6311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciDEFINE_UTF16_TO_UTF8(big2_) 6331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciDEFINE_UTF16_TO_UTF16(big2_) 6341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef SET2 6361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef GET_LO 6371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef GET_HI 6381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define LITTLE2_BYTE_TYPE(enc, p) \ 6401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((p)[1] == 0 \ 6411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ 6421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci : unicode_byte_type((p)[1], (p)[0])) 6431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1) 6441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c) 6451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \ 6461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) 6471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \ 6481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) 6491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE 6511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 6531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccilittle2_byteType(const ENCODING *enc, const char *p) 6541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 6551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return LITTLE2_BYTE_TYPE(enc, p); 6561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 6571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 6591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccilittle2_byteToAscii(const ENCODING *enc, const char *p) 6601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 6611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return LITTLE2_BYTE_TO_ASCII(enc, p); 6621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 6631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRCALL 6651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccilittle2_charMatches(const ENCODING *enc, const char *p, int c) 6661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 6671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return LITTLE2_CHAR_MATCHES(enc, p, c); 6681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 6691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 6711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccilittle2_isNameMin(const ENCODING *enc, const char *p) 6721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 6731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p); 6741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 6751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 6771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccilittle2_isNmstrtMin(const ENCODING *enc, const char *p) 6781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 6791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p); 6801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 6811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef VTABLE 6831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16 6841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else /* not XML_MIN_SIZE */ 6861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef PREFIX 6881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define PREFIX(ident) little2_ ## ident 6891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define MINBPC(enc) 2 6901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ 6911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) 6921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) 6931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c) 6941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NAME_CHAR(enc, p, n) 0 6951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) 6961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NMSTRT_CHAR(enc, p, n) (0) 6971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) 6981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 6991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define XML_TOK_IMPL_C 7001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "xmltok_impl.c" 7011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef XML_TOK_IMPL_C 7021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef MINBPC 7041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BYTE_TYPE 7051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BYTE_TO_ASCII 7061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef CHAR_MATCHES 7071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NAME_CHAR 7081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NAME_CHAR_MINBPC 7091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NMSTRT_CHAR 7101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NMSTRT_CHAR_MINBPC 7111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_INVALID_CHAR 7121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif /* not XML_MIN_SIZE */ 7141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS 7161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding little2_encoding_ns = { 7181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE, 2, 0, 7191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#if BYTEORDER == 1234 7201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1 7211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 7221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 0 7231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 7241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 7251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 7261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "asciitab.h" 7271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "latin1tab.h" 7281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 7291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(little2_) 7301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 7311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 7331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding little2_encoding = { 7351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE, 2, 0, 7361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#if BYTEORDER == 1234 7371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1 7381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 7391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 0 7401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 7411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 7421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 7431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BT_COLON BT_NMSTRT 7441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "asciitab.h" 7451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BT_COLON 7461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "latin1tab.h" 7471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 7481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(little2_) 7491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 7501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#if BYTEORDER != 4321 7521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS 7541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding internal_little2_encoding_ns = { 7561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE, 2, 0, 1 }, 7571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 7581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "iasciitab.h" 7591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "latin1tab.h" 7601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 7611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(little2_) 7621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 7631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 7651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding internal_little2_encoding = { 7671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE, 2, 0, 1 }, 7681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 7691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BT_COLON BT_NMSTRT 7701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "iasciitab.h" 7711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BT_COLON 7721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "latin1tab.h" 7731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 7741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(little2_) 7751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 7761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 7781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BIG2_BYTE_TYPE(enc, p) \ 7811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((p)[0] == 0 \ 7821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ 7831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci : unicode_byte_type((p)[0], (p)[1])) 7841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) 7851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c) 7861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \ 7871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) 7881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \ 7891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) 7901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_MIN_SIZE 7921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 7941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibig2_byteType(const ENCODING *enc, const char *p) 7951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 7961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return BIG2_BYTE_TYPE(enc, p); 7971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 7981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 7991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 8001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibig2_byteToAscii(const ENCODING *enc, const char *p) 8011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 8021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return BIG2_BYTE_TO_ASCII(enc, p); 8031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 8041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRCALL 8061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibig2_charMatches(const ENCODING *enc, const char *p, int c) 8071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 8081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return BIG2_CHAR_MATCHES(enc, p, c); 8091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 8101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 8121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibig2_isNameMin(const ENCODING *enc, const char *p) 8131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 8141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return BIG2_IS_NAME_CHAR_MINBPC(enc, p); 8151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 8161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 8181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibig2_isNmstrtMin(const ENCODING *enc, const char *p) 8191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 8201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p); 8211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 8221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef VTABLE 8241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16 8251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else /* not XML_MIN_SIZE */ 8271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef PREFIX 8291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define PREFIX(ident) big2_ ## ident 8301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define MINBPC(enc) 2 8311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ 8321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) 8331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) 8341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c) 8351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NAME_CHAR(enc, p, n) 0 8361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p) 8371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NMSTRT_CHAR(enc, p, n) (0) 8381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) 8391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define XML_TOK_IMPL_C 8411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "xmltok_impl.c" 8421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef XML_TOK_IMPL_C 8431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef MINBPC 8451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BYTE_TYPE 8461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BYTE_TO_ASCII 8471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef CHAR_MATCHES 8481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NAME_CHAR 8491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NAME_CHAR_MINBPC 8501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NMSTRT_CHAR 8511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_NMSTRT_CHAR_MINBPC 8521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef IS_INVALID_CHAR 8531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif /* not XML_MIN_SIZE */ 8551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS 8571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding big2_encoding_ns = { 8591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE, 2, 0, 8601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#if BYTEORDER == 4321 8611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1 8621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 8631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 0 8641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 8651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 8661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 8671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "asciitab.h" 8681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "latin1tab.h" 8691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 8701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(big2_) 8711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 8721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 8741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding big2_encoding = { 8761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE, 2, 0, 8771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#if BYTEORDER == 4321 8781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1 8791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#else 8801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 0 8811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 8821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 8831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 8841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BT_COLON BT_NMSTRT 8851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "asciitab.h" 8861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BT_COLON 8871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "latin1tab.h" 8881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 8891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(big2_) 8901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 8911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#if BYTEORDER != 1234 8931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS 8951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 8961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding internal_big2_encoding_ns = { 8971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE, 2, 0, 1 }, 8981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 8991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "iasciitab.h" 9001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "latin1tab.h" 9011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 9021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(big2_) 9031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 9041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 9051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 9061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 9071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const struct normal_encoding internal_big2_encoding = { 9081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { VTABLE, 2, 0, 1 }, 9091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci { 9101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define BT_COLON BT_NMSTRT 9111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "iasciitab.h" 9121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef BT_COLON 9131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "latin1tab.h" 9141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }, 9151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci STANDARD_VTABLE(big2_) 9161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 9171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 9181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 9191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 9201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef PREFIX 9211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 9221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int FASTCALL 9231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistreqci(const char *s1, const char *s2) 9241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 9251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (;;) { 9261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char c1 = *s1++; 9271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char c2 = *s2++; 9281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (ASCII_a <= c1 && c1 <= ASCII_z) 9291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci c1 += ASCII_A - ASCII_a; 9301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (ASCII_a <= c2 && c2 <= ASCII_z) 9311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci c2 += ASCII_A - ASCII_a; 9321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c1 != c2) 9331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 9341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!c1) 9351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 9361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 9371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 1; 9381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 9391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 9401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL 9411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciinitUpdatePosition(const ENCODING *enc, const char *ptr, 9421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *end, POSITION *pos) 9431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 9441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); 9451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 9461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 9471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int 9481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TuccitoAscii(const ENCODING *enc, const char *ptr, const char *end) 9491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 9501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char buf[1]; 9511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char *p = buf; 9521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci XmlUtf8Convert(enc, &ptr, end, &p, p + 1); 9531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (p == buf) 9541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return -1; 9551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else 9561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return buf[0]; 9571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 9581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 9591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int FASTCALL 9601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciisSpace(int c) 9611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 9621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci switch (c) { 9631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0x20: 9641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xD: 9651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xA: 9661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0x9: 9671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 1; 9681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 9691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 9701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 9711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 9721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* Return 1 if there's just optional white space or there's an S 9731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci followed by name=val. 9741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci*/ 9751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int 9761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciparsePseudoAttribute(const ENCODING *enc, 9771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *ptr, 9781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *end, 9791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **namePtr, 9801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **nameEndPtr, 9811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **valPtr, 9821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **nextTokPtr) 9831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 9841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int c; 9851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char open; 9861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (ptr == end) { 9871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *namePtr = NULL; 9881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 1; 9891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 9901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!isSpace(toAscii(enc, ptr, end))) { 9911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nextTokPtr = ptr; 9921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 9931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 9941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci do { 9951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ptr += enc->minBytesPerChar; 9961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } while (isSpace(toAscii(enc, ptr, end))); 9971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (ptr == end) { 9981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *namePtr = NULL; 9991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 1; 10001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *namePtr = ptr; 10021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (;;) { 10031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci c = toAscii(enc, ptr, end); 10041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c == -1) { 10051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nextTokPtr = ptr; 10061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 10071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c == ASCII_EQUALS) { 10091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nameEndPtr = ptr; 10101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 10111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (isSpace(c)) { 10131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nameEndPtr = ptr; 10141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci do { 10151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ptr += enc->minBytesPerChar; 10161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } while (isSpace(c = toAscii(enc, ptr, end))); 10171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c != ASCII_EQUALS) { 10181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nextTokPtr = ptr; 10191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 10201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 10221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ptr += enc->minBytesPerChar; 10241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (ptr == *namePtr) { 10261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nextTokPtr = ptr; 10271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 10281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ptr += enc->minBytesPerChar; 10301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci c = toAscii(enc, ptr, end); 10311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci while (isSpace(c)) { 10321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ptr += enc->minBytesPerChar; 10331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci c = toAscii(enc, ptr, end); 10341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c != ASCII_QUOT && c != ASCII_APOS) { 10361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nextTokPtr = ptr; 10371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 10381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci open = (char)c; 10401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ptr += enc->minBytesPerChar; 10411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *valPtr = ptr; 10421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (;; ptr += enc->minBytesPerChar) { 10431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci c = toAscii(enc, ptr, end); 10441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c == open) 10451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 10461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!(ASCII_a <= c && c <= ASCII_z) 10471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && !(ASCII_A <= c && c <= ASCII_Z) 10481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && !(ASCII_0 <= c && c <= ASCII_9) 10491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && c != ASCII_PERIOD 10501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && c != ASCII_MINUS 10511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && c != ASCII_UNDERSCORE) { 10521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nextTokPtr = ptr; 10531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 10541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 10561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nextTokPtr = ptr + enc->minBytesPerChar; 10571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 1; 10581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 10591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 10601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_version[] = { 10611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0' 10621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 10631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 10641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_encoding[] = { 10651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0' 10661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 10671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 10681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_standalone[] = { 10691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, 10701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_n, ASCII_e, '\0' 10711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 10721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 10731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_yes[] = { 10741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_y, ASCII_e, ASCII_s, '\0' 10751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 10761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 10771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_no[] = { 10781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_n, ASCII_o, '\0' 10791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 10801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 10811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int 10821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TuccidoParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, 10831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *, 10841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *), 10851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int isGeneralTextEntity, 10861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const ENCODING *enc, 10871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *ptr, 10881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *end, 10891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **badPtr, 10901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **versionPtr, 10911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **versionEndPtr, 10921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **encodingName, 10931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const ENCODING **encoding, 10941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int *standalone) 10951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 10961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *val = NULL; 10971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *name = NULL; 10981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *nameEnd = NULL; 10991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ptr += 5 * enc->minBytesPerChar; 11001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci end -= 2 * enc->minBytesPerChar; 11011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) 11021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci || !name) { 11031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *badPtr = ptr; 11041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 11051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { 11071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!isGeneralTextEntity) { 11081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *badPtr = name; 11091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 11101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else { 11131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (versionPtr) 11141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *versionPtr = val; 11151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (versionEndPtr) 11161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *versionEndPtr = ptr; 11171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { 11181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *badPtr = ptr; 11191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 11201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!name) { 11221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (isGeneralTextEntity) { 11231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* a TextDecl must have an EncodingDecl */ 11241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *badPtr = ptr; 11251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 11261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 1; 11281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { 11311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int c = toAscii(enc, val, end); 11321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) { 11331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *badPtr = val; 11341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 11351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (encodingName) 11371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *encodingName = val; 11381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (encoding) 11391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); 11401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { 11411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *badPtr = ptr; 11421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 11431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!name) 11451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 1; 11461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) 11481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci || isGeneralTextEntity) { 11491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *badPtr = name; 11501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 11511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { 11531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (standalone) 11541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *standalone = 1; 11551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { 11571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (standalone) 11581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *standalone = 0; 11591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else { 11611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *badPtr = val; 11621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 11631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci while (isSpace(toAscii(enc, ptr, end))) 11651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ptr += enc->minBytesPerChar; 11661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (ptr != end) { 11671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *badPtr = ptr; 11681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 11691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 1; 11711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 11721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 11731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int FASTCALL 11741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TuccicheckCharRefNumber(int result) 11751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 11761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci switch (result >> 8) { 11771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xD8: case 0xD9: case 0xDA: case 0xDB: 11781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xDC: case 0xDD: case 0xDE: case 0xDF: 11791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return -1; 11801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0: 11811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (latin1_encoding.type[result] == BT_NONXML) 11821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return -1; 11831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 11841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xFF: 11851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (result == 0xFFFE || result == 0xFFFF) 11861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return -1; 11871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 11881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 11891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return result; 11901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 11911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 11921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciint FASTCALL 11931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciXmlUtf8Encode(int c, char *buf) 11941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 11951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci enum { 11961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* minN is minimum legal resulting value for N byte sequence */ 11971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci min2 = 0x80, 11981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci min3 = 0x800, 11991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci min4 = 0x10000 12001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }; 12011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 12021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c < 0) 12031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 12041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c < min2) { 12051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[0] = (char)(c | UTF8_cval1); 12061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 1; 12071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 12081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c < min3) { 12091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[0] = (char)((c >> 6) | UTF8_cval2); 12101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[1] = (char)((c & 0x3f) | 0x80); 12111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 2; 12121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 12131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c < min4) { 12141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[0] = (char)((c >> 12) | UTF8_cval3); 12151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[1] = (char)(((c >> 6) & 0x3f) | 0x80); 12161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[2] = (char)((c & 0x3f) | 0x80); 12171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 3; 12181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 12191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c < 0x110000) { 12201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[0] = (char)((c >> 18) | UTF8_cval4); 12211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[1] = (char)(((c >> 12) & 0x3f) | 0x80); 12221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[2] = (char)(((c >> 6) & 0x3f) | 0x80); 12231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[3] = (char)((c & 0x3f) | 0x80); 12241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 4; 12251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 12261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 12271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 12281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 12291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciint FASTCALL 12301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciXmlUtf16Encode(int charNum, unsigned short *buf) 12311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 12321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (charNum < 0) 12331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 12341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (charNum < 0x10000) { 12351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[0] = (unsigned short)charNum; 12361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 1; 12371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 12381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (charNum < 0x110000) { 12391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci charNum -= 0x10000; 12401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[0] = (unsigned short)((charNum >> 10) + 0xD800); 12411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00); 12421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 2; 12431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 12441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 12451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 12461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 12471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistruct unknown_encoding { 12481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci struct normal_encoding normal; 12491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci CONVERTER convert; 12501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci void *userData; 12511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned short utf16[256]; 12521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char utf8[256][4]; 12531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 12541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 12551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc)) 12561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 12571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciint 12581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciXmlSizeOfUnknownEncoding(void) 12591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 12601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return sizeof(struct unknown_encoding); 12611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 12621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 12631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 12641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunknown_isName(const ENCODING *enc, const char *p) 12651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 12661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 12671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int c = uenc->convert(uenc->userData, p); 12681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c & ~0xFFFF) 12691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 12701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); 12711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 12721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 12731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 12741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunknown_isNmstrt(const ENCODING *enc, const char *p) 12751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 12761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 12771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int c = uenc->convert(uenc->userData, p); 12781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c & ~0xFFFF) 12791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 12801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); 12811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 12821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 12831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int PTRFASTCALL 12841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunknown_isInvalid(const ENCODING *enc, const char *p) 12851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 12861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 12871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int c = uenc->convert(uenc->userData, p); 12881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; 12891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 12901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 12911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL 12921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunknown_toUtf8(const ENCODING *enc, 12931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **fromP, const char *fromLim, 12941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char **toP, const char *toLim) 12951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 12961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 12971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char buf[XML_UTF8_ENCODE_MAX]; 12981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (;;) { 12991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *utf8; 13001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int n; 13011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (*fromP == fromLim) 13021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 13031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci utf8 = uenc->utf8[(unsigned char)**fromP]; 13041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci n = *utf8++; 13051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (n == 0) { 13061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int c = uenc->convert(uenc->userData, *fromP); 13071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci n = XmlUtf8Encode(c, buf); 13081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (n > toLim - *toP) 13091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 13101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci utf8 = buf; 13111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] 13121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci - (BT_LEAD2 - 2)); 13131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 13141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else { 13151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (n > toLim - *toP) 13161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 13171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (*fromP)++; 13181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 13191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci do { 13201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = *utf8++; 13211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } while (--n != 0); 13221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 13231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 13241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 13251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic void PTRCALL 13261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciunknown_toUtf16(const ENCODING *enc, 13271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **fromP, const char *fromLim, 13281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned short **toP, const unsigned short *toLim) 13291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 13301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 13311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci while (*fromP != fromLim && *toP != toLim) { 13321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci unsigned short c = uenc->utf16[(unsigned char)**fromP]; 13331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c == 0) { 13341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci c = (unsigned short) 13351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci uenc->convert(uenc->userData, *fromP); 13361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] 13371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci - (BT_LEAD2 - 2)); 13381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 13391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else 13401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (*fromP)++; 13411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *(*toP)++ = c; 13421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 13431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 13441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 13451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciENCODING * 13461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciXmlInitUnknownEncoding(void *mem, 13471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int *table, 13481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci CONVERTER convert, 13491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci void *userData) 13501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 13511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int i; 13521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci struct unknown_encoding *e = (struct unknown_encoding *)mem; 13531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (i = 0; i < (int)sizeof(struct normal_encoding); i++) 13541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; 13551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (i = 0; i < 128; i++) 13561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (latin1_encoding.type[i] != BT_OTHER 13571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && latin1_encoding.type[i] != BT_NONXML 13581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && table[i] != i) 13591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 13601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (i = 0; i < 256; i++) { 13611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int c = table[i]; 13621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c == -1) { 13631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.type[i] = BT_MALFORM; 13641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* This shouldn't really get used. */ 13651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf16[i] = 0xFFFF; 13661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf8[i][0] = 1; 13671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf8[i][1] = 0; 13681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 13691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else if (c < 0) { 13701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c < -4) 13711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 13721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); 13731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf8[i][0] = 0; 13741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf16[i] = 0; 13751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 13761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else if (c < 0x80) { 13771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (latin1_encoding.type[c] != BT_OTHER 13781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && latin1_encoding.type[c] != BT_NONXML 13791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && c != i) 13801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 13811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.type[i] = latin1_encoding.type[c]; 13821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf8[i][0] = 1; 13831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf8[i][1] = (char)c; 13841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); 13851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 13861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else if (checkCharRefNumber(c) < 0) { 13871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.type[i] = BT_NONXML; 13881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* This shouldn't really get used. */ 13891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf16[i] = 0xFFFF; 13901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf8[i][0] = 1; 13911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf8[i][1] = 0; 13921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 13931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else { 13941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c > 0xFFFF) 13951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return 0; 13961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) 13971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.type[i] = BT_NMSTRT; 13981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) 13991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.type[i] = BT_NAME; 14001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else 14011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.type[i] = BT_OTHER; 14021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); 14031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->utf16[i] = (unsigned short)c; 14041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 14051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 14061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->userData = userData; 14071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->convert = convert; 14081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (convert) { 14091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.isName2 = unknown_isName; 14101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.isName3 = unknown_isName; 14111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.isName4 = unknown_isName; 14121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.isNmstrt2 = unknown_isNmstrt; 14131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.isNmstrt3 = unknown_isNmstrt; 14141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.isNmstrt4 = unknown_isNmstrt; 14151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.isInvalid2 = unknown_isInvalid; 14161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.isInvalid3 = unknown_isInvalid; 14171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.isInvalid4 = unknown_isInvalid; 14181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 14191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.enc.utf8Convert = unknown_toUtf8; 14201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci e->normal.enc.utf16Convert = unknown_toUtf16; 14211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return &(e->normal.enc); 14221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 14231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 14241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* If this enumeration is changed, getEncodingIndex and encodings 14251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccimust also be changed. */ 14261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccienum { 14271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UNKNOWN_ENC = -1, 14281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ISO_8859_1_ENC = 0, 14291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci US_ASCII_ENC, 14301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UTF_8_ENC, 14311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UTF_16_ENC, 14321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UTF_16BE_ENC, 14331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UTF_16LE_ENC, 14341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* must match encodingNames up to here */ 14351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci NO_ENC 14361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 14371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 14381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_ISO_8859_1[] = { 14391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, 14401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_MINUS, ASCII_1, '\0' 14411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 14421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_US_ASCII[] = { 14431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, 14441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci '\0' 14451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 14461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_UTF_8[] = { 14471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' 14481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 14491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_UTF_16[] = { 14501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' 14511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 14521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_UTF_16BE[] = { 14531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, 14541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci '\0' 14551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 14561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic const char KW_UTF_16LE[] = { 14571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, 14581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci '\0' 14591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}; 14601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 14611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int FASTCALL 14621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TuccigetEncodingIndex(const char *name) 14631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 14641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci static const char * const encodingNames[] = { 14651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci KW_ISO_8859_1, 14661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci KW_US_ASCII, 14671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci KW_UTF_8, 14681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci KW_UTF_16, 14691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci KW_UTF_16BE, 14701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci KW_UTF_16LE, 14711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci }; 14721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int i; 14731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (name == NULL) 14741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return NO_ENC; 14751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++) 14761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (streqci(name, encodingNames[i])) 14771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return i; 14781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return UNKNOWN_ENC; 14791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 14801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 14811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* For binary compatibility, we store the index of the encoding 14821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci specified at initialization in the isUtf16 member. 14831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci*/ 14841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 14851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) 14861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) 14871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 14881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/* This is what detects the encoding. encodingTable maps from 14891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of 14901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci the external (protocol) specified encoding; state is 14911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci XML_CONTENT_STATE if we're parsing an external text entity, and 14921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci XML_PROLOG_STATE otherwise. 14931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci*/ 14941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 14951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 14961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccistatic int 14971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciinitScan(const ENCODING * const *encodingTable, 14981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const INIT_ENCODING *enc, 14991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int state, 15001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *ptr, 15011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char *end, 15021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const char **nextTokPtr) 15031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 15041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const ENCODING **encPtr; 15051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 15061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (ptr == end) 15071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XML_TOK_NONE; 15081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci encPtr = enc->encPtr; 15091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (ptr + 1 == end) { 15101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* only a single byte available for auto-detection */ 15111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifndef XML_DTD /* FIXME */ 15121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* a well-formed document entity must have more than one byte */ 15131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (state != XML_CONTENT_STATE) 15141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XML_TOK_PARTIAL; 15151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif 15161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* so we're parsing an external text entity... */ 15171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* if UTF-16 was externally specified, then we need at least 2 bytes */ 15181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci switch (INIT_ENC_INDEX(enc)) { 15191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case UTF_16_ENC: 15201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case UTF_16LE_ENC: 15211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case UTF_16BE_ENC: 15221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XML_TOK_PARTIAL; 15231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 15241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci switch ((unsigned char)*ptr) { 15251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xFE: 15261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xFF: 15271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xEF: /* possibly first byte of UTF-8 BOM */ 15281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC 15291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && state == XML_CONTENT_STATE) 15301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 15311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* fall through */ 15321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0x00: 15331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0x3C: 15341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XML_TOK_PARTIAL; 15351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 15361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 15371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else { 15381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { 15391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xFEFF: 15401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC 15411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && state == XML_CONTENT_STATE) 15421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 15431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nextTokPtr = ptr + 2; 15441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *encPtr = encodingTable[UTF_16BE_ENC]; 15451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XML_TOK_BOM; 15461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* 00 3C is handled in the default case */ 15471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0x3C00: 15481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC 15491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci || INIT_ENC_INDEX(enc) == UTF_16_ENC) 15501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && state == XML_CONTENT_STATE) 15511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 15521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *encPtr = encodingTable[UTF_16LE_ENC]; 15531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 15541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xFFFE: 15551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC 15561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci && state == XML_CONTENT_STATE) 15571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 15581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nextTokPtr = ptr + 2; 15591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *encPtr = encodingTable[UTF_16LE_ENC]; 15601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XML_TOK_BOM; 15611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case 0xEFBB: 15621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* Maybe a UTF-8 BOM (EF BB BF) */ 15631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* If there's an explicitly specified (external) encoding 15641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci of ISO-8859-1 or some flavour of UTF-16 15651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci and this is an external text entity, 15661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci don't look for the BOM, 15671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci because it might be a legal data. 15681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci */ 15691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (state == XML_CONTENT_STATE) { 15701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int e = INIT_ENC_INDEX(enc); 15711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC 15721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci || e == UTF_16LE_ENC || e == UTF_16_ENC) 15731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 15741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 15751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (ptr + 2 == end) 15761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XML_TOK_PARTIAL; 15771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if ((unsigned char)ptr[2] == 0xBF) { 15781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *nextTokPtr = ptr + 3; 15791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *encPtr = encodingTable[UTF_8_ENC]; 15801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XML_TOK_BOM; 15811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 15821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 15831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci default: 15841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (ptr[0] == '\0') { 15851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* 0 isn't a legal data character. Furthermore a document 15861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci entity can only start with ASCII characters. So the only 15871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci way this can fail to be big-endian UTF-16 if it it's an 15881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci external parsed general entity that's labelled as 15891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UTF-16LE. 15901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci */ 15911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) 15921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 15931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *encPtr = encodingTable[UTF_16BE_ENC]; 15941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 15951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 15961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci else if (ptr[1] == '\0') { 15971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci /* We could recover here in the case: 15981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci - parsing an external entity 15991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci - second byte is 0 16001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci - no externally specified encoding 16011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci - no encoding declaration 16021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci by assuming UTF-16LE. But we don't, because this would mean when 16031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci presented just with a single byte, we couldn't reliably determine 16041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci whether we needed further bytes. 16051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci */ 16061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (state == XML_CONTENT_STATE) 16071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 16081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *encPtr = encodingTable[UTF_16LE_ENC]; 16091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 16101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 16111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 16121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 16131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 16141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *encPtr = encodingTable[INIT_ENC_INDEX(enc)]; 16151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 16161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 16171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 16181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 16191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define NS(x) x 16201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define ns(x) x 16211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define XML_TOK_NS_C 16221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "xmltok_ns.c" 16231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef XML_TOK_NS_C 16241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef NS 16251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef ns 16261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 16271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifdef XML_NS 16281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 16291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define NS(x) x ## NS 16301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define ns(x) x ## _ns 16311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 16321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define XML_TOK_NS_C 16331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "xmltok_ns.c" 16341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef XML_TOK_NS_C 16351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 16361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef NS 16371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#undef ns 16381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 16391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciENCODING * 16401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciXmlInitUnknownEncodingNS(void *mem, 16411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci int *table, 16421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci CONVERTER convert, 16431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci void *userData) 16441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci{ 16451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); 16461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (enc) 16471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; 16481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return enc; 16491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 16501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 16511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif /* XML_NS */ 1652