// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2000-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnvisci.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001JUN26
*   created by: Ram Viswanadha
*
*   Date        Name        Description
*   24/7/2001   Ram         Added support for EXT character handling
*/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
23
24#include "unicode/ucnv.h"
25#include "unicode/ucnv_cb.h"
26#include "unicode/utf16.h"
27#include "cmemory.h"
28#include "ucnv_bld.h"
29#include "ucnv_cnv.h"
30#include "cstring.h"
31#include "uassert.h"
32
/* Low nibble of the converter options; selects the row of lookupInitialData. */
#define UCNV_OPTIONS_VERSION_MASK 0xf

/* Unicode code points used for contextual handling. */
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero width Joiner */
#define INVALID_CHAR        0xffff

/* ISCII byte values. */
#define ATR                 0xEF   /* Attribute code */
#define EXT                 0xF0   /* Extension code */
#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0

/* Extent of the Unicode Indic blocks handled by this converter. */
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE
/* Parenthesized so the product keeps its value inside larger expressions. */
#define TELUGU_DELTA        (DELTA * TELUGU)
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE

/* Gurmukhi (PNJ) specific code points and block delta. */
#define PNJ_DELTA           0x0100
#define PNJ_BINDI           0x0A02
#define PNJ_TIPPI           0x0A70
#define PNJ_SIGN_VIRAMA     0x0A4D
#define PNJ_ADHAK           0x0A71
#define PNJ_HA              0x0A39
#define PNJ_RRA             0x0A5C
68
/**
 * Script indexes in Unicode block order, starting at Devanagari.
 * lookupInitialData lists its rows in this same order, and _ISCIIOpen
 * multiplies a script index by DELTA to obtain that script's delta.
 */
typedef enum {
    DEVANAGARI = 0,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    DELTA = 0x80    /* size of one script block, not a script index */
} UniLang;
81
/**
 * Script codes that follow the ATR byte in an ISCII stream.
 * The code page is switched when <ATR> + <one of the values below>
 * is encountered.
 */
typedef enum {
    DEF = 0x40,
    RMN = 0x41,
    DEV = 0x42,
    BNG = 0x43,
    TML = 0x44,
    TLG = 0x45,
    ASM = 0x46,
    ORI = 0x47,
    KND = 0x48,
    MLM = 0x49,
    GJR = 0x4A,
    PNJ = 0x4B,
    ARB = 0x71,
    PES = 0x72,
    URD = 0x73,
    SND = 0x74,
    KSM = 0x75,
    PST = 0x76
} ISCIILang;
106
/**
 * One bit per script (or script pair). The entries of validityTable are
 * sums of these bits; ZERO fills the position of a script for which the
 * code point is not valid.
 */
typedef enum {
    DEV_MASK = 0x80,
    PNJ_MASK = 0x40,
    GJR_MASK = 0x20,
    ORI_MASK = 0x10,
    BNG_MASK = 0x08,
    KND_MASK = 0x04,
    MLM_MASK = 0x02,
    TML_MASK = 0x01,
    ZERO     = 0x00
} MaskEnum;
118
119#define ISCII_CNV_PREFIX "ISCII,version="
120
121typedef struct {
122    UChar contextCharToUnicode;         /* previous Unicode codepoint for contextual analysis */
123    UChar contextCharFromUnicode;       /* previous Unicode codepoint for contextual analysis */
124    uint16_t defDeltaToUnicode;         /* delta for switching to default state when DEF is encountered  */
125    uint16_t currentDeltaFromUnicode;   /* current delta in Indic block */
126    uint16_t currentDeltaToUnicode;     /* current delta in Indic block */
127    MaskEnum currentMaskFromUnicode;    /* mask for current state in toUnicode */
128    MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
129    MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode */
130    UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
131    UBool resetToDefaultToUnicode;      /* boolean for reseting to default delta and mask when a newline is encountered*/
132    char name[sizeof(ISCII_CNV_PREFIX) + 1];
133    UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
134} UConverterDataISCII;
135
136typedef struct LookupDataStruct {
137    UniLang uniLang;
138    MaskEnum maskEnum;
139    ISCIILang isciiLang;
140} LookupDataStruct;
141
142static const LookupDataStruct lookupInitialData[]={
143    { DEVANAGARI, DEV_MASK,  DEV },
144    { BENGALI,    BNG_MASK,  BNG },
145    { GURMUKHI,   PNJ_MASK,  PNJ },
146    { GUJARATI,   GJR_MASK,  GJR },
147    { ORIYA,      ORI_MASK,  ORI },
148    { TAMIL,      TML_MASK,  TML },
149    { TELUGU,     KND_MASK,  TLG },
150    { KANNADA,    KND_MASK,  KND },
151    { MALAYALAM,  MLM_MASK,  MLM }
152};
153
/*
 * Flags for special handling of certain Gurmukhi characters, indexed by
 * (code point - 0x0A00) for 0x0A00..0x0A4F.
 * Bit 0 (value 1): PNJ consonant
 * Bit 1 (value 2): PNJ Bindi Tippi
 */
static const uint8_t pnjMap[80] = {
    /* 0A00..0A0F */
    0, 0, 0, 0, 0, 2, 0, 2,  0, 0, 0, 0, 0, 0, 0, 0,
    /* 0A10..0A1F */
    0, 0, 0, 0, 0, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,
    /* 0A20..0A2F */
    3, 3, 3, 3, 3, 3, 3, 3,  3, 0, 3, 3, 3, 3, 3, 3,
    /* 0A30..0A3F */
    3, 0, 0, 0, 0, 3, 3, 0,  3, 3, 0, 0, 0, 0, 0, 2,
    /* 0A40..0A4F */
    0, 2, 2, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
};
171
172static UBool
173isPNJConsonant(UChar32 c) {
174    if (c < 0xa00 || 0xa50 <= c) {
175        return FALSE;
176    } else {
177        return (UBool)(pnjMap[c - 0xa00] & 1);
178    }
179}
180
181static UBool
182isPNJBindiTippi(UChar32 c) {
183    if (c < 0xa00 || 0xa50 <= c) {
184        return FALSE;
185    } else {
186        return (UBool)(pnjMap[c - 0xa00] >> 1);
187    }
188}
189
190static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
191    if(pArgs->onlyTestIsLoadable) {
192        return;
193    }
194
195    cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
196
197    if (cnv->extraInfo != NULL) {
198        int32_t len=0;
199        UConverterDataISCII *converterData=
200                (UConverterDataISCII *) cnv->extraInfo;
201        converterData->contextCharToUnicode=NO_CHAR_MARKER;
202        cnv->toUnicodeStatus = missingCharMarker;
203        converterData->contextCharFromUnicode=0x0000;
204        converterData->resetToDefaultToUnicode=FALSE;
205        /* check if the version requested is supported */
206        if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {
207            /* initialize state variables */
208            converterData->currentDeltaFromUnicode
209                    = converterData->currentDeltaToUnicode
210                            = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
211
212            converterData->currentMaskFromUnicode
213                    = converterData->currentMaskToUnicode
214                            = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
215
216            converterData->isFirstBuffer=TRUE;
217            (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
218            len = (int32_t)uprv_strlen(converterData->name);
219            converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
220            converterData->name[len+1]=0;
221
222            converterData->prevToUnicodeStatus = 0x0000;
223        } else {
224            uprv_free(cnv->extraInfo);
225            cnv->extraInfo = NULL;
226            *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
227        }
228
229    } else {
230        *errorCode =U_MEMORY_ALLOCATION_ERROR;
231    }
232}
233
234static void _ISCIIClose(UConverter *cnv) {
235    if (cnv->extraInfo!=NULL) {
236        if (!cnv->isExtraLocal) {
237            uprv_free(cnv->extraInfo);
238        }
239        cnv->extraInfo=NULL;
240    }
241}
242
243static const char* _ISCIIgetName(const UConverter* cnv) {
244    if (cnv->extraInfo) {
245        UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
246        return myData->name;
247    }
248    return NULL;
249}
250
251static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
252    UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
253    if (choice<=UCNV_RESET_TO_UNICODE) {
254        cnv->toUnicodeStatus = missingCharMarker;
255        cnv->mode=0;
256        data->currentDeltaToUnicode=data->defDeltaToUnicode;
257        data->currentMaskToUnicode = data->defMaskToUnicode;
258        data->contextCharToUnicode=NO_CHAR_MARKER;
259        data->prevToUnicodeStatus = 0x0000;
260    }
261    if (choice!=UCNV_RESET_TO_UNICODE) {
262        cnv->fromUChar32=0x0000;
263        data->contextCharFromUnicode=0x00;
264        data->currentMaskFromUnicode=data->defMaskToUnicode;
265        data->currentDeltaFromUnicode=data->defDeltaToUnicode;
266        data->isFirstBuffer=TRUE;
267        data->resetToDefaultToUnicode=FALSE;
268    }
269}
270
271/**
272 * The values in validity table are indexed by the lower bits of Unicode
273 * range 0x0900 - 0x09ff. The values have a structure like:
274 *       ---------------------------------------------------------------
275 *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
276 *      |       |       |       |       | ASM   | KND   |       |       |
277 *       ---------------------------------------------------------------
278 * If a code point is valid in a particular script
279 * then that bit is turned on
280 *
281 * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for
282 * to represent these languages
283 *
284 * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
285 * and combine and use 1 bit to represent these languages.
286 *
287 * TODO: It is probably easier to understand and maintain to change this
288 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
289 */
290
291static const uint8_t validityTable[128] = {
292/* This state table is tool generated please do not edit unless you know exactly what you are doing */
293/* Note: This table was edited to mirror the Windows XP implementation */
294/*ISCII:Valid:Unicode */
295/*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
296/*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
297/*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
298/*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
299/*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
300/*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
301/*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
302/*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
303/*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
304/*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
305/*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
306/*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
307/*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
308/*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
309/*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
310/*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
311/*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
312/*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
313/*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
314/*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
315/*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
316/*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
317/*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
318/*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
319/*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
320/*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
321/*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
322/*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
323/*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
324/*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
325/*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
326/*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
327/*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
328/*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
329/*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
330/*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
331/*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
332/*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
333/*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
334/*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
335/*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
336/*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
337/*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
338/*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
339/*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
340/*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
341/*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
342/*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
343/*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
344/*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
345/*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
346/*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
347/*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
348/*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
349/*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
350/*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
351/*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
352/*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
353/*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
354/*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
355/*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
356/*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
357/*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
358/*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
359/*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
360/*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
361/*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
362/*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
363/*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
364/*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
365/*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
366/*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
367/*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
368/*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
369/*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
370/*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
371/*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
372/*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
373/*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
374/*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
375/*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
376/*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
377/*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
378/*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
379/*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
380/*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
381/*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
382/*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
383/*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
384/*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
385/*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
386/*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
387/*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
388/*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
389/*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
390/*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
391/*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
392/*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
393/*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
394/*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
395/*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
396/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
397/*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
398/*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
399/*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
400/*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
401/*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
402/*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
403/*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
404/*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
405/*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
406/*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
407/*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
408/*
409 * The length of the array is 128 to provide values for 0x900..0x97f.
410 * The last 15 entries for 0x971..0x97f of the validity table are all zero
411 * because no Indic script uses such Unicode code points.
412 */
413/*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
414};
415
/*
 * Unicode -> ISCII mapping, indexed by the offset of the code point within
 * 0x0900..0x097f (the comment on each entry names that code point).
 * Values up to 0x00ff are a single ISCII byte. Larger values appear to pack
 * two ISCII bytes (e.g. 0xF0B8 is EXT followed by 0xB8, 0xeaea is two
 * ISCII_DANDA bytes) -- NOTE(review): confirm the byte order in the code that
 * writes these values. 0xFFFF (the value of INVALID_CHAR) fills the slots
 * that have no mapping listed.
 */
static const uint16_t fromUnicodeTable[128]={
    0x00a0 ,/* 0x0900 */
    0x00a1 ,/* 0x0901 */
    0x00a2 ,/* 0x0902 */
    0x00a3 ,/* 0x0903 */
    0xa4e0 ,/* 0x0904 */
    0x00a4 ,/* 0x0905 */
    0x00a5 ,/* 0x0906 */
    0x00a6 ,/* 0x0907 */
    0x00a7 ,/* 0x0908 */
    0x00a8 ,/* 0x0909 */
    0x00a9 ,/* 0x090a */
    0x00aa ,/* 0x090b */
    0xA6E9 ,/* 0x090c */
    0x00ae ,/* 0x090d */
    0x00ab ,/* 0x090e */
    0x00ac ,/* 0x090f */
    0x00ad ,/* 0x0910 */
    0x00b2 ,/* 0x0911 */
    0x00af ,/* 0x0912 */
    0x00b0 ,/* 0x0913 */
    0x00b1 ,/* 0x0914 */
    0x00b3 ,/* 0x0915 */
    0x00b4 ,/* 0x0916 */
    0x00b5 ,/* 0x0917 */
    0x00b6 ,/* 0x0918 */
    0x00b7 ,/* 0x0919 */
    0x00b8 ,/* 0x091a */
    0x00b9 ,/* 0x091b */
    0x00ba ,/* 0x091c */
    0x00bb ,/* 0x091d */
    0x00bc ,/* 0x091e */
    0x00bd ,/* 0x091f */
    0x00be ,/* 0x0920 */
    0x00bf ,/* 0x0921 */
    0x00c0 ,/* 0x0922 */
    0x00c1 ,/* 0x0923 */
    0x00c2 ,/* 0x0924 */
    0x00c3 ,/* 0x0925 */
    0x00c4 ,/* 0x0926 */
    0x00c5 ,/* 0x0927 */
    0x00c6 ,/* 0x0928 */
    0x00c7 ,/* 0x0929 */
    0x00c8 ,/* 0x092a */
    0x00c9 ,/* 0x092b */
    0x00ca ,/* 0x092c */
    0x00cb ,/* 0x092d */
    0x00cc ,/* 0x092e */
    0x00cd ,/* 0x092f */
    0x00cf ,/* 0x0930 */
    0x00d0 ,/* 0x0931 */
    0x00d1 ,/* 0x0932 */
    0x00d2 ,/* 0x0933 */
    0x00d3 ,/* 0x0934 */
    0x00d4 ,/* 0x0935 */
    0x00d5 ,/* 0x0936 */
    0x00d6 ,/* 0x0937 */
    0x00d7 ,/* 0x0938 */
    0x00d8 ,/* 0x0939 */
    0xFFFF ,/* 0x093A */
    0xFFFF ,/* 0x093B */
    0x00e9 ,/* 0x093c */
    0xEAE9 ,/* 0x093d */
    0x00da ,/* 0x093e */
    0x00db ,/* 0x093f */
    0x00dc ,/* 0x0940 */
    0x00dd ,/* 0x0941 */
    0x00de ,/* 0x0942 */
    0x00df ,/* 0x0943 */
    0xDFE9 ,/* 0x0944 */
    0x00e3 ,/* 0x0945 */
    0x00e0 ,/* 0x0946 */
    0x00e1 ,/* 0x0947 */
    0x00e2 ,/* 0x0948 */
    0x00e7 ,/* 0x0949 */
    0x00e4 ,/* 0x094a */
    0x00e5 ,/* 0x094b */
    0x00e6 ,/* 0x094c */
    0x00e8 ,/* 0x094d */
    0x00ec ,/* 0x094e */
    0x00ed ,/* 0x094f */
    0xA1E9 ,/* 0x0950 */ /* OM Symbol */
    0xFFFF ,/* 0x0951 */
    0xF0B8 ,/* 0x0952 */
    0xFFFF ,/* 0x0953 */
    0xFFFF ,/* 0x0954 */
    0xFFFF ,/* 0x0955 */
    0xFFFF ,/* 0x0956 */
    0xFFFF ,/* 0x0957 */
    0xb3e9 ,/* 0x0958 */
    0xb4e9 ,/* 0x0959 */
    0xb5e9 ,/* 0x095a */
    0xbae9 ,/* 0x095b */
    0xbfe9 ,/* 0x095c */
    0xC0E9 ,/* 0x095d */
    0xc9e9 ,/* 0x095e */
    0x00ce ,/* 0x095f */
    0xAAe9 ,/* 0x0960 */
    0xA7E9 ,/* 0x0961 */
    0xDBE9 ,/* 0x0962 */
    0xDCE9 ,/* 0x0963 */
    0x00ea ,/* 0x0964 */
    0xeaea ,/* 0x0965 */
    0x00f1 ,/* 0x0966 */
    0x00f2 ,/* 0x0967 */
    0x00f3 ,/* 0x0968 */
    0x00f4 ,/* 0x0969 */
    0x00f5 ,/* 0x096a */
    0x00f6 ,/* 0x096b */
    0x00f7 ,/* 0x096c */
    0x00f8 ,/* 0x096d */
    0x00f9 ,/* 0x096e */
    0x00fa ,/* 0x096f */
    0xF0BF ,/* 0x0970 */
    0xFFFF ,/* 0x0971 */
    0xFFFF ,/* 0x0972 */
    0xFFFF ,/* 0x0973 */
    0xFFFF ,/* 0x0974 */
    0xFFFF ,/* 0x0975 */
    0xFFFF ,/* 0x0976 */
    0xFFFF ,/* 0x0977 */
    0xFFFF ,/* 0x0978 */
    0xFFFF ,/* 0x0979 */
    0xFFFF ,/* 0x097a */
    0xFFFF ,/* 0x097b */
    0xFFFF ,/* 0x097c */
    0xFFFF ,/* 0x097d */
    0xFFFF ,/* 0x097e */
    0xFFFF ,/* 0x097f */
};
546static const uint16_t toUnicodeTable[256]={
547    0x0000,/* 0x00 */
548    0x0001,/* 0x01 */
549    0x0002,/* 0x02 */
550    0x0003,/* 0x03 */
551    0x0004,/* 0x04 */
552    0x0005,/* 0x05 */
553    0x0006,/* 0x06 */
554    0x0007,/* 0x07 */
555    0x0008,/* 0x08 */
556    0x0009,/* 0x09 */
557    0x000a,/* 0x0a */
558    0x000b,/* 0x0b */
559    0x000c,/* 0x0c */
560    0x000d,/* 0x0d */
561    0x000e,/* 0x0e */
562    0x000f,/* 0x0f */
563    0x0010,/* 0x10 */
564    0x0011,/* 0x11 */
565    0x0012,/* 0x12 */
566    0x0013,/* 0x13 */
567    0x0014,/* 0x14 */
568    0x0015,/* 0x15 */
569    0x0016,/* 0x16 */
570    0x0017,/* 0x17 */
571    0x0018,/* 0x18 */
572    0x0019,/* 0x19 */
573    0x001a,/* 0x1a */
574    0x001b,/* 0x1b */
575    0x001c,/* 0x1c */
576    0x001d,/* 0x1d */
577    0x001e,/* 0x1e */
578    0x001f,/* 0x1f */
579    0x0020,/* 0x20 */
580    0x0021,/* 0x21 */
581    0x0022,/* 0x22 */
582    0x0023,/* 0x23 */
583    0x0024,/* 0x24 */
584    0x0025,/* 0x25 */
585    0x0026,/* 0x26 */
586    0x0027,/* 0x27 */
587    0x0028,/* 0x28 */
588    0x0029,/* 0x29 */
589    0x002a,/* 0x2a */
590    0x002b,/* 0x2b */
591    0x002c,/* 0x2c */
592    0x002d,/* 0x2d */
593    0x002e,/* 0x2e */
594    0x002f,/* 0x2f */
595    0x0030,/* 0x30 */
596    0x0031,/* 0x31 */
597    0x0032,/* 0x32 */
598    0x0033,/* 0x33 */
599    0x0034,/* 0x34 */
600    0x0035,/* 0x35 */
601    0x0036,/* 0x36 */
602    0x0037,/* 0x37 */
603    0x0038,/* 0x38 */
604    0x0039,/* 0x39 */
605    0x003A,/* 0x3A */
606    0x003B,/* 0x3B */
607    0x003c,/* 0x3c */
608    0x003d,/* 0x3d */
609    0x003e,/* 0x3e */
610    0x003f,/* 0x3f */
611    0x0040,/* 0x40 */
612    0x0041,/* 0x41 */
613    0x0042,/* 0x42 */
614    0x0043,/* 0x43 */
615    0x0044,/* 0x44 */
616    0x0045,/* 0x45 */
617    0x0046,/* 0x46 */
618    0x0047,/* 0x47 */
619    0x0048,/* 0x48 */
620    0x0049,/* 0x49 */
621    0x004a,/* 0x4a */
622    0x004b,/* 0x4b */
623    0x004c,/* 0x4c */
624    0x004d,/* 0x4d */
625    0x004e,/* 0x4e */
626    0x004f,/* 0x4f */
627    0x0050,/* 0x50 */
628    0x0051,/* 0x51 */
629    0x0052,/* 0x52 */
630    0x0053,/* 0x53 */
631    0x0054,/* 0x54 */
632    0x0055,/* 0x55 */
633    0x0056,/* 0x56 */
634    0x0057,/* 0x57 */
635    0x0058,/* 0x58 */
636    0x0059,/* 0x59 */
637    0x005a,/* 0x5a */
638    0x005b,/* 0x5b */
639    0x005c,/* 0x5c */
640    0x005d,/* 0x5d */
641    0x005e,/* 0x5e */
642    0x005f,/* 0x5f */
643    0x0060,/* 0x60 */
644    0x0061,/* 0x61 */
645    0x0062,/* 0x62 */
646    0x0063,/* 0x63 */
647    0x0064,/* 0x64 */
648    0x0065,/* 0x65 */
649    0x0066,/* 0x66 */
650    0x0067,/* 0x67 */
651    0x0068,/* 0x68 */
652    0x0069,/* 0x69 */
653    0x006a,/* 0x6a */
654    0x006b,/* 0x6b */
655    0x006c,/* 0x6c */
656    0x006d,/* 0x6d */
657    0x006e,/* 0x6e */
658    0x006f,/* 0x6f */
659    0x0070,/* 0x70 */
660    0x0071,/* 0x71 */
661    0x0072,/* 0x72 */
662    0x0073,/* 0x73 */
663    0x0074,/* 0x74 */
664    0x0075,/* 0x75 */
665    0x0076,/* 0x76 */
666    0x0077,/* 0x77 */
667    0x0078,/* 0x78 */
668    0x0079,/* 0x79 */
669    0x007a,/* 0x7a */
670    0x007b,/* 0x7b */
671    0x007c,/* 0x7c */
672    0x007d,/* 0x7d */
673    0x007e,/* 0x7e */
674    0x007f,/* 0x7f */
675    0x0080,/* 0x80 */
676    0x0081,/* 0x81 */
677    0x0082,/* 0x82 */
678    0x0083,/* 0x83 */
679    0x0084,/* 0x84 */
680    0x0085,/* 0x85 */
681    0x0086,/* 0x86 */
682    0x0087,/* 0x87 */
683    0x0088,/* 0x88 */
684    0x0089,/* 0x89 */
685    0x008a,/* 0x8a */
686    0x008b,/* 0x8b */
687    0x008c,/* 0x8c */
688    0x008d,/* 0x8d */
689    0x008e,/* 0x8e */
690    0x008f,/* 0x8f */
691    0x0090,/* 0x90 */
692    0x0091,/* 0x91 */
693    0x0092,/* 0x92 */
694    0x0093,/* 0x93 */
695    0x0094,/* 0x94 */
696    0x0095,/* 0x95 */
697    0x0096,/* 0x96 */
698    0x0097,/* 0x97 */
699    0x0098,/* 0x98 */
700    0x0099,/* 0x99 */
701    0x009a,/* 0x9a */
702    0x009b,/* 0x9b */
703    0x009c,/* 0x9c */
704    0x009d,/* 0x9d */
705    0x009e,/* 0x9e */
706    0x009f,/* 0x9f */
707    0x00A0,/* 0xa0 */
708    0x0901,/* 0xa1 */
709    0x0902,/* 0xa2 */
710    0x0903,/* 0xa3 */
711    0x0905,/* 0xa4 */
712    0x0906,/* 0xa5 */
713    0x0907,/* 0xa6 */
714    0x0908,/* 0xa7 */
715    0x0909,/* 0xa8 */
716    0x090a,/* 0xa9 */
717    0x090b,/* 0xaa */
718    0x090e,/* 0xab */
719    0x090f,/* 0xac */
720    0x0910,/* 0xad */
721    0x090d,/* 0xae */
722    0x0912,/* 0xaf */
723    0x0913,/* 0xb0 */
724    0x0914,/* 0xb1 */
725    0x0911,/* 0xb2 */
726    0x0915,/* 0xb3 */
727    0x0916,/* 0xb4 */
728    0x0917,/* 0xb5 */
729    0x0918,/* 0xb6 */
730    0x0919,/* 0xb7 */
731    0x091a,/* 0xb8 */
732    0x091b,/* 0xb9 */
733    0x091c,/* 0xba */
734    0x091d,/* 0xbb */
735    0x091e,/* 0xbc */
736    0x091f,/* 0xbd */
737    0x0920,/* 0xbe */
738    0x0921,/* 0xbf */
739    0x0922,/* 0xc0 */
740    0x0923,/* 0xc1 */
741    0x0924,/* 0xc2 */
742    0x0925,/* 0xc3 */
743    0x0926,/* 0xc4 */
744    0x0927,/* 0xc5 */
745    0x0928,/* 0xc6 */
746    0x0929,/* 0xc7 */
747    0x092a,/* 0xc8 */
748    0x092b,/* 0xc9 */
749    0x092c,/* 0xca */
750    0x092d,/* 0xcb */
751    0x092e,/* 0xcc */
752    0x092f,/* 0xcd */
753    0x095f,/* 0xce */
754    0x0930,/* 0xcf */
755    0x0931,/* 0xd0 */
756    0x0932,/* 0xd1 */
757    0x0933,/* 0xd2 */
758    0x0934,/* 0xd3 */
759    0x0935,/* 0xd4 */
760    0x0936,/* 0xd5 */
761    0x0937,/* 0xd6 */
762    0x0938,/* 0xd7 */
763    0x0939,/* 0xd8 */
764    0x200D,/* 0xd9 */
765    0x093e,/* 0xda */
766    0x093f,/* 0xdb */
767    0x0940,/* 0xdc */
768    0x0941,/* 0xdd */
769    0x0942,/* 0xde */
770    0x0943,/* 0xdf */
771    0x0946,/* 0xe0 */
772    0x0947,/* 0xe1 */
773    0x0948,/* 0xe2 */
774    0x0945,/* 0xe3 */
775    0x094a,/* 0xe4 */
776    0x094b,/* 0xe5 */
777    0x094c,/* 0xe6 */
778    0x0949,/* 0xe7 */
779    0x094d,/* 0xe8 */
780    0x093c,/* 0xe9 */
781    0x0964,/* 0xea */
782    0xFFFF,/* 0xeb */
783    0xFFFF,/* 0xec */
784    0xFFFF,/* 0xed */
785    0xFFFF,/* 0xee */
786    0xFFFF,/* 0xef */
787    0xFFFF,/* 0xf0 */
788    0x0966,/* 0xf1 */
789    0x0967,/* 0xf2 */
790    0x0968,/* 0xf3 */
791    0x0969,/* 0xf4 */
792    0x096a,/* 0xf5 */
793    0x096b,/* 0xf6 */
794    0x096c,/* 0xf7 */
795    0x096d,/* 0xf8 */
796    0x096e,/* 0xf9 */
797    0x096f,/* 0xfa */
798    0xFFFF,/* 0xfb */
799    0xFFFF,/* 0xfc */
800    0xFFFF,/* 0xfd */
801    0xFFFF,/* 0xfe */
802    0xFFFF /* 0xff */
803};
804
/*
 * Special cases for ISCII VOWEL SIGN E (0xE0) in the ISCII-to-Unicode direction.
 *
 * Row 0 is a header: its first element is the number of rows in the table
 * (header included) and bounds the reader's loop; its second element is unused.
 * Every following row is { previous ISCII byte, Unicode code point to emit }.
 */
static const uint16_t vowelSignESpecialCases[][2] = {
    /* header: row count, unused */
    { 2,    0x0000 },
    /* previous ISCII byte, resulting code point */
    { 0xA4, 0x0904 },
};
809
/*
 * Special cases for ISCII NUKTA (0xE9).
 *
 * Row 0 is a header whose first element is the number of rows in the table
 * (header included); its second element is unused.
 * Every following row pairs an ISCII byte with a Unicode code point.
 * NOTE(review): the reader is not next to this table; presumably the first
 * column is the byte that precedes the nukta and the second column is the
 * pre-composed character emitted for the pair - confirm against the
 * ISCII_NUKTA handling in the to-Unicode converter.
 */
static const uint16_t nuktaSpecialCases[][2] = {
    /* header: row count, unused */
    { 16,   0x0000 },
    /* ISCII byte, Unicode code point */
    { 0xA6, 0x090C },
    { 0xEA, 0x093D },
    { 0xDF, 0x0944 },
    { 0xA1, 0x0950 },
    { 0xB3, 0x0958 },
    { 0xB4, 0x0959 },
    { 0xB5, 0x095A },
    { 0xBA, 0x095B },
    { 0xBF, 0x095C },
    { 0xC0, 0x095D },
    { 0xC9, 0x095E },
    { 0xAA, 0x0960 },
    { 0xA7, 0x0961 },
    { 0xDB, 0x0962 },
    { 0xDC, 0x0963 },
};
828
829
/*
 * WRITE_TO_TARGET_FROM_U - append one ISCII unit to the output of a
 * from-Unicode conversion.
 *
 *   args            converter arguments; (args)->source is the start of the
 *                   source buffer, (args)->converter owns charErrorBuffer
 *   offsets         offsets cursor or NULL; advanced once per byte written
 *   source          current source cursor; the recorded offset is
 *                   (source - (args)->source - 1)
 *   target          output cursor; advanced once per byte written
 *   targetLimit     end of the output buffer
 *   targetByteUnit  bytes to write, packed big-endian: a value <= 0xFF is one
 *                   byte, a value <= 0xFFFF two bytes, anything larger three
 *   err             error code pointer; set to U_BUFFER_OVERFLOW_ERROR when
 *                   not every byte fits, in which case the bytes that did not
 *                   fit are appended to (args)->converter->charErrorBuffer
 *
 * For a three-byte unit the offset is decremented once (when offsets is
 * non-NULL) before it is recorded, so all three bytes report offset - 1.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments (for example `hi | lo`) keep their meaning. The parameters are
 * still evaluated more than once: offsets and target must be modifiable
 * lvalues and the remaining arguments must be free of side effects.
 */
#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){      \
    int32_t offset = (int32_t)((source) - (args)->source - 1);                                  \
    /* write the target byte unit to target */                                                  \
    if((target) < (targetLimit)){                                                               \
        if((targetByteUnit) <= 0xFF){                                                           \
            /* single byte */                                                                   \
            *(target)++ = (uint8_t)(targetByteUnit);                                            \
            if(offsets){                                                                        \
                *((offsets)++) = offset;                                                        \
            }                                                                                   \
        }else{                                                                                  \
            if ((targetByteUnit) > 0xFFFF) {                                                    \
                /* three bytes: the lead byte goes out first */                                 \
                *(target)++ = (uint8_t)((targetByteUnit)>>16);                                  \
                if (offsets) {                                                                  \
                    --offset;                                                                   \
                    *((offsets)++) = offset;                                                    \
                }                                                                               \
            }                                                                                   \
            if (!((target) < (targetLimit))) {                                                  \
                /* no room left: both remaining bytes go to the overflow buffer */              \
                (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] = \
                                (uint8_t)((targetByteUnit) >> 8);                               \
                (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] = \
                                (uint8_t)(targetByteUnit);                                      \
                *(err) = U_BUFFER_OVERFLOW_ERROR;                                               \
            } else {                                                                            \
                *(target)++ = (uint8_t)((targetByteUnit)>>8);                                   \
                if(offsets){                                                                    \
                    *((offsets)++) = offset;                                                    \
                }                                                                               \
                if((target) < (targetLimit)){                                                   \
                    *(target)++ = (uint8_t)(targetByteUnit);                                    \
                    if(offsets){                                                                \
                        *((offsets)++) = offset;                                                \
                    }                                                                           \
                }else{                                                                          \
                    /* only the last byte did not fit */                                        \
                    (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] = \
                                (uint8_t)(targetByteUnit);                                      \
                    *(err) = U_BUFFER_OVERFLOW_ERROR;                                           \
                }                                                                               \
            }                                                                                   \
        }                                                                                       \
    }else{                                                                                      \
        /* target already full: every non-zero leading byte plus the low byte overflow */       \
        if ((targetByteUnit) & 0xFF0000) {                                                      \
            (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =    \
                        (uint8_t)((targetByteUnit) >>16);                                       \
        }                                                                                       \
        if((targetByteUnit) & 0xFF00){                                                          \
            (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =    \
                        (uint8_t)((targetByteUnit) >>8);                                        \
        }                                                                                       \
        (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =        \
                        (uint8_t)(targetByteUnit);                                              \
        *(err) = U_BUFFER_OVERFLOW_ERROR;                                                       \
    }                                                                                           \
}
884
885/* Rules:
886 *    Explicit Halant :
887 *                      <HALANT> + <ZWNJ>
888 *    Soft Halant :
889 *                      <HALANT> + <ZWJ>
890 */
891
/*
 * Converts UTF-16 code units in [args->source, args->sourceLimit) to ISCII
 * bytes in [args->target, args->targetLimit), recording source offsets when
 * args->offsets is non-NULL.
 *
 * Parameters:
 *   args  conversion arguments; args->source and args->target are advanced
 *         to the positions reached when the function returns normally.
 *   err   in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR (and nothing is
 *         converted) when args->converter is NULL or a limit lies before its
 *         start pointer; U_BUFFER_OVERFLOW_ERROR when the target is full
 *         (the bytes that did not fit are kept in the converter's
 *         charErrorBuffer by WRITE_TO_TARGET_FROM_U); U_INVALID_CHAR_FOUND or
 *         U_ILLEGAL_CHAR_FOUND for unmappable input, in which case the
 *         offending code point is stored in args->converter->fromUChar32.
 *
 * Behavior visible in this function:
 *   - code units <= ASCII_END are written unchanged and remembered in
 *     fromUnicodeStatus; when the remembered unit is LF, ATR plus the current
 *     script's language code is written before the next unit;
 *   - ZWNJ/ZWJ directly after a halant become ISCII_HALANT/ISCII_NUKTA;
 *     a ZWJ elsewhere becomes ISCII_INV and a ZWNJ elsewhere is dropped;
 *   - code points in the Indic blocks are normalized by subtracting the
 *     delta of their script block, mapped through fromUnicodeTable and
 *     checked against validityTable; a change of script block writes ATR
 *     plus the language code first;
 *   - in the Gurmukhi block Tippi is replaced by Bindi, and Adhak writes
 *     nothing itself but makes a following consonant C come out as
 *     C + ISCII_HALANT + C.
 */
static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
        UConverterFromUnicodeArgs * args, UErrorCode * err) {
    const UChar *source = args->source;
    const UChar *sourceLimit = args->sourceLimit;
    unsigned char *target = (unsigned char *) args->target;
    unsigned char *targetLimit = (unsigned char *) args->targetLimit;
    int32_t* offsets = args->offsets;
    uint32_t targetByteUnit = 0x0000;
    UChar32 sourceChar = 0x0000;
    UChar32 tempContextFromUnicode = 0x0000;    /* For special handling of the Gurmukhi script. */
    UConverterDataISCII *converterData;
    uint16_t newDelta=0;
    uint16_t range = 0;
    UBool deltaChanged = FALSE;

    /* reject a missing converter and inverted buffers; the early return leaves args untouched */
    if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    /* initialize data */
    converterData=(UConverterDataISCII*)args->converter->extraInfo;
    newDelta=converterData->currentDeltaFromUnicode;
    range = (uint16_t)(newDelta/DELTA);

    /* A non-zero fromUChar32 is a code point stored at the end of the loop
     * below by an earlier call; resume with it at the trail-surrogate lookup.
     * NOTE(review): presumably this is only ever a lead surrogate that ended
     * the previous buffer - confirm against the caller's reset of fromUChar32.
     */
    if ((sourceChar = args->converter->fromUChar32)!=0) {
        goto getTrail;
    }

    /*writing the char to the output stream */
    while (source < sourceLimit) {
        /* Write the language code following LF only if LF is not the last character. */
        if (args->converter->fromUnicodeStatus == LF) {
            targetByteUnit = ATR<<8;
            targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
            args->converter->fromUnicodeStatus = 0x0000;
            /* now append ATR and language code */
            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
            if (U_FAILURE(*err)) {
                break;
            }
        }

        sourceChar = *source++;
        /* remember the context as it was before this code unit changes it */
        tempContextFromUnicode = converterData->contextCharFromUnicode;

        targetByteUnit = missingCharMarker;

        /*check if input is in ASCII and C0 control codes range*/
        if (sourceChar <= ASCII_END) {
            /* remembered so that the next iteration can detect a preceding LF */
            args->converter->fromUnicodeStatus = sourceChar;
            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
            if (U_FAILURE(*err)) {
                break;
            }
            continue;
        }
        switch (sourceChar) {
        case ZWNJ:
            /* contextChar has HALANT */
            if (converterData->contextCharFromUnicode) {
                /* explicit halant: a second ISCII halant follows the one already written */
                converterData->contextCharFromUnicode = 0x00;
                targetByteUnit = ISCII_HALANT;
            } else {
                /* consume ZWNJ and continue */
                converterData->contextCharFromUnicode = 0x00;
                continue;
            }
            break;
        case ZWJ:
            /* contextChar has HALANT */
            if (converterData->contextCharFromUnicode) {
                /* soft halant: ISCII nukta follows the halant already written */
                targetByteUnit = ISCII_NUKTA;
            } else {
                targetByteUnit =ISCII_INV;
            }
            converterData->contextCharFromUnicode = 0x00;
            break;
        default:
            /* is the sourceChar in the INDIC_RANGE? */
            /* one unsigned comparison covers both bounds: a value outside
             * INDIC_BLOCK_BEGIN..INDIC_BLOCK_END yields a 16-bit difference
             * larger than INDIC_RANGE
             */
            if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
                /* Danda and Double Danda are valid in Northern scripts.. since Unicode
                 * does not include these codepoints in all Northern scripts we need to
                 * filter them out
                 */
                if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
                    /* find out to which block the sourceChar belongs*/
                    range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
                    newDelta =(uint16_t)(range*DELTA);

                    /* Now are we in the same block as the previous? */
                    if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
                        converterData->currentDeltaFromUnicode = newDelta;
                        converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
                        deltaChanged =TRUE;
                        converterData->isFirstBuffer=FALSE;
                    }

                    if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
                        if (sourceChar == PNJ_TIPPI) {
                            /* Make sure Tippi is converted to Bindi. */
                            sourceChar = PNJ_BINDI;
                        } else if (sourceChar == PNJ_ADHAK) {
                            /* This is for consonant cluster handling. */
                            converterData->contextCharFromUnicode = PNJ_ADHAK;
                        }

                    }
                    /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
                    /* now subtract the new delta from sourceChar*/
                    sourceChar -= converterData->currentDeltaFromUnicode;
                }

                /* get the target byte unit */
                /* both tables are indexed by the low byte of the normalized code point */
                targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];

                /* is the code point valid in current script? */
                if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
                    /* Vocallic RR is assigned in ISCII Telugu and Unicode */
                    if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
                        targetByteUnit=missingCharMarker;
                    }
                }

                if (deltaChanged) {
                    /* we are in a script block which is different than
                     * previous sourceChar's script block write ATR and language codes
                     */
                    uint32_t temp=0;
                    temp =(uint16_t)(ATR<<8);
                    temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
                    /* reset */
                    deltaChanged=FALSE;
                    /* now append ATR and language code */
                    WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
                    if (U_FAILURE(*err)) {
                        /* NOTE(review): this break leaves only the switch, not the
                         * while loop; the write logic after the switch still runs
                         * for the current code unit before the loop is exited.
                         */
                        break;
                    }
                }

                /* Adhak writes nothing itself; skipping the reset below keeps
                 * PNJ_ADHAK in contextCharFromUnicode for the next code unit
                 */
                if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
                    continue;
                }
            }
            /* reset context char */
            converterData->contextCharFromUnicode = 0x00;
            break;
        }
        if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
            /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
            /* reset context char */
            converterData->contextCharFromUnicode = 0x0000;
            /* pack the three bytes into one value: C in bits 16-23, halant in bits 8-15, C in bits 0-7 */
            targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
            /* write targetByteUnit to target */
            WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
            if (U_FAILURE(*err)) {
                break;
            }
        } else if (targetByteUnit != missingCharMarker) {
            if (targetByteUnit==ISCII_HALANT) {
                /* remember the halant so that a following ZWNJ/ZWJ is handled as explicit/soft halant */
                converterData->contextCharFromUnicode = (UChar)targetByteUnit;
            }
            /* write targetByteUnit to target*/
            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
            if (U_FAILURE(*err)) {
                break;
            }
        } else {
            /* oops.. the code point is unassigned */
            /*check if the char is a First surrogate*/
            if (U16_IS_SURROGATE(sourceChar)) {
                if (U16_IS_SURROGATE_LEAD(sourceChar)) {
getTrail:
                    /*look ahead to find the trail surrogate*/
                    if (source < sourceLimit) {
                        /* test the following code unit */
                        UChar trail= (*source);
                        if (U16_IS_TRAIL(trail)) {
                            source++;
                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                            /* a supplementary code point is always reported as unmappable here */
                            *err =U_INVALID_CHAR_FOUND;
                            /* convert this surrogate code point */
                            /* exit this condition tree */
                        } else {
                            /* this is an unmatched lead code unit (1st surrogate) */
                            /* callback(illegal) */
                            *err=U_ILLEGAL_CHAR_FOUND;
                        }
                    } else {
                        /* no more input */
                        /* not an error: the lead surrogate is stored in fromUChar32 below */
                        *err = U_ZERO_ERROR;
                    }
                } else {
                    /* this is an unmatched trail code unit (2nd surrogate) */
                    /* callback(illegal) */
                    *err=U_ILLEGAL_CHAR_FOUND;
                }
            } else {
                /* callback(unassigned) for a BMP code point */
                *err = U_INVALID_CHAR_FOUND;
            }

            /* keep the offending (or pending) code point in the converter and stop converting */
            args->converter->fromUChar32=sourceChar;
            break;
        }
    }/* end while (source < sourceLimit) */

    /*save the state and return */
    args->source = source;
    args->target = (char*)target;
}
1102
1103static const uint16_t lookupTable[][2]={
1104    { ZERO,       ZERO     },     /*DEFALT*/
1105    { ZERO,       ZERO     },     /*ROMAN*/
1106    { DEVANAGARI, DEV_MASK },
1107    { BENGALI,    BNG_MASK },
1108    { TAMIL,      TML_MASK },
1109    { TELUGU,     KND_MASK },
1110    { BENGALI,    BNG_MASK },
1111    { ORIYA,      ORI_MASK },
1112    { KANNADA,    KND_MASK },
1113    { MALAYALAM,  MLM_MASK },
1114    { GUJARATI,   GJR_MASK },
1115    { GURMUKHI,   PNJ_MASK }
1116};
1117
/*
 * WRITE_TO_TARGET_TO_U - write one UTF-16 code unit produced by the
 * ISCII-to-Unicode conversion.
 *
 *   args           converter arguments; (args)->targetLimit bounds the output
 *                  and (args)->converter owns UCharErrorBuffer
 *   source         not used by the expansion
 *   target         output cursor; advanced when the unit is written
 *   offsets        offsets cursor or NULL; advanced when the unit is written
 *   offset         value stored through offsets for this unit
 *   targetUniChar  modifiable lvalue holding the code unit; delta is added to
 *                  it in place unless it is <= ASCII_END or one of ZWJ, ZWNJ,
 *                  DANDA, DOUBLE_DANDA
 *   delta          code point delta of the current script block
 *   err            error code pointer; set to U_BUFFER_OVERFLOW_ERROR when
 *                  the target is full, in which case the unit is appended to
 *                  (args)->converter->UCharErrorBuffer instead
 *
 * Every parameter is parenthesized in the expansion so that arguments such
 * as `&localArgs` or `*statusPtr` keep their meaning. target, offsets and
 * targetUniChar are evaluated more than once.
 */
#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
    /* add offset to current Indic Block */                                              \
    if((targetUniChar)>ASCII_END &&                                                      \
           (targetUniChar) != ZWJ &&                                                     \
           (targetUniChar) != ZWNJ &&                                                    \
           (targetUniChar) != DANDA &&                                                   \
           (targetUniChar) != DOUBLE_DANDA){                                             \
                                                                                         \
           (targetUniChar)+=(uint16_t)(delta);                                           \
    }                                                                                    \
    /* now write the targetUniChar */                                                    \
    if((target)<(args)->targetLimit){                                                    \
        *(target)++ = (UChar)(targetUniChar);                                            \
        if(offsets){                                                                     \
            *(offsets)++ = (int32_t)(offset);                                            \
        }                                                                                \
    }else{                                                                               \
        /* target is full: keep the unit in the converter's overflow buffer */           \
        (args)->converter->UCharErrorBuffer[(args)->converter->UCharErrorBufferLength++] = \
            (UChar)(targetUniChar);                                                      \
        *(err) = U_BUFFER_OVERFLOW_ERROR;                                                \
    }                                                                                    \
}
1140
/*
 * GET_MAPPING - look up the Unicode mapping of one ISCII byte.
 *
 *   sourceChar     ISCII byte; used as the index into toUnicodeTable
 *   targetUniChar  lvalue that receives the table value, or
 *                  missingCharMarker when the byte is above ASCII_END and
 *                  validityTable says the mapping is not valid for the
 *                  current script mask
 *   data           pointer to the converter state providing
 *                  currentMaskToUnicode and currentDeltaToUnicode
 *
 * VOCALLIC_RR is kept even when the validity check fails if the current
 * delta is TELUGU_DELTA.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments (for example `b & 0xFF` or `&state`) keep their meaning.
 * sourceChar and targetUniChar are evaluated more than once.
 */
#define GET_MAPPING(sourceChar,targetUniChar,data){                                      \
    (targetUniChar) = toUnicodeTable[(sourceChar)] ;                                     \
    /* is the code point valid in current script? */                                     \
    if((sourceChar)> ASCII_END &&                                                        \
            (validityTable[((targetUniChar) & 0x7F)] & (data)->currentMaskToUnicode)==0){\
        /* Vocallic RR is assigned in ISCII Telugu and Unicode */                        \
        if((data)->currentDeltaToUnicode!=(TELUGU_DELTA) ||                              \
                    (targetUniChar)!=VOCALLIC_RR){                                       \
            (targetUniChar)=missingCharMarker;                                           \
        }                                                                                \
    }                                                                                    \
}
1153
/***********
 *  Rules for ISCII to Unicode converter
 *  ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
 *  which has both precomposed and decomposed forms of characters,
 *  pre-context and post-context need to be considered.
 *
 *  Post context
 *  i)  ATR : Attribute code is used to declare the font and script switching.
 *      Currently we only switch scripts; font codes are consumed without generating an error
 *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
 *      obsolete characters
 *  Pre context
 *  i)  Halant: if preceded by a halant then it is an explicit halant
 *  ii) Nukta :
 *       a) if preceded by a halant then it is a soft halant
 *       b) if preceded by specific consonants and the ligatures have pre-composed
 *          characters in Unicode then convert to pre-composed characters
 *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
 *
 */
1174
1175static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
1176    const char *source = ( char *) args->source;
1177    UChar *target = args->target;
1178    const char *sourceLimit = args->sourceLimit;
1179    const UChar* targetLimit = args->targetLimit;
1180    uint32_t targetUniChar = 0x0000;
1181    uint8_t sourceChar = 0x0000;
1182    UConverterDataISCII* data;
1183    UChar32* toUnicodeStatus=NULL;
1184    UChar32 tempTargetUniChar = 0x0000;
1185    UChar* contextCharToUnicode= NULL;
1186    UBool found;
1187    int i;
1188    int offset = 0;
1189
1190    if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
1191        *err = U_ILLEGAL_ARGUMENT_ERROR;
1192        return;
1193    }
1194
1195    data = (UConverterDataISCII*)(args->converter->extraInfo);
1196    contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
1197    toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
1198
1199    while (U_SUCCESS(*err) && source<sourceLimit) {
1200
1201        targetUniChar = missingCharMarker;
1202
1203        if (target < targetLimit) {
1204            sourceChar = (unsigned char)*(source)++;
1205
1206            /* look at the post-context preform special processing */
1207            if (*contextCharToUnicode==ATR) {
1208
1209                /* If we have ATR in *contextCharToUnicode then we need to change our
1210                 * state to the Indic Script specified by sourceChar
1211                 */
1212
1213                /* check if the sourceChar is supported script range*/
1214                if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
1215                    data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
1216                    data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
1217                } else if (sourceChar==DEF) {
1218                    /* switch back to default */
1219                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
1220                    data->currentMaskToUnicode = data->defMaskToUnicode;
1221                } else {
1222                    if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
1223                        /* these are display codes consume and continue */
1224                    } else {
1225                        *err =U_ILLEGAL_CHAR_FOUND;
1226                        /* reset */
1227                        *contextCharToUnicode=NO_CHAR_MARKER;
1228                        goto CALLBACK;
1229                    }
1230                }
1231
1232                /* reset */
1233                *contextCharToUnicode=NO_CHAR_MARKER;
1234
1235                continue;
1236
1237            } else if (*contextCharToUnicode==EXT) {
1238                /* check if sourceChar is in 0xA1-0xEE range */
1239                if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
1240                    /* We currently support only Anudatta and Devanagari abbreviation sign */
1241                    if (sourceChar==0xBF || sourceChar == 0xB8) {
1242                        targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
1243
1244                        /* find out if the mapping is valid in this state */
1245                        if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1246                            *contextCharToUnicode= NO_CHAR_MARKER;
1247
1248                            /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1249                            if (data->prevToUnicodeStatus) {
1250                                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1251                                data->prevToUnicodeStatus = 0x0000;
1252                            }
1253                            /* write to target */
1254                            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1255
1256                            continue;
1257                        }
1258                    }
1259                    /* byte unit is unassigned */
1260                    targetUniChar = missingCharMarker;
1261                    *err= U_INVALID_CHAR_FOUND;
1262                } else {
1263                    /* only 0xA1 - 0xEE are legal after EXT char */
1264                    *contextCharToUnicode= NO_CHAR_MARKER;
1265                    *err = U_ILLEGAL_CHAR_FOUND;
1266                }
1267                goto CALLBACK;
1268            } else if (*contextCharToUnicode==ISCII_INV) {
1269                if (sourceChar==ISCII_HALANT) {
1270                    targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
1271                } else {
1272                    targetUniChar = ZWJ;
1273                }
1274
1275                /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1276                if (data->prevToUnicodeStatus) {
1277                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1278                    data->prevToUnicodeStatus = 0x0000;
1279                }
1280                /* write to target */
1281                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1282                /* reset */
1283                *contextCharToUnicode=NO_CHAR_MARKER;
1284            }
1285
1286            /* look at the pre-context and perform special processing */
1287            switch (sourceChar) {
1288            case ISCII_INV:
1289            case EXT:
1290            case ATR:
1291                *contextCharToUnicode = (UChar)sourceChar;
1292
1293                if (*toUnicodeStatus != missingCharMarker) {
1294                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1295                    if (data->prevToUnicodeStatus) {
1296                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1297                        data->prevToUnicodeStatus = 0x0000;
1298                    }
1299                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1300                    *toUnicodeStatus = missingCharMarker;
1301                }
1302                continue;
1303            case ISCII_DANDA:
1304                /* handle double danda*/
1305                if (*contextCharToUnicode== ISCII_DANDA) {
1306                    targetUniChar = DOUBLE_DANDA;
1307                    /* clear the context */
1308                    *contextCharToUnicode = NO_CHAR_MARKER;
1309                    *toUnicodeStatus = missingCharMarker;
1310                } else {
1311                    GET_MAPPING(sourceChar,targetUniChar,data);
1312                    *contextCharToUnicode = sourceChar;
1313                }
1314                break;
1315            case ISCII_HALANT:
1316                /* handle explicit halant */
1317                if (*contextCharToUnicode == ISCII_HALANT) {
1318                    targetUniChar = ZWNJ;
1319                    /* clear the context */
1320                    *contextCharToUnicode = NO_CHAR_MARKER;
1321                } else {
1322                    GET_MAPPING(sourceChar,targetUniChar,data);
1323                    *contextCharToUnicode = sourceChar;
1324                }
1325                break;
1326            case 0x0A:
1327            case 0x0D:
1328                data->resetToDefaultToUnicode = TRUE;
1329                GET_MAPPING(sourceChar,targetUniChar,data)
1330                ;
1331                *contextCharToUnicode = sourceChar;
1332                break;
1333
1334            case ISCII_VOWEL_SIGN_E:
1335                i=1;
1336                found=FALSE;
1337                for (; i<vowelSignESpecialCases[0][0]; i++) {
1338                    U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases));
1339                    if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
1340                        targetUniChar=vowelSignESpecialCases[i][1];
1341                        found=TRUE;
1342                        break;
1343                    }
1344                }
1345                if (found) {
1346                    /* find out if the mapping is valid in this state */
1347                    if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1348                        /*targetUniChar += data->currentDeltaToUnicode ;*/
1349                        *contextCharToUnicode= NO_CHAR_MARKER;
1350                        *toUnicodeStatus = missingCharMarker;
1351                        break;
1352                    }
1353                }
1354                GET_MAPPING(sourceChar,targetUniChar,data);
1355                *contextCharToUnicode = sourceChar;
1356                break;
1357
1358            case ISCII_NUKTA:
1359                /* handle soft halant */
1360                if (*contextCharToUnicode == ISCII_HALANT) {
1361                    targetUniChar = ZWJ;
1362                    /* clear the context */
1363                    *contextCharToUnicode = NO_CHAR_MARKER;
1364                    break;
1365                } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
1366                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1367                    if (data->prevToUnicodeStatus) {
1368                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1369                        data->prevToUnicodeStatus = 0x0000;
1370                    }
1371                    /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
1372                     * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
1373                     */
1374                    targetUniChar = PNJ_RRA;
1375                    WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1376                    if (U_SUCCESS(*err)) {
1377                        targetUniChar = PNJ_SIGN_VIRAMA;
1378                        WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1379                        if (U_SUCCESS(*err)) {
1380                            targetUniChar = PNJ_HA;
1381                            WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1382                        } else {
1383                            args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1384                        }
1385                    } else {
1386                        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
1387                        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1388                    }
1389                    *toUnicodeStatus = missingCharMarker;
1390                    data->contextCharToUnicode = NO_CHAR_MARKER;
1391                    continue;
1392                } else {
1393                    /* try to handle <CHAR> + ISCII_NUKTA special mappings */
1394                    i=1;
1395                    found =FALSE;
1396                    for (; i<nuktaSpecialCases[0][0]; i++) {
1397                        if (nuktaSpecialCases[i][0]==(uint8_t)
1398                                *contextCharToUnicode) {
1399                            targetUniChar=nuktaSpecialCases[i][1];
1400                            found =TRUE;
1401                            break;
1402                        }
1403                    }
1404                    if (found) {
1405                        /* find out if the mapping is valid in this state */
1406                        if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1407                            /*targetUniChar += data->currentDeltaToUnicode ;*/
1408                            *contextCharToUnicode= NO_CHAR_MARKER;
1409                            *toUnicodeStatus = missingCharMarker;
1410                            if (data->currentDeltaToUnicode == PNJ_DELTA) {
1411                                /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1412                                if (data->prevToUnicodeStatus) {
1413                                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1414                                    data->prevToUnicodeStatus = 0x0000;
1415                                }
1416                                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1417                                continue;
1418                            }
1419                            break;
1420                        }
1421                        /* else fall through to default */
1422                    }
1423                    /* else fall through to default */
1424                    U_FALLTHROUGH;
1425                }
1426            default:GET_MAPPING(sourceChar,targetUniChar,data)
1427                ;
1428                *contextCharToUnicode = sourceChar;
1429                break;
1430            }
1431
1432            if (*toUnicodeStatus != missingCharMarker) {
1433                /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
1434                if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
1435                        (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
1436                    /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
1437                    offset = (int)(source-args->source - 3);
1438                    tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
1439                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
1440                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
1441                    data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
1442                    *toUnicodeStatus = missingCharMarker;
1443                    continue;
1444                } else {
1445                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1446                    if (data->prevToUnicodeStatus) {
1447                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1448                        data->prevToUnicodeStatus = 0x0000;
1449                    }
1450                    /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
1451                     * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
1452                     */
1453                    if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
1454                        targetUniChar = PNJ_TIPPI - PNJ_DELTA;
1455                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
1456                    } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
1457                        /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
1458                        data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
1459                    } else {
1460                        /* write the previously mapped codepoint */
1461                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1462                    }
1463                }
1464                *toUnicodeStatus = missingCharMarker;
1465            }
1466
1467            if (targetUniChar != missingCharMarker) {
1468                /* now save the targetUniChar for delayed write */
1469                *toUnicodeStatus = (UChar) targetUniChar;
1470                if (data->resetToDefaultToUnicode==TRUE) {
1471                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
1472                    data->currentMaskToUnicode = data->defMaskToUnicode;
1473                    data->resetToDefaultToUnicode=FALSE;
1474                }
1475            } else {
1476
1477                /* we reach here only if targetUniChar == missingCharMarker
1478                 * so assign codes to reason and err
1479                 */
1480                *err = U_INVALID_CHAR_FOUND;
1481CALLBACK:
1482                args->converter->toUBytes[0] = (uint8_t) sourceChar;
1483                args->converter->toULength = 1;
1484                break;
1485            }
1486
1487        } else {
1488            *err =U_BUFFER_OVERFLOW_ERROR;
1489            break;
1490        }
1491    }
1492
1493    if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
1494        /* end of the input stream */
1495        UConverter *cnv = args->converter;
1496
1497        if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
1498            /* set toUBytes[] */
1499            cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
1500            cnv->toULength = 1;
1501
1502            /* avoid looping on truncated sequences */
1503            *contextCharToUnicode = NO_CHAR_MARKER;
1504        } else {
1505            cnv->toULength = 0;
1506        }
1507
1508        if (*toUnicodeStatus != missingCharMarker) {
1509            /* output a remaining target character */
1510            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1511            *toUnicodeStatus = missingCharMarker;
1512        }
1513    }
1514
1515    args->target = target;
1516    args->source = source;
1517}
1518
/*
 * structure for SafeClone calculations
 *
 * Pairs a converter object with the ISCII-specific state block so that both
 * fit in a single clone buffer. _ISCII_SafeClone() uses sizeof() of this
 * struct as the required buffer size, casts the caller's buffer to this
 * type, and returns the address of the cnv member.
 * NOTE(review): because the clone buffer is reinterpreted as this struct and
 * &cnv is handed back as the converter, cnv presumably has to remain the
 * first member - confirm against the caller of the safeClone hook.
 */
struct cloneISCIIStruct {
    UConverter cnv;             /* the cloned converter itself */
    UConverterDataISCII mydata; /* private ISCII state; the clone's extraInfo is pointed here */
};
1524
1525static UConverter *
1526_ISCII_SafeClone(const UConverter *cnv,
1527              void *stackBuffer,
1528              int32_t *pBufferSize,
1529              UErrorCode *status)
1530{
1531    struct cloneISCIIStruct * localClone;
1532    int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
1533
1534    if (U_FAILURE(*status)) {
1535        return 0;
1536    }
1537
1538    if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
1539        *pBufferSize = bufferSizeNeeded;
1540        return 0;
1541    }
1542
1543    localClone = (struct cloneISCIIStruct *)stackBuffer;
1544    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1545
1546    uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
1547    localClone->cnv.extraInfo = &localClone->mydata;
1548    localClone->cnv.isExtraLocal = TRUE;
1549
1550    return &localClone->cnv;
1551}
1552
1553static void
1554_ISCIIGetUnicodeSet(const UConverter *cnv,
1555                    const USetAdder *sa,
1556                    UConverterUnicodeSet which,
1557                    UErrorCode *pErrorCode)
1558{
1559    int32_t idx, script;
1560    uint8_t mask;
1561
1562    /* Since all ISCII versions allow switching to other ISCII
1563    scripts, we add all roundtrippable characters to this set. */
1564    sa->addRange(sa->set, 0, ASCII_END);
1565    for (script = DEVANAGARI; script <= MALAYALAM; script++) {
1566        mask = (uint8_t)(lookupInitialData[script].maskEnum);
1567        for (idx = 0; idx < DELTA; idx++) {
1568            /* added check for TELUGU character */
1569            if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
1570                sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
1571            }
1572        }
1573    }
1574    sa->add(sa->set, DANDA);
1575    sa->add(sa->set, DOUBLE_DANDA);
1576    sa->add(sa->set, ZWNJ);
1577    sa->add(sa->set, ZWJ);
1578}
1579
/*
 * Function table for the ISCII converter; _ISCIIData below points at it.
 *
 * The initializer is positional, so the order of the entries must match the
 * member order of UConverterImpl, which is declared outside this file.
 * NULL entries leave the corresponding slot unset.
 * NOTE(review): the slot descriptions below are inferred from the names of
 * the functions placed in them - confirm against the UConverterImpl
 * declaration before relying on them.
 */
static const UConverterImpl _ISCIIImpl={

    UCNV_ISCII, /* converter type constant */

    NULL,
    NULL,

    /* lifecycle hooks */
    _ISCIIOpen,
    _ISCIIClose,
    _ISCIIReset,

    /*
     * The same to-Unicode routine fills two consecutive slots, and likewise
     * the same from-Unicode routine; presumably these are the plain and the
     * with-offsets variants - TODO confirm.
     */
    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
    NULL,

    NULL,
    _ISCIIgetName,
    NULL,
    _ISCII_SafeClone,
    _ISCIIGetUnicodeSet
};
1603
/*
 * Constant descriptive data for the ISCII converter; _ISCIIData below points
 * at it.
 *
 * The initializer is positional, so the order of the entries must match the
 * member order of UConverterStaticData, which is declared outside this file.
 * NOTE(review): apart from the first and last entries, the per-entry notes
 * are guesses based on the values themselves - confirm against the
 * UConverterStaticData declaration.
 */
static const UConverterStaticData _ISCIIStaticData={
    sizeof(UConverterStaticData), /* size of this structure */
        "ISCII", /* presumably the converter name */
         0,
         UCNV_IBM,
         UCNV_ISCII, /* same type constant as used in _ISCIIImpl */
         1, /* presumably minimum bytes per character - TODO confirm */
         4, /* presumably maximum bytes per character - TODO confirm */
        { 0x1a, 0, 0, 0 }, /* presumably the substitution byte sequence - TODO confirm */
        0x1, /* presumably the length of that sequence - TODO confirm */
        FALSE,
        FALSE,
        0x0,
        0x0,
        { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

};
1621
/*
 * Shared-data record for the ISCII converter, built from the static data and
 * the function table defined above. Unlike those two objects it is not
 * declared static.
 */
const UConverterSharedData _ISCIIData=
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl);
1624
#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */
1626