1/*
2**********************************************************************
3*   Copyright (C) 2000-2009, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   file name:  ucnvisci.c
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2001JUN26
12*   created by: Ram Viswanadha
13*
14*   Date        Name        Description
15*   24/7/2001   Ram         Added support for EXT character handling
16*/
17
18#include "unicode/utypes.h"
19
20#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
21
22#include "cmemory.h"
23#include "ucnv_bld.h"
24#include "unicode/ucnv.h"
25#include "ucnv_cnv.h"
26#include "unicode/ucnv_cb.h"
27#include "unicode/uset.h"
28#include "cstring.h"
29
/* Mask that extracts the ISCII version number (the initial script) from the converter options. */
#define UCNV_OPTIONS_VERSION_MASK 0xf

/* Unicode code points that receive special handling. */
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero width Joiner */
#define INVALID_CHAR        0xffff /* table value for "no mapping" */

/* ISCII byte values. */
#define ATR                 0xEF   /* Attribute code */
#define EXT                 0xF0   /* Extension code */
#define DANDA               0x0964 /* Unicode */
#define DOUBLE_DANDA        0x0965 /* Unicode */
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0

/* Bounds of the Unicode range covered by the nine supported script blocks. */
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE
/*
 * Parenthesized so that the macro expands as one operand; without the
 * parentheses an expression such as (x / TELUGU_DELTA) would be evaluated as
 * ((x / DELTA) * TELUGU).
 */
#define TELUGU_DELTA        (DELTA * TELUGU)
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
/* NOTE(review): presumably the range of bytes accepted after EXT - confirm against the toUnicode code. */
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE

/* Gurmukhi (Punjabi) specific values; PNJ_DELTA equals GURMUKHI * DELTA. */
#define PNJ_DELTA           0x0100
#define PNJ_BINDI           0x0A02
#define PNJ_TIPPI           0x0A70
#define PNJ_SIGN_VIRAMA     0x0A4D
#define PNJ_ADHAK           0x0A71
#define PNJ_HA              0x0A39
#define PNJ_RRA             0x0A5C
65
/*
 * File-scope sets of Gurmukhi code points, shared by all ISCII converters.
 * They are built by initializeSets() (called from _ISCIIOpen()) and closed
 * and reset to NULL by _ISCIIClose().
 *
 * PNJ_CONSONANT_SET holds 0x0A15..0x0A28, 0x0A2A..0x0A30, 0x0A35..0x0A36 and
 * 0x0A38..0x0A39; PNJ_BINDI_TIPPI_SET holds the same code points plus
 * 0x0A05, 0x0A07, 0x0A3F and 0x0A41..0x0A42.
 * NOTE(review): access is not synchronized here - confirm the threading
 * expectations of the callers.
 */
static USet* PNJ_BINDI_TIPPI_SET= NULL;
static USet* PNJ_CONSONANT_SET= NULL;
68
/**
 * Index of each supported script, in the order of lookupInitialData[].
 * _ISCIIOpen() multiplies the script index by DELTA (0x80) to obtain the
 * converter's delta for that script; e.g. GURMUKHI * DELTA == PNJ_DELTA.
 * DELTA is not a script: it is the multiplier itself, kept in this enum.
 */
typedef enum {
    DEVANAGARI =0,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    DELTA=0x80
}UniLang;
81
/**
 * Script codes that may follow the ATR (attribute) byte in ISCII text.
 * When <ATR> + <one of the values below> is encountered, the code page
 * (script) is switched.
 *
 * DEV, BNG, PNJ, GJR, ORI, TML, TLG, KND and MLM are paired with the
 * corresponding UniLang scripts in lookupInitialData[]; ASM shares its
 * validity bit with BNG (see the validityTable comment).
 * NOTE(review): DEF presumably selects the default script, RMN Roman, and
 * ARB..PST further scripts of the ISCII standard - confirm against the
 * specification.
 */
typedef enum {
    DEF = 0x40,
    RMN = 0x41,
    DEV = 0x42,
    BNG = 0x43,
    TML = 0x44,
    TLG = 0x45,
    ASM = 0x46,
    ORI = 0x47,
    KND = 0x48,
    MLM = 0x49,
    GJR = 0x4A,
    PNJ = 0x4B,
    ARB = 0x71,
    PES = 0x72,
    URD = 0x73,
    SND = 0x74,
    KSM = 0x75,
    PST = 0x76
}ISCIILang;
106
/**
 * One bit per script (or pair of scripts) for the entries of validityTable[].
 * KND_MASK is also used for Telugu (see lookupInitialData[]); ZERO means
 * "valid in no script" and serves as a column filler in the table.
 */
typedef enum {
    DEV_MASK =0x80,
    PNJ_MASK =0x40,
    GJR_MASK =0x20,
    ORI_MASK =0x10,
    BNG_MASK =0x08,
    KND_MASK =0x04,
    MLM_MASK =0x02,
    TML_MASK =0x01,
    ZERO =0x00
}MaskEnum;
118
/* Common prefix of the converter name; _ISCIIOpen() appends the version digit. */
#define ISCII_CNV_PREFIX "ISCII,version="

/**
 * Per-converter state of the ISCII converter. _ISCIIOpen() allocates it and
 * stores it in UConverter.extraInfo; _ISCIIReset() restores the default values.
 */
typedef struct {
    UChar contextCharToUnicode;         /* previous Unicode codepoint for contextual analysis (toUnicode direction) */
    UChar contextCharFromUnicode;       /* previous Unicode codepoint for contextual analysis (fromUnicode direction) */
    uint16_t defDeltaToUnicode;         /* delta for switching to default state when DEF is encountered  */
    uint16_t currentDeltaFromUnicode;   /* current delta in Indic block (fromUnicode direction) */
    uint16_t currentDeltaToUnicode;     /* current delta in Indic block (toUnicode direction) */
    MaskEnum currentMaskFromUnicode;    /* mask for current state in fromUnicode */
    MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
    MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode */
    UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
    UBool resetToDefaultToUnicode;      /* boolean for resetting to default delta and mask when a newline is encountered */
    char name[sizeof(ISCII_CNV_PREFIX) + 1]; /* ISCII_CNV_PREFIX + one version digit + terminating NUL */
    UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
} UConverterDataISCII;
135
/**
 * One row of lookupInitialData[]: the three identifiers that belong to one script.
 */
typedef struct LookupDataStruct {
    UniLang uniLang;        /* script index; multiplied by DELTA to get the script's delta */
    MaskEnum maskEnum;      /* bit that marks the script in validityTable[] */
    ISCIILang isciiLang;    /* code that follows ATR to select the script in ISCII */
} LookupDataStruct;
141
/*
 * Initial script data, indexed by the converter version (0..8) taken from the
 * open options in _ISCIIOpen(). The rows are in UniLang order.
 * Telugu intentionally uses KND_MASK: Telugu and Kannada share one validity bit.
 */
static const LookupDataStruct lookupInitialData[]={
    { DEVANAGARI, DEV_MASK,  DEV },
    { BENGALI,    BNG_MASK,  BNG },
    { GURMUKHI,   PNJ_MASK,  PNJ },
    { GUJARATI,   GJR_MASK,  GJR },
    { ORIYA,      ORI_MASK,  ORI },
    { TAMIL,      TML_MASK,  TML },
    { TELUGU,     KND_MASK,  TLG },
    { KANNADA,    KND_MASK,  KND },
    { MALAYALAM,  MLM_MASK,  MLM }
};
153
154static void initializeSets() {
155    /* TODO: Replace the following two lines with PNJ_CONSONANT_SET = uset_openEmpty(); */
156    PNJ_CONSONANT_SET = uset_open(0,0);
157    uset_clear(PNJ_CONSONANT_SET);
158
159    uset_addRange(PNJ_CONSONANT_SET, 0x0A15, 0x0A28);
160    uset_addRange(PNJ_CONSONANT_SET, 0x0A2A, 0x0A30);
161    uset_addRange(PNJ_CONSONANT_SET, 0x0A35, 0x0A36);
162    uset_addRange(PNJ_CONSONANT_SET, 0x0A38, 0x0A39);
163
164    PNJ_BINDI_TIPPI_SET = uset_clone(PNJ_CONSONANT_SET);
165    uset_add(PNJ_BINDI_TIPPI_SET, 0x0A05);
166    uset_add(PNJ_BINDI_TIPPI_SET, 0x0A07);
167    uset_add(PNJ_BINDI_TIPPI_SET, 0x0A3F);
168    uset_addRange(PNJ_BINDI_TIPPI_SET, 0x0A41, 0x0A42);
169
170    uset_compact(PNJ_CONSONANT_SET);
171    uset_compact(PNJ_BINDI_TIPPI_SET);
172}
173
174static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
175    if(pArgs->onlyTestIsLoadable) {
176        return;
177    }
178
179    /* Ensure that the sets used in special handling of certain Gurmukhi characters are initialized. */
180    initializeSets();
181
182    cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
183
184    if (cnv->extraInfo != NULL) {
185        int32_t len=0;
186        UConverterDataISCII *converterData=
187                (UConverterDataISCII *) cnv->extraInfo;
188        converterData->contextCharToUnicode=NO_CHAR_MARKER;
189        cnv->toUnicodeStatus = missingCharMarker;
190        converterData->contextCharFromUnicode=0x0000;
191        converterData->resetToDefaultToUnicode=FALSE;
192        /* check if the version requested is supported */
193        if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {
194            /* initialize state variables */
195            converterData->currentDeltaFromUnicode
196                    = converterData->currentDeltaToUnicode
197                            = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
198
199            converterData->currentMaskFromUnicode
200                    = converterData->currentMaskToUnicode
201                            = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
202
203            converterData->isFirstBuffer=TRUE;
204            (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
205            len = (int32_t)uprv_strlen(converterData->name);
206            converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
207            converterData->name[len+1]=0;
208
209            converterData->prevToUnicodeStatus = 0x0000;
210        } else {
211            uprv_free(cnv->extraInfo);
212            cnv->extraInfo = NULL;
213            *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
214        }
215
216    } else {
217        *errorCode =U_MEMORY_ALLOCATION_ERROR;
218    }
219}
220
221static void _ISCIIClose(UConverter *cnv) {
222    if (cnv->extraInfo!=NULL) {
223        if (!cnv->isExtraLocal) {
224            uprv_free(cnv->extraInfo);
225        }
226        cnv->extraInfo=NULL;
227    }
228    if (PNJ_CONSONANT_SET != NULL) {
229        uset_close(PNJ_CONSONANT_SET);
230        PNJ_CONSONANT_SET = NULL;
231    }
232    if (PNJ_BINDI_TIPPI_SET != NULL) {
233        uset_close(PNJ_BINDI_TIPPI_SET);
234        PNJ_BINDI_TIPPI_SET = NULL;
235    }
236}
237
238static const char* _ISCIIgetName(const UConverter* cnv) {
239    if (cnv->extraInfo) {
240        UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
241        return myData->name;
242    }
243    return NULL;
244}
245
246static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
247    UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
248    if (choice<=UCNV_RESET_TO_UNICODE) {
249        cnv->toUnicodeStatus = missingCharMarker;
250        cnv->mode=0;
251        data->currentDeltaToUnicode=data->defDeltaToUnicode;
252        data->currentMaskToUnicode = data->defMaskToUnicode;
253        data->contextCharToUnicode=NO_CHAR_MARKER;
254        data->prevToUnicodeStatus = 0x0000;
255    }
256    if (choice!=UCNV_RESET_TO_UNICODE) {
257        cnv->fromUChar32=0x0000;
258        data->contextCharFromUnicode=0x00;
259        data->currentMaskFromUnicode=data->defMaskToUnicode;
260        data->currentDeltaFromUnicode=data->defDeltaToUnicode;
261        data->isFirstBuffer=TRUE;
262        data->resetToDefaultToUnicode=FALSE;
263    }
264}
265
/**
 * The values in the validity table are indexed by the lower bits of a code
 * point in the Unicode range 0x0900 - 0x097f (128 entries).
 * NOTE(review): presumably code points of the other scripts are mapped into
 * this range with the script delta before the lookup - confirm against the
 * conversion code.
 * The values have a structure like:
 *       ---------------------------------------------------------------
 *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
 *      |       |       |       |       | ASM   | KND   |       |       |
 *       ---------------------------------------------------------------
 * If a code point is valid in a particular script
 * then that bit is turned on.
 *
 * Unicode does not distinguish between Bengali and Assamese, so we use 1 bit
 * to represent these languages.
 *
 * Telugu and Kannada have the same code points except for Vocallic_RR, which
 * we special-case; we combine them and use 1 bit to represent these languages.
 *
 * Each row comment has the form "ISCII byte : validity value : Unicode".
 * The validity value in the comment is not updated for rows that were edited
 * by hand (e.g. 0x901 and 0x92e), so the sum of the masks is authoritative.
 *
 * TODO: It is probably easier to understand and maintain to change this
 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
 */

static const uint8_t validityTable[128] = {
/* This state table is tool generated please do not edit unless you know exactly what you are doing */
/* Note: This table was edited to mirror the Windows XP implementation */
/*ISCII:Valid:Unicode */
/*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
/*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
/*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
/*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
/*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*
 * The length of the array is 128 to provide values for 0x900..0x97f.
 * The last 15 entries for 0x971..0x97f of the validity table are all zero
 * because no Indic script uses such Unicode code points.
 * Only the first of them is written out; the remaining ones are
 * zero-initialized implicitly.
 */
/*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
};
410
/*
 * ISCII values for the code points 0x0900..0x097f; the comment on each entry
 * gives the Unicode code point that corresponds to the entry's index.
 *
 * Entries whose high byte is zero are single ISCII byte values. 0xFFFF has
 * the same value as INVALID_CHAR.
 * NOTE(review): entries with a non-zero high byte presumably stand for two
 * ISCII bytes, lead byte in the high 8 bits (e.g. 0xeaea for DOUBLE_DANDA,
 * 0x..e9 for a byte followed by ISCII_NUKTA, 0xF0.. for EXT followed by a
 * byte) - confirm against the fromUnicode conversion code.
 */
static const uint16_t fromUnicodeTable[128]={
    0x00a0 ,/* 0x0900 */
    0x00a1 ,/* 0x0901 */
    0x00a2 ,/* 0x0902 */
    0x00a3 ,/* 0x0903 */
    0xa4e0 ,/* 0x0904 */
    0x00a4 ,/* 0x0905 */
    0x00a5 ,/* 0x0906 */
    0x00a6 ,/* 0x0907 */
    0x00a7 ,/* 0x0908 */
    0x00a8 ,/* 0x0909 */
    0x00a9 ,/* 0x090a */
    0x00aa ,/* 0x090b */
    0xA6E9 ,/* 0x090c */
    0x00ae ,/* 0x090d */
    0x00ab ,/* 0x090e */
    0x00ac ,/* 0x090f */
    0x00ad ,/* 0x0910 */
    0x00b2 ,/* 0x0911 */
    0x00af ,/* 0x0912 */
    0x00b0 ,/* 0x0913 */
    0x00b1 ,/* 0x0914 */
    0x00b3 ,/* 0x0915 */
    0x00b4 ,/* 0x0916 */
    0x00b5 ,/* 0x0917 */
    0x00b6 ,/* 0x0918 */
    0x00b7 ,/* 0x0919 */
    0x00b8 ,/* 0x091a */
    0x00b9 ,/* 0x091b */
    0x00ba ,/* 0x091c */
    0x00bb ,/* 0x091d */
    0x00bc ,/* 0x091e */
    0x00bd ,/* 0x091f */
    0x00be ,/* 0x0920 */
    0x00bf ,/* 0x0921 */
    0x00c0 ,/* 0x0922 */
    0x00c1 ,/* 0x0923 */
    0x00c2 ,/* 0x0924 */
    0x00c3 ,/* 0x0925 */
    0x00c4 ,/* 0x0926 */
    0x00c5 ,/* 0x0927 */
    0x00c6 ,/* 0x0928 */
    0x00c7 ,/* 0x0929 */
    0x00c8 ,/* 0x092a */
    0x00c9 ,/* 0x092b */
    0x00ca ,/* 0x092c */
    0x00cb ,/* 0x092d */
    0x00cc ,/* 0x092e */
    0x00cd ,/* 0x092f */
    0x00cf ,/* 0x0930 */
    0x00d0 ,/* 0x0931 */
    0x00d1 ,/* 0x0932 */
    0x00d2 ,/* 0x0933 */
    0x00d3 ,/* 0x0934 */
    0x00d4 ,/* 0x0935 */
    0x00d5 ,/* 0x0936 */
    0x00d6 ,/* 0x0937 */
    0x00d7 ,/* 0x0938 */
    0x00d8 ,/* 0x0939 */
    0xFFFF ,/* 0x093A */
    0xFFFF ,/* 0x093B */
    0x00e9 ,/* 0x093c */
    0xEAE9 ,/* 0x093d */
    0x00da ,/* 0x093e */
    0x00db ,/* 0x093f */
    0x00dc ,/* 0x0940 */
    0x00dd ,/* 0x0941 */
    0x00de ,/* 0x0942 */
    0x00df ,/* 0x0943 */
    0xDFE9 ,/* 0x0944 */
    0x00e3 ,/* 0x0945 */
    0x00e0 ,/* 0x0946 */
    0x00e1 ,/* 0x0947 */
    0x00e2 ,/* 0x0948 */
    0x00e7 ,/* 0x0949 */
    0x00e4 ,/* 0x094a */
    0x00e5 ,/* 0x094b */
    0x00e6 ,/* 0x094c */
    0x00e8 ,/* 0x094d */
    0x00ec ,/* 0x094e */
    0x00ed ,/* 0x094f */
    0xA1E9 ,/* 0x0950 */ /* OM Symbol */
    0xFFFF ,/* 0x0951 */
    0xF0B8 ,/* 0x0952 */
    0xFFFF ,/* 0x0953 */
    0xFFFF ,/* 0x0954 */
    0xFFFF ,/* 0x0955 */
    0xFFFF ,/* 0x0956 */
    0xFFFF ,/* 0x0957 */
    0xb3e9 ,/* 0x0958 */
    0xb4e9 ,/* 0x0959 */
    0xb5e9 ,/* 0x095a */
    0xbae9 ,/* 0x095b */
    0xbfe9 ,/* 0x095c */
    0xC0E9 ,/* 0x095d */
    0xc9e9 ,/* 0x095e */
    0x00ce ,/* 0x095f */
    0xAAe9 ,/* 0x0960 */
    0xA7E9 ,/* 0x0961 */
    0xDBE9 ,/* 0x0962 */
    0xDCE9 ,/* 0x0963 */
    0x00ea ,/* 0x0964 */
    0xeaea ,/* 0x0965 */
    0x00f1 ,/* 0x0966 */
    0x00f2 ,/* 0x0967 */
    0x00f3 ,/* 0x0968 */
    0x00f4 ,/* 0x0969 */
    0x00f5 ,/* 0x096a */
    0x00f6 ,/* 0x096b */
    0x00f7 ,/* 0x096c */
    0x00f8 ,/* 0x096d */
    0x00f9 ,/* 0x096e */
    0x00fa ,/* 0x096f */
    0xF0BF ,/* 0x0970 */
    0xFFFF ,/* 0x0971 */
    0xFFFF ,/* 0x0972 */
    0xFFFF ,/* 0x0973 */
    0xFFFF ,/* 0x0974 */
    0xFFFF ,/* 0x0975 */
    0xFFFF ,/* 0x0976 */
    0xFFFF ,/* 0x0977 */
    0xFFFF ,/* 0x0978 */
    0xFFFF ,/* 0x0979 */
    0xFFFF ,/* 0x097a */
    0xFFFF ,/* 0x097b */
    0xFFFF ,/* 0x097c */
    0xFFFF ,/* 0x097d */
    0xFFFF ,/* 0x097e */
    0xFFFF ,/* 0x097f */
};
541static const uint16_t toUnicodeTable[256]={
542    0x0000,/* 0x00 */
543    0x0001,/* 0x01 */
544    0x0002,/* 0x02 */
545    0x0003,/* 0x03 */
546    0x0004,/* 0x04 */
547    0x0005,/* 0x05 */
548    0x0006,/* 0x06 */
549    0x0007,/* 0x07 */
550    0x0008,/* 0x08 */
551    0x0009,/* 0x09 */
552    0x000a,/* 0x0a */
553    0x000b,/* 0x0b */
554    0x000c,/* 0x0c */
555    0x000d,/* 0x0d */
556    0x000e,/* 0x0e */
557    0x000f,/* 0x0f */
558    0x0010,/* 0x10 */
559    0x0011,/* 0x11 */
560    0x0012,/* 0x12 */
561    0x0013,/* 0x13 */
562    0x0014,/* 0x14 */
563    0x0015,/* 0x15 */
564    0x0016,/* 0x16 */
565    0x0017,/* 0x17 */
566    0x0018,/* 0x18 */
567    0x0019,/* 0x19 */
568    0x001a,/* 0x1a */
569    0x001b,/* 0x1b */
570    0x001c,/* 0x1c */
571    0x001d,/* 0x1d */
572    0x001e,/* 0x1e */
573    0x001f,/* 0x1f */
574    0x0020,/* 0x20 */
575    0x0021,/* 0x21 */
576    0x0022,/* 0x22 */
577    0x0023,/* 0x23 */
578    0x0024,/* 0x24 */
579    0x0025,/* 0x25 */
580    0x0026,/* 0x26 */
581    0x0027,/* 0x27 */
582    0x0028,/* 0x28 */
583    0x0029,/* 0x29 */
584    0x002a,/* 0x2a */
585    0x002b,/* 0x2b */
586    0x002c,/* 0x2c */
587    0x002d,/* 0x2d */
588    0x002e,/* 0x2e */
589    0x002f,/* 0x2f */
590    0x0030,/* 0x30 */
591    0x0031,/* 0x31 */
592    0x0032,/* 0x32 */
593    0x0033,/* 0x33 */
594    0x0034,/* 0x34 */
595    0x0035,/* 0x35 */
596    0x0036,/* 0x36 */
597    0x0037,/* 0x37 */
598    0x0038,/* 0x38 */
599    0x0039,/* 0x39 */
600    0x003A,/* 0x3A */
601    0x003B,/* 0x3B */
602    0x003c,/* 0x3c */
603    0x003d,/* 0x3d */
604    0x003e,/* 0x3e */
605    0x003f,/* 0x3f */
606    0x0040,/* 0x40 */
607    0x0041,/* 0x41 */
608    0x0042,/* 0x42 */
609    0x0043,/* 0x43 */
610    0x0044,/* 0x44 */
611    0x0045,/* 0x45 */
612    0x0046,/* 0x46 */
613    0x0047,/* 0x47 */
614    0x0048,/* 0x48 */
615    0x0049,/* 0x49 */
616    0x004a,/* 0x4a */
617    0x004b,/* 0x4b */
618    0x004c,/* 0x4c */
619    0x004d,/* 0x4d */
620    0x004e,/* 0x4e */
621    0x004f,/* 0x4f */
622    0x0050,/* 0x50 */
623    0x0051,/* 0x51 */
624    0x0052,/* 0x52 */
625    0x0053,/* 0x53 */
626    0x0054,/* 0x54 */
627    0x0055,/* 0x55 */
628    0x0056,/* 0x56 */
629    0x0057,/* 0x57 */
630    0x0058,/* 0x58 */
631    0x0059,/* 0x59 */
632    0x005a,/* 0x5a */
633    0x005b,/* 0x5b */
634    0x005c,/* 0x5c */
635    0x005d,/* 0x5d */
636    0x005e,/* 0x5e */
637    0x005f,/* 0x5f */
638    0x0060,/* 0x60 */
639    0x0061,/* 0x61 */
640    0x0062,/* 0x62 */
641    0x0063,/* 0x63 */
642    0x0064,/* 0x64 */
643    0x0065,/* 0x65 */
644    0x0066,/* 0x66 */
645    0x0067,/* 0x67 */
646    0x0068,/* 0x68 */
647    0x0069,/* 0x69 */
648    0x006a,/* 0x6a */
649    0x006b,/* 0x6b */
650    0x006c,/* 0x6c */
651    0x006d,/* 0x6d */
652    0x006e,/* 0x6e */
653    0x006f,/* 0x6f */
654    0x0070,/* 0x70 */
655    0x0071,/* 0x71 */
656    0x0072,/* 0x72 */
657    0x0073,/* 0x73 */
658    0x0074,/* 0x74 */
659    0x0075,/* 0x75 */
660    0x0076,/* 0x76 */
661    0x0077,/* 0x77 */
662    0x0078,/* 0x78 */
663    0x0079,/* 0x79 */
664    0x007a,/* 0x7a */
665    0x007b,/* 0x7b */
666    0x007c,/* 0x7c */
667    0x007d,/* 0x7d */
668    0x007e,/* 0x7e */
669    0x007f,/* 0x7f */
670    0x0080,/* 0x80 */
671    0x0081,/* 0x81 */
672    0x0082,/* 0x82 */
673    0x0083,/* 0x83 */
674    0x0084,/* 0x84 */
675    0x0085,/* 0x85 */
676    0x0086,/* 0x86 */
677    0x0087,/* 0x87 */
678    0x0088,/* 0x88 */
679    0x0089,/* 0x89 */
680    0x008a,/* 0x8a */
681    0x008b,/* 0x8b */
682    0x008c,/* 0x8c */
683    0x008d,/* 0x8d */
684    0x008e,/* 0x8e */
685    0x008f,/* 0x8f */
686    0x0090,/* 0x90 */
687    0x0091,/* 0x91 */
688    0x0092,/* 0x92 */
689    0x0093,/* 0x93 */
690    0x0094,/* 0x94 */
691    0x0095,/* 0x95 */
692    0x0096,/* 0x96 */
693    0x0097,/* 0x97 */
694    0x0098,/* 0x98 */
695    0x0099,/* 0x99 */
696    0x009a,/* 0x9a */
697    0x009b,/* 0x9b */
698    0x009c,/* 0x9c */
699    0x009d,/* 0x9d */
700    0x009e,/* 0x9e */
701    0x009f,/* 0x9f */
702    0x00A0,/* 0xa0 */
703    0x0901,/* 0xa1 */
704    0x0902,/* 0xa2 */
705    0x0903,/* 0xa3 */
706    0x0905,/* 0xa4 */
707    0x0906,/* 0xa5 */
708    0x0907,/* 0xa6 */
709    0x0908,/* 0xa7 */
710    0x0909,/* 0xa8 */
711    0x090a,/* 0xa9 */
712    0x090b,/* 0xaa */
713    0x090e,/* 0xab */
714    0x090f,/* 0xac */
715    0x0910,/* 0xad */
716    0x090d,/* 0xae */
717    0x0912,/* 0xaf */
718    0x0913,/* 0xb0 */
719    0x0914,/* 0xb1 */
720    0x0911,/* 0xb2 */
721    0x0915,/* 0xb3 */
722    0x0916,/* 0xb4 */
723    0x0917,/* 0xb5 */
724    0x0918,/* 0xb6 */
725    0x0919,/* 0xb7 */
726    0x091a,/* 0xb8 */
727    0x091b,/* 0xb9 */
728    0x091c,/* 0xba */
729    0x091d,/* 0xbb */
730    0x091e,/* 0xbc */
731    0x091f,/* 0xbd */
732    0x0920,/* 0xbe */
733    0x0921,/* 0xbf */
734    0x0922,/* 0xc0 */
735    0x0923,/* 0xc1 */
736    0x0924,/* 0xc2 */
737    0x0925,/* 0xc3 */
738    0x0926,/* 0xc4 */
739    0x0927,/* 0xc5 */
740    0x0928,/* 0xc6 */
741    0x0929,/* 0xc7 */
742    0x092a,/* 0xc8 */
743    0x092b,/* 0xc9 */
744    0x092c,/* 0xca */
745    0x092d,/* 0xcb */
746    0x092e,/* 0xcc */
747    0x092f,/* 0xcd */
748    0x095f,/* 0xce */
749    0x0930,/* 0xcf */
750    0x0931,/* 0xd0 */
751    0x0932,/* 0xd1 */
752    0x0933,/* 0xd2 */
753    0x0934,/* 0xd3 */
754    0x0935,/* 0xd4 */
755    0x0936,/* 0xd5 */
756    0x0937,/* 0xd6 */
757    0x0938,/* 0xd7 */
758    0x0939,/* 0xd8 */
759    0x200D,/* 0xd9 */
760    0x093e,/* 0xda */
761    0x093f,/* 0xdb */
762    0x0940,/* 0xdc */
763    0x0941,/* 0xdd */
764    0x0942,/* 0xde */
765    0x0943,/* 0xdf */
766    0x0946,/* 0xe0 */
767    0x0947,/* 0xe1 */
768    0x0948,/* 0xe2 */
769    0x0945,/* 0xe3 */
770    0x094a,/* 0xe4 */
771    0x094b,/* 0xe5 */
772    0x094c,/* 0xe6 */
773    0x0949,/* 0xe7 */
774    0x094d,/* 0xe8 */
775    0x093c,/* 0xe9 */
776    0x0964,/* 0xea */
777    0xFFFF,/* 0xeb */
778    0xFFFF,/* 0xec */
779    0xFFFF,/* 0xed */
780    0xFFFF,/* 0xee */
781    0xFFFF,/* 0xef */
782    0xFFFF,/* 0xf0 */
783    0x0966,/* 0xf1 */
784    0x0967,/* 0xf2 */
785    0x0968,/* 0xf3 */
786    0x0969,/* 0xf4 */
787    0x096a,/* 0xf5 */
788    0x096b,/* 0xf6 */
789    0x096c,/* 0xf7 */
790    0x096d,/* 0xf8 */
791    0x096e,/* 0xf9 */
792    0x096f,/* 0xfa */
793    0xFFFF,/* 0xfb */
794    0xFFFF,/* 0xfc */
795    0xFFFF,/* 0xfd */
796    0xFFFF,/* 0xfe */
797    0xFFFF /* 0xff */
798};
799
/*
 * Special cases for ISCII_VOWEL_SIGN_E (0xE0) in the ISCII -> Unicode direction.
 *
 * Row 0 is a header: column 0 holds the number of rows in the table (header
 * row included) and column 1 is unused.  Every following row is
 *     { previous ISCII byte, value assigned to targetUniChar }
 * The ISCII_VOWEL_SIGN_E case of the to-Unicode converter walks rows
 * 1 .. [0][0]-1 and compares column 0 against the low byte of the context
 * (previously seen) ISCII character.
 */
static const uint16_t vowelSignESpecialCases[][2]={
    { 2 /*length of array*/    , 0      },
    { 0xA4 , 0x0904 },  /* U+0904 DEVANAGARI LETTER SHORT A */
};
804
/*
 * Special cases for ISCII_NUKTA (0xE9): ISCII byte + NUKTA pairs that have a
 * single Unicode code point.
 *
 * Row 0 is a header: column 0 holds the number of rows in the table (header
 * row included) and column 1 is unused.  Every following row is
 *     { ISCII byte that precedes the NUKTA, Devanagari code point }
 * NOTE(review): presumably scanned the same way as vowelSignESpecialCases
 * (rows 1 .. [0][0]-1) by the ISCII_NUKTA handling -- confirm at the use site.
 */
static const uint16_t nuktaSpecialCases[][2]={
    { 16 /*length of array*/   , 0      },
    { 0xA6 , 0x090c },  /* U+090C LETTER VOCALIC L */
    { 0xEA , 0x093D },  /* U+093D SIGN AVAGRAHA */
    { 0xDF , 0x0944 },  /* U+0944 VOWEL SIGN VOCALIC RR */
    { 0xA1 , 0x0950 },  /* U+0950 OM */
    { 0xb3 , 0x0958 },  /* U+0958 LETTER QA */
    { 0xb4 , 0x0959 },  /* U+0959 LETTER KHHA */
    { 0xb5 , 0x095a },  /* U+095A LETTER GHHA */
    { 0xba , 0x095b },  /* U+095B LETTER ZA */
    { 0xbf , 0x095c },  /* U+095C LETTER DDDHA */
    { 0xC0 , 0x095d },  /* U+095D LETTER RHA */
    { 0xc9 , 0x095e },  /* U+095E LETTER FA */
    { 0xAA , 0x0960 },  /* U+0960 LETTER VOCALIC RR */
    { 0xA7 , 0x0961 },  /* U+0961 LETTER VOCALIC LL */
    { 0xDB , 0x0962 },  /* U+0962 VOWEL SIGN VOCALIC L */
    { 0xDC , 0x0963 },  /* U+0963 VOWEL SIGN VOCALIC LL */
};
823
824
/*
 * WRITE_TO_TARGET_FROM_U: append the ISCII byte(s) packed in targetByteUnit
 * to the from-Unicode output.
 *
 * targetByteUnit holds one byte (<= 0xFF), two bytes (<= 0xFFFF) or three
 * bytes (> 0xFFFF), most significant byte first.  Bytes that no longer fit
 * before targetLimit are appended to args->converter->charErrorBuffer and
 * *err is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * When offsets is non-NULL one entry is appended per byte stored in target
 * (none for bytes that spill into the error buffer).  The entry is
 * (source - args->source - 1); for a three-byte unit it is lowered by one
 * more before the first byte is recorded.
 *
 * target and offsets are advanced in place and must be modifiable lvalues.
 * Every parameter is parenthesized in the expansion so that expression
 * arguments such as `hi | lo` are evaluated as a whole.
 *
 * The expansion is a plain { } block, not do { } while (0): the ';' written
 * after an invocation is a separate empty statement, so do not use the macro
 * as the unbraced body of an `if` that has an `else`.
 */
#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){      \
    int32_t offset = (int32_t)((source) - (args)->source-1);                                    \
    /* write the target byte unit to target */                                                  \
    if((target) < (targetLimit)){                                                               \
        if((targetByteUnit) <= 0xFF){                                                           \
            *(target)++ = (uint8_t)(targetByteUnit);                                            \
            if(offsets){                                                                        \
                *((offsets)++) = offset;                                                        \
            }                                                                                   \
        }else{                                                                                  \
            if ((targetByteUnit) > 0xFFFF) {                                                    \
                /* three-byte unit: leading byte first */                                       \
                *(target)++ = (uint8_t)((targetByteUnit)>>16);                                  \
                if (offsets) {                                                                  \
                    --offset;                                                                   \
                    *((offsets)++) = offset;                                                    \
                }                                                                               \
            }                                                                                   \
            if (!((target) < (targetLimit))) {                                                  \
                /* the leading byte used the last free slot: spill the other two */             \
                (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =\
                                (uint8_t)((targetByteUnit) >> 8);                               \
                (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =\
                                (uint8_t)(targetByteUnit);                                      \
                *(err) = U_BUFFER_OVERFLOW_ERROR;                                               \
            } else {                                                                            \
                *(target)++ = (uint8_t)((targetByteUnit)>>8);                                   \
                if(offsets){                                                                    \
                    *((offsets)++) = offset;                                                    \
                }                                                                               \
                if((target) < (targetLimit)){                                                   \
                    *(target)++ = (uint8_t)(targetByteUnit);                                    \
                    if(offsets){                                                                \
                        *((offsets)++) = offset;                                                \
                    }                                                                           \
                }else{                                                                          \
                    (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =\
                                (uint8_t)(targetByteUnit);                                      \
                    *(err) = U_BUFFER_OVERFLOW_ERROR;                                           \
                }                                                                               \
            }                                                                                   \
        }                                                                                       \
    }else{                                                                                      \
        /* no room at all: every byte of the unit goes to the error buffer */                   \
        if ((targetByteUnit) & 0xFF0000) {                                                      \
            (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =    \
                        (uint8_t)((targetByteUnit) >>16);                                       \
        }                                                                                       \
        if((targetByteUnit) & 0xFF00){                                                          \
            (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =    \
                        (uint8_t)((targetByteUnit) >>8);                                        \
        }                                                                                       \
        (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =        \
                        (uint8_t)(targetByteUnit);                                              \
        *(err) = U_BUFFER_OVERFLOW_ERROR;                                                       \
    }                                                                                           \
}
879
/* Rules (Unicode to ISCII):
 *    Explicit Halant :
 *                      <HALANT> + <ZWNJ>   is written as ISCII HALANT + HALANT (0xE8 0xE8)
 *    Soft Halant :
 *                      <HALANT> + <ZWJ>    is written as ISCII HALANT + NUKTA  (0xE8 0xE9)
 */
886
887static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
888        UConverterFromUnicodeArgs * args, UErrorCode * err) {
889    const UChar *source = args->source;
890    const UChar *sourceLimit = args->sourceLimit;
891    unsigned char *target = (unsigned char *) args->target;
892    unsigned char *targetLimit = (unsigned char *) args->targetLimit;
893    int32_t* offsets = args->offsets;
894    uint32_t targetByteUnit = 0x0000;
895    UChar32 sourceChar = 0x0000;
896    UChar32 tempContextFromUnicode = 0x0000;    /* For special handling of the Gurmukhi script. */
897    UConverterDataISCII *converterData;
898    uint16_t newDelta=0;
899    uint16_t range = 0;
900    UBool deltaChanged = FALSE;
901
902    if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
903        *err = U_ILLEGAL_ARGUMENT_ERROR;
904        return;
905    }
906    /* initialize data */
907    converterData=(UConverterDataISCII*)args->converter->extraInfo;
908    newDelta=converterData->currentDeltaFromUnicode;
909    range = (uint16_t)(newDelta/DELTA);
910
911    if ((sourceChar = args->converter->fromUChar32)!=0) {
912        goto getTrail;
913    }
914
915    /*writing the char to the output stream */
916    while (source < sourceLimit) {
917        /* Write the language code following LF only if LF is not the last character. */
918        if (args->converter->fromUnicodeStatus == LF) {
919            targetByteUnit = ATR<<8;
920            targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
921            args->converter->fromUnicodeStatus = 0x0000;
922            /* now append ATR and language code */
923            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
924            if (U_FAILURE(*err)) {
925                break;
926            }
927        }
928
929        sourceChar = *source++;
930        tempContextFromUnicode = converterData->contextCharFromUnicode;
931
932        targetByteUnit = missingCharMarker;
933
934        /*check if input is in ASCII and C0 control codes range*/
935        if (sourceChar <= ASCII_END) {
936            args->converter->fromUnicodeStatus = sourceChar;
937            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
938            if (U_FAILURE(*err)) {
939                break;
940            }
941            continue;
942        }
943        switch (sourceChar) {
944        case ZWNJ:
945            /* contextChar has HALANT */
946            if (converterData->contextCharFromUnicode) {
947                converterData->contextCharFromUnicode = 0x00;
948                targetByteUnit = ISCII_HALANT;
949            } else {
950                /* consume ZWNJ and continue */
951                converterData->contextCharFromUnicode = 0x00;
952                continue;
953            }
954            break;
955        case ZWJ:
956            /* contextChar has HALANT */
957            if (converterData->contextCharFromUnicode) {
958                targetByteUnit = ISCII_NUKTA;
959            } else {
960                targetByteUnit =ISCII_INV;
961            }
962            converterData->contextCharFromUnicode = 0x00;
963            break;
964        default:
965            /* is the sourceChar in the INDIC_RANGE? */
966            if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
967                /* Danda and Double Danda are valid in Northern scripts.. since Unicode
968                 * does not include these codepoints in all Northern scrips we need to
969                 * filter them out
970                 */
971                if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
972                    /* find out to which block the souceChar belongs*/
973                    range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
974                    newDelta =(uint16_t)(range*DELTA);
975
976                    /* Now are we in the same block as the previous? */
977                    if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
978                        converterData->currentDeltaFromUnicode = newDelta;
979                        converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
980                        deltaChanged =TRUE;
981                        converterData->isFirstBuffer=FALSE;
982                    }
983
984                    if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
985                        if (sourceChar == PNJ_TIPPI) {
986                            /* Make sure Tippi is converterd to Bindi. */
987                            sourceChar = PNJ_BINDI;
988                        } else if (sourceChar == PNJ_ADHAK) {
989                            /* This is for consonant cluster handling. */
990                            converterData->contextCharFromUnicode = PNJ_ADHAK;
991                        }
992
993                    }
994                    /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
995                    /* now subtract the new delta from sourceChar*/
996                    sourceChar -= converterData->currentDeltaFromUnicode;
997                }
998
999                /* get the target byte unit */
1000                targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
1001
1002                /* is the code point valid in current script? */
1003                if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
1004                    /* Vocallic RR is assigned in ISCII Telugu and Unicode */
1005                    if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
1006                        targetByteUnit=missingCharMarker;
1007                    }
1008                }
1009
1010                if (deltaChanged) {
1011                    /* we are in a script block which is different than
1012                     * previous sourceChar's script block write ATR and language codes
1013                     */
1014                    uint32_t temp=0;
1015                    temp =(uint16_t)(ATR<<8);
1016                    temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
1017                    /* reset */
1018                    deltaChanged=FALSE;
1019                    /* now append ATR and language code */
1020                    WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
1021                    if (U_FAILURE(*err)) {
1022                        break;
1023                    }
1024                }
1025
1026                if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
1027                    continue;
1028                }
1029            }
1030            /* reset context char */
1031            converterData->contextCharFromUnicode = 0x00;
1032            break;
1033        }
1034        if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && uset_contains(PNJ_CONSONANT_SET, (sourceChar + PNJ_DELTA))) {
1035            /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
1036            /* reset context char */
1037            converterData->contextCharFromUnicode = 0x0000;
1038            targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
1039            /* write targetByteUnit to target */
1040            WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
1041            if (U_FAILURE(*err)) {
1042                break;
1043            }
1044        } else if (targetByteUnit != missingCharMarker) {
1045            if (targetByteUnit==ISCII_HALANT) {
1046                converterData->contextCharFromUnicode = (UChar)targetByteUnit;
1047            }
1048            /* write targetByteUnit to target*/
1049            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
1050            if (U_FAILURE(*err)) {
1051                break;
1052            }
1053        } else {
1054            /* oops.. the code point is unassigned */
1055            /*check if the char is a First surrogate*/
1056            if (UTF_IS_SURROGATE(sourceChar)) {
1057                if (UTF_IS_SURROGATE_FIRST(sourceChar)) {
1058getTrail:
1059                    /*look ahead to find the trail surrogate*/
1060                    if (source < sourceLimit) {
1061                        /* test the following code unit */
1062                        UChar trail= (*source);
1063                        if (UTF_IS_SECOND_SURROGATE(trail)) {
1064                            source++;
1065                            sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
1066                            *err =U_INVALID_CHAR_FOUND;
1067                            /* convert this surrogate code point */
1068                            /* exit this condition tree */
1069                        } else {
1070                            /* this is an unmatched lead code unit (1st surrogate) */
1071                            /* callback(illegal) */
1072                            *err=U_ILLEGAL_CHAR_FOUND;
1073                        }
1074                    } else {
1075                        /* no more input */
1076                        *err = U_ZERO_ERROR;
1077                    }
1078                } else {
1079                    /* this is an unmatched trail code unit (2nd surrogate) */
1080                    /* callback(illegal) */
1081                    *err=U_ILLEGAL_CHAR_FOUND;
1082                }
1083            } else {
1084                /* callback(unassigned) for a BMP code point */
1085                *err = U_INVALID_CHAR_FOUND;
1086            }
1087
1088            args->converter->fromUChar32=sourceChar;
1089            break;
1090        }
1091    }/* end while(mySourceIndex<mySourceLength) */
1092
1093    /*save the state and return */
1094    args->source = source;
1095    args->target = (char*)target;
1096}
1097
1098static const uint16_t lookupTable[][2]={
1099    { ZERO,       ZERO     },     /*DEFALT*/
1100    { ZERO,       ZERO     },     /*ROMAN*/
1101    { DEVANAGARI, DEV_MASK },
1102    { BENGALI,    BNG_MASK },
1103    { TAMIL,      TML_MASK },
1104    { TELUGU,     KND_MASK },
1105    { BENGALI,    BNG_MASK },
1106    { ORIYA,      ORI_MASK },
1107    { KANNADA,    KND_MASK },
1108    { MALAYALAM,  MLM_MASK },
1109    { GUJARATI,   GJR_MASK },
1110    { GURMUKHI,   PNJ_MASK }
1111};
1112
/*
 * WRITE_TO_TARGET_TO_U: write one UTF-16 code unit to the to-Unicode output.
 *
 * targetUniChar is first moved into the current script block by adding delta,
 * unless it is <= ASCII_END or one of ZWJ, ZWNJ, DANDA, DOUBLE_DANDA.  The
 * adjusted value is stored back into targetUniChar, which therefore must be a
 * modifiable lvalue.
 *
 * If target is below args->targetLimit the unit is stored there and, when
 * offsets is non-NULL, `offset` is appended to offsets.  Otherwise the unit is
 * appended to args->converter->UCharErrorBuffer and *err is set to
 * U_BUFFER_OVERFLOW_ERROR.
 *
 * The `source` parameter is not used by the expansion.  Every parameter that
 * is used is parenthesized so expression arguments are evaluated as a whole.
 * The expansion is a plain { } block, not do { } while (0): do not use it as
 * the unbraced body of an `if` that has an `else`.
 */
#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
    /* add offset to current Indic Block */                                              \
    if((targetUniChar)>ASCII_END &&                                                      \
           (targetUniChar) != ZWJ &&                                                     \
           (targetUniChar) != ZWNJ &&                                                    \
           (targetUniChar) != DANDA &&                                                   \
           (targetUniChar) != DOUBLE_DANDA){                                             \
                                                                                         \
           (targetUniChar)+=(uint16_t)(delta);                                           \
    }                                                                                    \
    /* now write the targetUniChar */                                                    \
    if((target)<(args)->targetLimit){                                                    \
        *(target)++ = (UChar)(targetUniChar);                                            \
        if(offsets){                                                                     \
            *(offsets)++ = (int32_t)(offset);                                            \
        }                                                                                \
    }else{                                                                               \
        (args)->converter->UCharErrorBuffer[(args)->converter->UCharErrorBufferLength++] =\
            (UChar)(targetUniChar);                                                      \
        *(err) = U_BUFFER_OVERFLOW_ERROR;                                                \
    }                                                                                    \
}
1135
/*
 * GET_MAPPING: look up the Unicode value for one ISCII byte.
 *
 * targetUniChar (a modifiable lvalue) receives toUnicodeTable[sourceChar].
 * For bytes above ASCII_END the result is then checked against validityTable,
 * indexed by the low byte of the mapped value, using data->currentMaskToUnicode.
 * If the mask test fails, targetUniChar is replaced by missingCharMarker --
 * except for VOCALLIC_RR while data->currentDeltaToUnicode is TELUGU_DELTA.
 *
 * sourceChar and data are evaluated more than once.  Every parameter is
 * parenthesized so expression arguments are evaluated as a whole.  The
 * expansion is a plain { } block, not do { } while (0).
 */
#define GET_MAPPING(sourceChar,targetUniChar,data){                                      \
    (targetUniChar) = toUnicodeTable[(sourceChar)] ;                                     \
    /* is the code point valid in current script? */                                     \
    if((sourceChar)> ASCII_END &&                                                        \
            (validityTable[(uint8_t)(targetUniChar)] & (data)->currentMaskToUnicode)==0){\
        /* Vocallic RR is assigned in ISCII Telugu and Unicode */                        \
        if((data)->currentDeltaToUnicode!=(TELUGU_DELTA) ||                              \
                    (targetUniChar)!=VOCALLIC_RR){                                       \
            (targetUniChar)=missingCharMarker;                                           \
        }                                                                                \
    }                                                                                    \
}
1148
/***********
 *  Rules for ISCII to Unicode converter
 *  ISCII is a stateful encoding. Unicode has both precomposed and decomposed
 *  forms of characters, so to convert ISCII bytes to Unicode both the
 *  pre-context and the post-context need to be considered.
 *
 *  Post context
 *  i)  ATR : Attribute code is used to declare the font and script switching.
 *      Currently we only switch scripts; font codes are consumed without generating an error
 *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
 *      obsolete characters
 *  Pre context
 *  i)  Halant: if preceded by a halant then it is an explicit halant
 *  ii) Nukta :
 *       a) if preceded by a halant then it is a soft halant
 *       b) if preceded by specific consonants and the ligatures have pre-composed
 *          characters in Unicode then convert to pre-composed characters
 *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
 *
 */
1169
1170static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
1171    const char *source = ( char *) args->source;
1172    UChar *target = args->target;
1173    const char *sourceLimit = args->sourceLimit;
1174    const UChar* targetLimit = args->targetLimit;
1175    uint32_t targetUniChar = 0x0000;
1176    uint8_t sourceChar = 0x0000;
1177    UConverterDataISCII* data;
1178    UChar32* toUnicodeStatus=NULL;
1179    UChar32 tempTargetUniChar = 0x0000;
1180    UChar* contextCharToUnicode= NULL;
1181    UBool found;
1182    int i;
1183    int offset = 0;
1184
1185    if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
1186        *err = U_ILLEGAL_ARGUMENT_ERROR;
1187        return;
1188    }
1189
1190    data = (UConverterDataISCII*)(args->converter->extraInfo);
1191    contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
1192    toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
1193
1194    while (U_SUCCESS(*err) && source<sourceLimit) {
1195
1196        targetUniChar = missingCharMarker;
1197
1198        if (target < targetLimit) {
1199            sourceChar = (unsigned char)*(source)++;
1200
1201            /* look at the post-context preform special processing */
1202            if (*contextCharToUnicode==ATR) {
1203
1204                /* If we have ATR in *contextCharToUnicode then we need to change our
1205                 * state to the Indic Script specified by sourceChar
1206                 */
1207
1208                /* check if the sourceChar is supported script range*/
1209                if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
1210                    data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
1211                    data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
1212                } else if (sourceChar==DEF) {
1213                    /* switch back to default */
1214                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
1215                    data->currentMaskToUnicode = data->defMaskToUnicode;
1216                } else {
1217                    if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
1218                        /* these are display codes consume and continue */
1219                    } else {
1220                        *err =U_ILLEGAL_CHAR_FOUND;
1221                        /* reset */
1222                        *contextCharToUnicode=NO_CHAR_MARKER;
1223                        goto CALLBACK;
1224                    }
1225                }
1226
1227                /* reset */
1228                *contextCharToUnicode=NO_CHAR_MARKER;
1229
1230                continue;
1231
1232            } else if (*contextCharToUnicode==EXT) {
1233                /* check if sourceChar is in 0xA1-0xEE range */
1234                if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
1235                    /* We currently support only Anudatta and Devanagari abbreviation sign */
1236                    if (sourceChar==0xBF || sourceChar == 0xB8) {
1237                        targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
1238
1239                        /* find out if the mapping is valid in this state */
1240                        if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1241                            *contextCharToUnicode= NO_CHAR_MARKER;
1242
1243                            /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1244                            if (data->prevToUnicodeStatus) {
1245                                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1246                                data->prevToUnicodeStatus = 0x0000;
1247                            }
1248                            /* write to target */
1249                            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1250
1251                            continue;
1252                        }
1253                    }
1254                    /* byte unit is unassigned */
1255                    targetUniChar = missingCharMarker;
1256                    *err= U_INVALID_CHAR_FOUND;
1257                } else {
1258                    /* only 0xA1 - 0xEE are legal after EXT char */
1259                    *contextCharToUnicode= NO_CHAR_MARKER;
1260                    *err = U_ILLEGAL_CHAR_FOUND;
1261                }
1262                goto CALLBACK;
1263            } else if (*contextCharToUnicode==ISCII_INV) {
1264                if (sourceChar==ISCII_HALANT) {
1265                    targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
1266                } else {
1267                    targetUniChar = ZWJ;
1268                }
1269
1270                /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1271                if (data->prevToUnicodeStatus) {
1272                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1273                    data->prevToUnicodeStatus = 0x0000;
1274                }
1275                /* write to target */
1276                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1277                /* reset */
1278                *contextCharToUnicode=NO_CHAR_MARKER;
1279            }
1280
1281            /* look at the pre-context and perform special processing */
1282            switch (sourceChar) {
1283            case ISCII_INV:
1284            case EXT: /*falls through*/
1285            case ATR:
1286                *contextCharToUnicode = (UChar)sourceChar;
1287
1288                if (*toUnicodeStatus != missingCharMarker) {
1289                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1290                    if (data->prevToUnicodeStatus) {
1291                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1292                        data->prevToUnicodeStatus = 0x0000;
1293                    }
1294                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1295                    *toUnicodeStatus = missingCharMarker;
1296                }
1297                continue;
1298            case ISCII_DANDA:
1299                /* handle double danda*/
1300                if (*contextCharToUnicode== ISCII_DANDA) {
1301                    targetUniChar = DOUBLE_DANDA;
1302                    /* clear the context */
1303                    *contextCharToUnicode = NO_CHAR_MARKER;
1304                    *toUnicodeStatus = missingCharMarker;
1305                } else {
1306                    GET_MAPPING(sourceChar,targetUniChar,data);
1307                    *contextCharToUnicode = sourceChar;
1308                }
1309                break;
1310            case ISCII_HALANT:
1311                /* handle explicit halant */
1312                if (*contextCharToUnicode == ISCII_HALANT) {
1313                    targetUniChar = ZWNJ;
1314                    /* clear the context */
1315                    *contextCharToUnicode = NO_CHAR_MARKER;
1316                } else {
1317                    GET_MAPPING(sourceChar,targetUniChar,data);
1318                    *contextCharToUnicode = sourceChar;
1319                }
1320                break;
1321            case 0x0A:
1322                /* fall through */
1323            case 0x0D:
1324                data->resetToDefaultToUnicode = TRUE;
1325                GET_MAPPING(sourceChar,targetUniChar,data)
1326                ;
1327                *contextCharToUnicode = sourceChar;
1328                break;
1329
1330            case ISCII_VOWEL_SIGN_E:
1331                i=1;
1332                found=FALSE;
1333                for (; i<vowelSignESpecialCases[0][0]; i++) {
1334                    if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
1335                        targetUniChar=vowelSignESpecialCases[i][1];
1336                        found=TRUE;
1337                        break;
1338                    }
1339                }
1340                if (found) {
1341                    /* find out if the mapping is valid in this state */
1342                    if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1343                        /*targetUniChar += data->currentDeltaToUnicode ;*/
1344                        *contextCharToUnicode= NO_CHAR_MARKER;
1345                        *toUnicodeStatus = missingCharMarker;
1346                        break;
1347                    }
1348                }
1349                GET_MAPPING(sourceChar,targetUniChar,data);
1350                *contextCharToUnicode = sourceChar;
1351                break;
1352
1353            case ISCII_NUKTA:
1354                /* handle soft halant */
1355                if (*contextCharToUnicode == ISCII_HALANT) {
1356                    targetUniChar = ZWJ;
1357                    /* clear the context */
1358                    *contextCharToUnicode = NO_CHAR_MARKER;
1359                    break;
1360                } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
1361                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1362                    if (data->prevToUnicodeStatus) {
1363                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1364                        data->prevToUnicodeStatus = 0x0000;
1365                    }
1366                    /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
1367                     * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
1368                     */
1369                    targetUniChar = PNJ_RRA;
1370                    WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1371                    if (U_SUCCESS(*err)) {
1372                        targetUniChar = PNJ_SIGN_VIRAMA;
1373                        WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1374                        if (U_SUCCESS(*err)) {
1375                            targetUniChar = PNJ_HA;
1376                            WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1377                        } else {
1378                            args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1379                        }
1380                    } else {
1381                        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
1382                        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1383                    }
1384                    *toUnicodeStatus = missingCharMarker;
1385                    data->contextCharToUnicode = NO_CHAR_MARKER;
1386                    continue;
1387                } else {
1388                    /* try to handle <CHAR> + ISCII_NUKTA special mappings */
1389                    i=1;
1390                    found =FALSE;
1391                    for (; i<nuktaSpecialCases[0][0]; i++) {
1392                        if (nuktaSpecialCases[i][0]==(uint8_t)
1393                                *contextCharToUnicode) {
1394                            targetUniChar=nuktaSpecialCases[i][1];
1395                            found =TRUE;
1396                            break;
1397                        }
1398                    }
1399                    if (found) {
1400                        /* find out if the mapping is valid in this state */
1401                        if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1402                            /*targetUniChar += data->currentDeltaToUnicode ;*/
1403                            *contextCharToUnicode= NO_CHAR_MARKER;
1404                            *toUnicodeStatus = missingCharMarker;
1405                            if (data->currentDeltaToUnicode == PNJ_DELTA) {
1406                                /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1407                                if (data->prevToUnicodeStatus) {
1408                                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1409                                    data->prevToUnicodeStatus = 0x0000;
1410                                }
1411                                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1412                                continue;
1413                            }
1414                            break;
1415                        }
1416                        /* else fall through to default */
1417                    }
1418                    /* else fall through to default */
1419                }
1420            default:GET_MAPPING(sourceChar,targetUniChar,data)
1421                ;
1422                *contextCharToUnicode = sourceChar;
1423                break;
1424            }
1425
1426            if (*toUnicodeStatus != missingCharMarker) {
1427                /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
1428                if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && uset_contains(PNJ_CONSONANT_SET, data->prevToUnicodeStatus) &&
1429                        (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
1430                    /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
1431                    offset = (int)(source-args->source - 3);
1432                    tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
1433                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
1434                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
1435                    data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
1436                    *toUnicodeStatus = missingCharMarker;
1437                    continue;
1438                } else {
1439                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1440                    if (data->prevToUnicodeStatus) {
1441                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1442                        data->prevToUnicodeStatus = 0x0000;
1443                    }
1444                    /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
1445                     * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
1446                     */
1447                    if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && uset_contains(PNJ_BINDI_TIPPI_SET, (*toUnicodeStatus + PNJ_DELTA))) {
1448                        targetUniChar = PNJ_TIPPI - PNJ_DELTA;
1449                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
1450                    } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && uset_contains(PNJ_CONSONANT_SET, (*toUnicodeStatus + PNJ_DELTA))) {
1451                        /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
1452                        data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
1453                    } else {
1454                        /* write the previously mapped codepoint */
1455                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1456                    }
1457                }
1458                *toUnicodeStatus = missingCharMarker;
1459            }
1460
1461            if (targetUniChar != missingCharMarker) {
1462                /* now save the targetUniChar for delayed write */
1463                *toUnicodeStatus = (UChar) targetUniChar;
1464                if (data->resetToDefaultToUnicode==TRUE) {
1465                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
1466                    data->currentMaskToUnicode = data->defMaskToUnicode;
1467                    data->resetToDefaultToUnicode=FALSE;
1468                }
1469            } else {
1470
1471                /* we reach here only if targetUniChar == missingCharMarker
1472                 * so assign codes to reason and err
1473                 */
1474                *err = U_INVALID_CHAR_FOUND;
1475CALLBACK:
1476                args->converter->toUBytes[0] = (uint8_t) sourceChar;
1477                args->converter->toULength = 1;
1478                break;
1479            }
1480
1481        } else {
1482            *err =U_BUFFER_OVERFLOW_ERROR;
1483            break;
1484        }
1485    }
1486
1487    if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
1488        /* end of the input stream */
1489        UConverter *cnv = args->converter;
1490
1491        if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
1492            /* set toUBytes[] */
1493            cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
1494            cnv->toULength = 1;
1495
1496            /* avoid looping on truncated sequences */
1497            *contextCharToUnicode = NO_CHAR_MARKER;
1498        } else {
1499            cnv->toULength = 0;
1500        }
1501
1502        if (*toUnicodeStatus != missingCharMarker) {
1503            /* output a remaining target character */
1504            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1505            *toUnicodeStatus = missingCharMarker;
1506        }
1507    }
1508
1509    args->target = target;
1510    args->source = source;
1511}
1512
/*
 * Memory layout that _ISCII_SafeClone() imposes on the caller-supplied
 * clone buffer, and whose sizeof() it reports for preflighting requests.
 *
 * NOTE(review): cnv presumably has to stay the first member, because the
 * generic clone code is said to have copied the UConverter into the start
 * of the buffer already - confirm against ucnv_safeClone() in ucnv.c.
 */
struct cloneISCIIStruct {
    UConverter cnv;              /* the clone itself; _ISCII_SafeClone() returns its address */
    UConverterDataISCII mydata;  /* private copy of the source converter's extraInfo */
};
1518
1519static UConverter *
1520_ISCII_SafeClone(const UConverter *cnv,
1521              void *stackBuffer,
1522              int32_t *pBufferSize,
1523              UErrorCode *status)
1524{
1525    struct cloneISCIIStruct * localClone;
1526    int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
1527
1528    if (U_FAILURE(*status)) {
1529        return 0;
1530    }
1531
1532    if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
1533        *pBufferSize = bufferSizeNeeded;
1534        return 0;
1535    }
1536
1537    localClone = (struct cloneISCIIStruct *)stackBuffer;
1538    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1539
1540    uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
1541    localClone->cnv.extraInfo = &localClone->mydata;
1542    localClone->cnv.isExtraLocal = TRUE;
1543
1544    return &localClone->cnv;
1545}
1546
1547static void
1548_ISCIIGetUnicodeSet(const UConverter *cnv,
1549                    const USetAdder *sa,
1550                    UConverterUnicodeSet which,
1551                    UErrorCode *pErrorCode)
1552{
1553    int32_t idx, script;
1554    uint8_t mask;
1555
1556    /* Since all ISCII versions allow switching to other ISCII
1557    scripts, we add all roundtrippable characters to this set. */
1558    sa->addRange(sa->set, 0, ASCII_END);
1559    for (script = DEVANAGARI; script <= MALAYALAM; script++) {
1560        mask = (uint8_t)(lookupInitialData[script].maskEnum);
1561        for (idx = 0; idx < DELTA; idx++) {
1562            /* added check for TELUGU character */
1563            if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
1564                sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
1565            }
1566        }
1567    }
1568    sa->add(sa->set, DANDA);
1569    sa->add(sa->set, DOUBLE_DANDA);
1570    sa->add(sa->set, ZWNJ);
1571    sa->add(sa->set, ZWJ);
1572}
1573
/*
 * Implementation table for the ISCII converter: the converter type followed
 * by the functions installed for it. NULL entries leave a slot without an
 * ISCII-specific function.
 *
 * NOTE(review): the initializer is positional. The slot labels below are
 * inferred from the names of the installed functions and the usual
 * UConverterImpl layout - confirm against the struct in ucnv_cnv.h.
 */
static const UConverterImpl _ISCIIImpl={

    UCNV_ISCII,     /* converter type */

    NULL,           /* presumably load - not provided */
    NULL,           /* presumably unload - not provided */

    _ISCIIOpen,
    _ISCIIClose,
    _ISCIIReset,

    /* the same function is installed twice in a row for each direction;
     * presumably these are the plain and the with-offsets slots */
    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
    NULL,           /* presumably getNextUChar - not provided */

    NULL,           /* presumably getStarters - not provided */
    _ISCIIgetName,
    NULL,           /* presumably writeSub - not provided */
    _ISCII_SafeClone,
    _ISCIIGetUnicodeSet
};
1597
/*
 * Constant description of the ISCII converter, referenced from _ISCIIData.
 *
 * NOTE(review): the initializer is positional. The field labels below are
 * inferred from the values and the usual UConverterStaticData layout -
 * confirm against the struct definition in ucnv_bld.h.
 */
static const UConverterStaticData _ISCIIStaticData={
    sizeof(UConverterStaticData), /* size of this structure */
        "ISCII",                  /* converter name */
         0,                       /* presumably the codepage number */
         UCNV_IBM,                /* presumably the platform */
         UCNV_ISCII,              /* converter type, same value as in _ISCIIImpl */
         1,                       /* presumably min bytes per character */
         4,                       /* presumably max bytes per character */
        { 0x1a, 0, 0, 0 },        /* presumably the substitution byte sequence */
        0x1,                      /* presumably the length of that sequence */
        FALSE,
        FALSE,
        0x0,
        0x0,
        { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

};
1615
/*
 * Shared-data record for the ISCII converter (non-static, so visible to
 * other translation units). It ties together the static description
 * (_ISCIIStaticData) and the implementation table (_ISCIIImpl).
 *
 * NOTE(review): the initializer is positional. The field labels below are
 * inferred from the usual UConverterSharedData layout - confirm against
 * the struct definition in ucnv_bld.h.
 */
const UConverterSharedData _ISCIIData={
    sizeof(UConverterSharedData), /* size of this structure */
        ~((uint32_t) 0),          /* all bits set; presumably a reference counter that is never counted down - confirm */
        NULL,
        NULL,
        &_ISCIIStaticData,        /* static description defined above */
        FALSE,
        &_ISCIIImpl,              /* implementation table defined above */
        0
};
1626
1627#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1628