1/*
2 **********************************************************************
3 *   Copyright (C) 2005-2009, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7
8#include "unicode/utypes.h"
9
10#include "cmemory.h"
11
12#if !UCONFIG_NO_CONVERSION
13#include "csrsbcs.h"
14
15#define N_GRAM_SIZE 3
16#define N_GRAM_MASK 0xFFFFFF
17
18U_NAMESPACE_BEGIN
19
20NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
21  :byteIndex(0), ngram(0)
22{
23    ngramList = theNgramList;
24    charMap   = theCharMap;
25
26    ngramCount = hitCount = 0;
27}
28
29/*
30 * Binary search for value in table, which must have exactly 64 entries.
31 */
32
33int32_t NGramParser::search(const int32_t *table, int32_t value)
34{
35    int32_t index = 0;
36
37    if (table[index + 32] <= value) {
38        index += 32;
39    }
40
41    if (table[index + 16] <= value) {
42        index += 16;
43    }
44
45    if (table[index + 8] <= value) {
46        index += 8;
47    }
48
49    if (table[index + 4] <= value) {
50        index += 4;
51    }
52
53    if (table[index + 2] <= value) {
54        index += 2;
55    }
56
57    if (table[index + 1] <= value) {
58        index += 1;
59    }
60
61    if (table[index] > value) {
62        index -= 1;
63    }
64
65    if (index < 0 || table[index] != value) {
66        return -1;
67    }
68
69    return index;
70}
71
72void NGramParser::lookup(int32_t thisNgram)
73{
74    ngramCount += 1;
75
76    if (search(ngramList, thisNgram) >= 0) {
77        hitCount += 1;
78    }
79
80}
81
82void NGramParser::addByte(int32_t b)
83{
84    ngram = ((ngram << 8) + b) & N_GRAM_MASK;
85    lookup(ngram);
86}
87
88int32_t NGramParser::nextByte(InputText *det)
89{
90    if (byteIndex >= det->fInputLen) {
91        return -1;
92    }
93
94    return det->fInputBytes[byteIndex++];
95}
96
97int32_t NGramParser::parse(InputText *det)
98{
99    int32_t b;
100    bool ignoreSpace = FALSE;
101
102    while ((b = nextByte(det)) >= 0) {
103        uint8_t mb = charMap[b];
104
105        // TODO: 0x20 might not be a space in all character sets...
106        if (mb != 0) {
107            if (!(mb == 0x20 && ignoreSpace)) {
108                addByte(mb);
109            }
110
111            ignoreSpace = (mb == 0x20);
112        }
113    }
114
115    // TODO: Is this OK? The buffer could have ended in the middle of a word...
116    addByte(0x20);
117
118    double rawPercent = (double) hitCount / (double) ngramCount;
119
120    //            if (rawPercent <= 2.0) {
121    //                return 0;
122    //            }
123
124    // TODO - This is a bit of a hack to take care of a case
125    // were we were getting a confidence of 135...
126    if (rawPercent > 0.33) {
127        return 98;
128    }
129
130    return (int32_t) (rawPercent * 300.0);
131}
132
133CharsetRecog_sbcs::CharsetRecog_sbcs()
134: haveC1Bytes(FALSE)
135{
136    // nothing else to do
137}
138
139CharsetRecog_sbcs::~CharsetRecog_sbcs()
140{
141    // nothing to do
142}
143
144int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[])
145{
146    NGramParser parser(ngrams, byteMap);
147    int32_t result;
148
149    haveC1Bytes = det->fC1Bytes;
150    result = parser.parse(det);
151
152    return result;
153}
154
/*
 * Byte map named for ISO-8859-1: 256 entries, indexed by input byte,
 * one row per high nibble. In the lower half both cases of the ASCII
 * letters map to lowercase, 0x27 maps to 0x00 (parse() drops bytes
 * mapped to 0) and everything else maps to 0x20.
 */
static const uint8_t charMap_8859_1[] = {
/* 0x00 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x10 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x20 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x30 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x40 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x50 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x60 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x80 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x90 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xA0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xB0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xC0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xD0 */  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
/* 0xE0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xF0 */  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
189
/*
 * Byte map named for ISO-8859-2: 256 entries, indexed by input byte,
 * one row per high nibble. In the lower half both cases of the ASCII
 * letters map to lowercase, 0x27 maps to 0x00 (parse() drops bytes
 * mapped to 0) and everything else maps to 0x20.
 */
static const uint8_t charMap_8859_2[] = {
/* 0x00 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x10 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x20 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x30 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x40 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x50 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x60 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x80 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x90 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xA0 */  0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20, 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
/* 0xB0 */  0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
/* 0xC0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xD0 */  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
/* 0xE0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xF0 */  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
};
224
/*
 * Byte map named for ISO-8859-5: 256 entries, indexed by input byte,
 * one row per high nibble. In the lower half both cases of the ASCII
 * letters map to lowercase, 0x27 maps to 0x00 (parse() drops bytes
 * mapped to 0) and everything else maps to 0x20.
 */
static const uint8_t charMap_8859_5[] = {
/* 0x00 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x10 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x20 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x30 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x40 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x50 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x60 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x80 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x90 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xA0 */  0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
/* 0xB0 */  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
/* 0xC0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xD0 */  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
/* 0xE0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xF0 */  0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
};
259
/*
 * Byte map named for ISO-8859-6: 256 entries, indexed by input byte,
 * one row per high nibble. In the lower half both cases of the ASCII
 * letters map to lowercase, 0x27 maps to 0x00 (parse() drops bytes
 * mapped to 0) and everything else maps to 0x20.
 */
static const uint8_t charMap_8859_6[] = {
/* 0x00 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x10 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x20 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x30 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x40 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x50 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x60 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x80 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x90 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xA0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xB0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xC0 */  0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
/* 0xD0 */  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xE0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xF0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
};
294
/*
 * Byte map named for ISO-8859-7: 256 entries, indexed by input byte,
 * one row per high nibble. In the lower half both cases of the ASCII
 * letters map to lowercase, 0x27 maps to 0x00 (parse() drops bytes
 * mapped to 0) and everything else maps to 0x20.
 */
static const uint8_t charMap_8859_7[] = {
/* 0x00 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x10 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x20 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x30 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x40 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x50 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x60 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x80 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x90 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xA0 */  0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xB0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20, 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
/* 0xC0 */  0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xD0 */  0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
/* 0xE0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xF0 */  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
};
329
/*
 * Byte map named for ISO-8859-8: 256 entries, indexed by input byte,
 * one row per high nibble. In the lower half both cases of the ASCII
 * letters map to lowercase, 0x27 maps to 0x00 (parse() drops bytes
 * mapped to 0) and everything else maps to 0x20.
 */
static const uint8_t charMap_8859_8[] = {
/* 0x00 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x10 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x20 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x30 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x40 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x50 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x60 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x80 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x90 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xA0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xB0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xC0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xD0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xE0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xF0 */  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
};
364
/*
 * Byte map named for ISO-8859-9: 256 entries, indexed by input byte,
 * one row per high nibble. In the lower half both cases of the ASCII
 * letters map to lowercase, 0x27 maps to 0x00 (parse() drops bytes
 * mapped to 0) and everything else maps to 0x20. Note that 0xDD maps
 * to ASCII 0x69 in this table.
 */
static const uint8_t charMap_8859_9[] = {
/* 0x00 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x10 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x20 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x30 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x40 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x50 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x60 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x80 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x90 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xA0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xB0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xC0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xD0 */  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
/* 0xE0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xF0 */  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
399
/*
 * N-gram table named for windows-1251: 64 packed three-byte values in
 * ascending order, the layout NGramParser::search() relies on.
 */
static const int32_t ngrams_windows_1251[] = {
    0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0,
    0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
    0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2,
    0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
    0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF,
    0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
    0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2,
    0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
};
406
/*
 * Byte map named for windows-1251: 256 entries, indexed by input byte,
 * one row per high nibble. In the lower half both cases of the ASCII
 * letters map to lowercase, 0x27 maps to 0x00 (parse() drops bytes
 * mapped to 0) and everything else maps to 0x20.
 */
static const uint8_t charMap_windows_1251[] = {
/* 0x00 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x10 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x20 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x30 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x40 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x50 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x60 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x80 */  0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
/* 0x90 */  0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
/* 0xA0 */  0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
/* 0xB0 */  0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
/* 0xC0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xD0 */  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
/* 0xE0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xF0 */  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
441
/*
 * N-gram table named for windows-1256: 64 packed three-byte values in
 * ascending order, the layout NGramParser::search() relies on.
 */
static const int32_t ngrams_windows_1256[] = {
    0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7,
    0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
    0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3,
    0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
    0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920,
    0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
    0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1,
    0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
};
448
/*
 * Byte map named for windows-1256: 256 entries, indexed by input byte,
 * one row per high nibble. In the lower half both cases of the ASCII
 * letters map to lowercase, 0x27 maps to 0x00 (parse() drops bytes
 * mapped to 0) and everything else maps to 0x20.
 */
static const uint8_t charMap_windows_1256[] = {
/* 0x00 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x10 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x20 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x30 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x40 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x50 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x60 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x80 */  0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
/* 0x90 */  0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
/* 0xA0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xB0 */  0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xC0 */  0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
/* 0xD0 */  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
/* 0xE0 */  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0xF0 */  0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
};
483
/*
 * N-gram table named for KOI8-R: 64 packed three-byte values in
 * ascending order, the layout NGramParser::search() relies on.
 */
static const int32_t ngrams_KOI8_R[] = {
    0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF,
    0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
    0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420,
    0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
    0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3,
    0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
    0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1,
    0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
};
490
/*
 * Byte map named for KOI8-R: 256 entries, indexed by input byte, one
 * row per high nibble. In the lower half both cases of the ASCII
 * letters map to lowercase, 0x27 maps to 0x00 (parse() drops bytes
 * mapped to 0) and everything else maps to 0x20. Rows 0xE0 and 0xF0
 * repeat rows 0xC0 and 0xD0.
 */
static const uint8_t charMap_KOI8_R[] = {
/* 0x00 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x10 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x20 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x30 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x40 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x50 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x60 */  0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x80 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0x90 */  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xA0 */  0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xB0 */  0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
/* 0xC0 */  0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
/* 0xD0 */  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
/* 0xE0 */  0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
/* 0xF0 */  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
};
525
/*
 * N-gram table named for IBM424 / "he", rtl variant: 64 packed
 * three-byte values in ascending order, the layout
 * NGramParser::search() relies on.
 */
static const int32_t ngrams_IBM424_he_rtl[] = {
    0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546,
    0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
    0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056,
    0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
    0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041,
    0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
    0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045,
    0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
};
532
/*
 * N-gram table named for IBM424 / "he", ltr variant: 64 packed
 * three-byte values in ascending order, the layout
 * NGramParser::search() relies on.
 */
static const int32_t ngrams_IBM424_he_ltr[] = {
    0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462,
    0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
    0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645,
    0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
    0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140,
    0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
    0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540,
    0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
};
539
/*
 * Byte map named for IBM424 / "he": 256 entries, indexed by input
 * byte, one row per high nibble (row label = offset of its first
 * entry). The filler value in this table is 0x40 rather than 0x20;
 * 0x7D maps to 0x00 (parse() drops bytes mapped to 0). Rows 0xC0-0xE0
 * map onto the values of rows 0x80-0xA0.
 */
static const uint8_t charMap_IBM424_he[] = {
/* 0x00 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x10 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x20 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x30 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x40 */  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x50 */  0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x60 */  0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x70 */  0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40,
/* 0x80 */  0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x90 */  0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0xA0 */  0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0xB0 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0xC0 */  0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0xD0 */  0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0xE0 */  0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0xF0 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
};
559
/*
 * N-gram table named for IBM420 / "ar", rtl variant: 64 packed
 * three-byte values in ascending order, the layout
 * NGramParser::search() relies on.
 */
static const int32_t ngrams_IBM420_ar_rtl[] = {
    0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56,
    0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
    0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB,
    0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
    0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240,
    0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
    0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1,
    0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
};
566
/*
 * N-gram table named for IBM420 / "ar", ltr variant: 64 packed
 * three-byte values in ascending order, the layout
 * NGramParser::search() relies on.
 */
static const int32_t ngrams_IBM420_ar_ltr[] = {
    0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC,
    0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
    0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062,
    0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
    0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156,
    0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
    0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156,
    0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
};
573
/*
 * Byte map named for IBM420 / "ar": 256 entries, indexed by input
 * byte, one row per high nibble (row label = offset of its first
 * entry). The filler value in this table is 0x40 rather than 0x20,
 * and no entry is 0x00, so parse() never drops a byte with this map.
 */
static const uint8_t charMap_IBM420_ar[] = {
/* 0x00 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x10 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x20 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x30 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x40 */  0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x50 */  0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x60 */  0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x70 */  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 0x80 */  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
/* 0x90 */  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
/* 0xA0 */  0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
/* 0xB0 */  0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
/* 0xC0 */  0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF,
/* 0xD0 */  0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
/* 0xE0 */  0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
/* 0xF0 */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
};
593
594//ISO-8859-1,2,5,6,7,8,9 Ngrams
/*
 * N-gram table named for ISO-8859-1 / "en": 64 packed three-byte
 * values in ascending order, the layout NGramParser::search() relies on.
 */
static const int32_t ngrams_8859_1_en[] = {
    0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E,
    0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
    0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465,
    0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
    0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20,
    0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
    0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169,
    0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
};
601
/*
 * N-gram table named for ISO-8859-1 / "da": 64 packed three-byte
 * values in ascending order, the layout NGramParser::search() relies on.
 */
static const int32_t ngrams_8859_1_da[] = {
    0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920,
    0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
    0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574,
    0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
    0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67,
    0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
    0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065,
    0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
};
608
// ISO-8859-1 "de" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_1_de::match().
static const int32_t ngrams_8859_1_de[] = {
    0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765,
    0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
    0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E,
    0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
    0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65,
    0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
    0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368,
    0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
};
615
// ISO-8859-1 "es" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_1_es::match().
static const int32_t ngrams_8859_1_es[] = {
    0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E,
    0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369,
    0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
    0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320,
    0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
    0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573,
    0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
};
622
// ISO-8859-1 "fr" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_1_fr::match().
static const int32_t ngrams_8859_1_fr[] = {
    0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61,
    0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
    0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520,
    0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
    0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420,
    0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
    0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064,
    0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
};
629
// ISO-8859-1 "it" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_1_it::match().
static const int32_t ngrams_8859_1_it[] = {
    0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E,
    0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
    0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064,
    0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
    0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20,
    0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
    0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572,
    0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
};
636
// ISO-8859-1 "nl" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_1_nl::match().
static const int32_t ngrams_8859_1_nl[] = {
    0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765,
    0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
    0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220,
    0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
    0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520,
    0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
    0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368,
    0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
};
643
// ISO-8859-1 "no" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_1_no::match().
static const int32_t ngrams_8859_1_no[] = {
    0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920,
    0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
    0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574,
    0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
    0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520,
    0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
    0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465,
    0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
};
650
// ISO-8859-1 "pt" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_1_pt::match().
static const int32_t ngrams_8859_1_pt[] = {
    0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61,
    0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20,
    0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
    0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120,
    0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
    0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064,
    0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
};
657
// ISO-8859-1 "sv" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_1_sv::match().
static const int32_t ngrams_8859_1_sv[] = {
    0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E,
    0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
    0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474,
    0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
    0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564,
    0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
    0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073,
    0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
};
664
// ISO-8859-2 "cs" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_2_cs::match().
static const int32_t ngrams_8859_2_cs[] = {
    0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64,
    0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
    0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073,
    0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
    0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E,
    0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
    0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20,
    0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
};
671
// ISO-8859-2 "hu" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_2_hu::match().
static const int32_t ngrams_8859_2_hu[] = {
    0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F,
    0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
    0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073,
    0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
    0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920,
    0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
    0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74,
    0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
};
678
// ISO-8859-2 "pl" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_2_pl::match().
static const int32_t ngrams_8859_2_pl[] = {
    0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61,
    0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
    0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79,
    0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
    0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920,
    0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
    0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69,
    0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
};
685
// ISO-8859-2 "ro" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_2_ro::match().
static const int32_t ngrams_8859_2_ro[] = {
    0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469,
    0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
    0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172,
    0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
    0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063,
    0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
    0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520,
    0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
};
692
// ISO-8859-5 "ru" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_5_ru::match().
static const int32_t ngrams_8859_5_ru[] = {
    0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0,
    0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
    0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2,
    0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
    0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF,
    0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
    0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2,
    0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
};
699
// ISO-8859-6 "ar" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_6_ar::match().
static const int32_t ngrams_8859_6_ar[] = {
    0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7,
    0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
    0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5,
    0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
    0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920,
    0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
    0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4,
    0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
};
706
// ISO-8859-7 "el" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_7_el::match().
static const int32_t ngrams_8859_7_el[] = {
    0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5,
    0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
    0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220,
    0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
    0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0,
    0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
    0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9,
    0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
};
713
// ISO-8859-8-I "he" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_8_I_he::match().
static const int32_t ngrams_8859_8_I_he[] = {
    0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5,
    0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
    0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE,
    0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
    0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0,
    0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
    0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4,
    0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
};
720
// ISO-8859-8 "he" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_8_he::match().
static const int32_t ngrams_8859_8_he[] = {
    0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2,
    0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
    0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4,
    0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
    0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020,
    0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
    0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420,
    0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
};
727
// ISO-8859-9 "tr" n-gram table: 64 three-byte n-grams, each packed into one
// int32_t, listed in ascending order. Handed to match_sbcs() by
// CharsetRecog_8859_9_tr::match().
static const int32_t ngrams_8859_9_tr[] = {
    0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C,
    0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
    0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261,
    0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
    0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20,
    0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
    0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E,
    0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
};
734
735CharsetRecog_8859_1::~CharsetRecog_8859_1()
736{
737    // nothing to do
738}
739
740const char *CharsetRecog_8859_1::getName() const
741{
742    return haveC1Bytes? "windows-1252" : "ISO-8859-1";
743}
744
745const char *CharsetRecog_8859_1_en::getLanguage() const
746{
747    return "en";
748}
749
750CharsetRecog_8859_1_en::~CharsetRecog_8859_1_en()
751{
752    // nothing to do
753}
754
755int32_t CharsetRecog_8859_1_en::match(InputText *textIn)
756{
757    int32_t result = match_sbcs(textIn, ngrams_8859_1_en, charMap_8859_1);
758
759   // printf("8859_1_en: result = %d\n", result);
760    return result; //match_sbcs(textIn, ngrams, charMap);
761}
762
763CharsetRecog_8859_1_da::~CharsetRecog_8859_1_da()
764{
765    // nothing to do
766}
767
768const char *CharsetRecog_8859_1_da::getLanguage() const
769{
770    return "da";
771}
772
773int32_t CharsetRecog_8859_1_da::match(InputText *textIn)
774{
775    return match_sbcs(textIn, ngrams_8859_1_da, charMap_8859_1);
776}
777
778CharsetRecog_8859_1_de::~CharsetRecog_8859_1_de() {}
779
780const char *CharsetRecog_8859_1_de::getLanguage() const
781{
782    return "de";
783}
784
785int32_t CharsetRecog_8859_1_de::match(InputText *textIn)
786{
787    return match_sbcs(textIn, ngrams_8859_1_de, charMap_8859_1);
788}
789
790CharsetRecog_8859_1_es::~CharsetRecog_8859_1_es()
791{
792    // nothing to do
793}
794
795const char *CharsetRecog_8859_1_es::getLanguage() const
796{
797    return "es";
798}
799
800int32_t CharsetRecog_8859_1_es::match(InputText *textIn)
801{
802    return match_sbcs(textIn, ngrams_8859_1_es, charMap_8859_1);
803}
804
805CharsetRecog_8859_1_fr::~CharsetRecog_8859_1_fr()
806{
807    // nothing to do
808}
809
810const char *CharsetRecog_8859_1_fr::getLanguage() const
811{
812    return "fr";
813}
814
815int32_t CharsetRecog_8859_1_fr::match(InputText *textIn)
816{
817    return match_sbcs(textIn, ngrams_8859_1_fr, charMap_8859_1);
818}
819
820CharsetRecog_8859_1_it::~CharsetRecog_8859_1_it()
821{
822    // nothing to do
823}
824
825const char *CharsetRecog_8859_1_it::getLanguage() const
826{
827    return "it";
828}
829
830int32_t CharsetRecog_8859_1_it::match(InputText *textIn)
831{
832    return match_sbcs(textIn, ngrams_8859_1_it, charMap_8859_1);
833}
834
835CharsetRecog_8859_1_nl::~CharsetRecog_8859_1_nl()
836{
837    // nothing to do
838}
839
840const char *CharsetRecog_8859_1_nl::getLanguage() const
841{
842    return "nl";
843}
844
845int32_t CharsetRecog_8859_1_nl::match(InputText *textIn)
846{
847    return match_sbcs(textIn, ngrams_8859_1_nl, charMap_8859_1);
848}
849
850CharsetRecog_8859_1_no::~CharsetRecog_8859_1_no() {}
851
852const char *CharsetRecog_8859_1_no::getLanguage() const
853{
854    return "no";
855}
856
857int32_t CharsetRecog_8859_1_no::match(InputText *textIn)
858{
859    return match_sbcs(textIn, ngrams_8859_1_no, charMap_8859_1);
860}
861
862CharsetRecog_8859_1_pt::~CharsetRecog_8859_1_pt()
863{
864    // nothing to do
865}
866
867const char *CharsetRecog_8859_1_pt::getLanguage() const
868{
869    return "pt";
870}
871
872int32_t CharsetRecog_8859_1_pt::match(InputText *textIn)
873{
874    return match_sbcs(textIn, ngrams_8859_1_pt, charMap_8859_1);
875}
876
877CharsetRecog_8859_1_sv::~CharsetRecog_8859_1_sv() {}
878
879const char *CharsetRecog_8859_1_sv::getLanguage() const
880{
881    return "sv";
882}
883
884int32_t CharsetRecog_8859_1_sv::match(InputText *textIn)
885{
886    return match_sbcs(textIn, ngrams_8859_1_sv, charMap_8859_1);
887}
888
889CharsetRecog_8859_2::~CharsetRecog_8859_2()
890{
891    // nothing to do
892}
893
894const char *CharsetRecog_8859_2::getName() const
895{
896    return haveC1Bytes? "windows-1250" : "ISO-8859-2";
897}
898
899CharsetRecog_8859_2_cs::~CharsetRecog_8859_2_cs()
900{
901    // nothing to do
902}
903
904const char *CharsetRecog_8859_2_cs::getLanguage() const
905{
906    return "cs";
907}
908
909int32_t CharsetRecog_8859_2_cs::match(InputText *textIn)
910{
911    return match_sbcs(textIn, ngrams_8859_2_cs, charMap_8859_2);
912}
913
914CharsetRecog_8859_2_hu::~CharsetRecog_8859_2_hu()
915{
916    // nothing to do
917}
918
919const char *CharsetRecog_8859_2_hu::getLanguage() const
920{
921    return "hu";
922}
923
924int32_t CharsetRecog_8859_2_hu::match(InputText *textIn)
925{
926    return match_sbcs(textIn, ngrams_8859_2_hu, charMap_8859_2);
927}
928
929CharsetRecog_8859_2_pl::~CharsetRecog_8859_2_pl()
930{
931    // nothing to do
932}
933
934const char *CharsetRecog_8859_2_pl::getLanguage() const
935{
936    return "pl";
937}
938
939int32_t CharsetRecog_8859_2_pl::match(InputText *textIn)
940{
941    return match_sbcs(textIn, ngrams_8859_2_pl, charMap_8859_2);
942}
943
944CharsetRecog_8859_2_ro::~CharsetRecog_8859_2_ro()
945{
946    // nothing to do
947}
948
949const char *CharsetRecog_8859_2_ro::getLanguage() const
950{
951    return "ro";
952}
953
954int32_t CharsetRecog_8859_2_ro::match(InputText *textIn)
955{
956    return match_sbcs(textIn, ngrams_8859_2_ro, charMap_8859_2);
957}
958
959CharsetRecog_8859_5::~CharsetRecog_8859_5()
960{
961    // nothing to do
962}
963
964const char *CharsetRecog_8859_5::getName() const
965{
966    return "ISO-8859-5";
967}
968
969CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
970{
971    // nothing to do
972}
973
974const char *CharsetRecog_8859_5_ru::getLanguage() const
975{
976    return "ru";
977}
978
979int32_t CharsetRecog_8859_5_ru::match(InputText *textIn)
980{
981    return match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
982}
983
984CharsetRecog_8859_6::~CharsetRecog_8859_6()
985{
986    // nothing to do
987}
988
989const char *CharsetRecog_8859_6::getName() const
990{
991    return "ISO-8859-6";
992}
993
994CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
995{
996    // nothing to do
997}
998
999const char *CharsetRecog_8859_6_ar::getLanguage() const
1000{
1001    return "ar";
1002}
1003
1004int32_t CharsetRecog_8859_6_ar::match(InputText *textIn)
1005{
1006    return match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
1007}
1008
1009CharsetRecog_8859_7::~CharsetRecog_8859_7()
1010{
1011    // nothing to do
1012}
1013
1014const char *CharsetRecog_8859_7::getName() const
1015{
1016    return haveC1Bytes? "windows-1253" : "ISO-8859-7";
1017}
1018
1019CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
1020{
1021    // nothing to do
1022}
1023
1024const char *CharsetRecog_8859_7_el::getLanguage() const
1025{
1026    return "el";
1027}
1028
1029int32_t CharsetRecog_8859_7_el::match(InputText *textIn)
1030{
1031    return match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
1032}
1033
1034CharsetRecog_8859_8::~CharsetRecog_8859_8()
1035{
1036    // nothing to do
1037}
1038
1039const char *CharsetRecog_8859_8::getName() const
1040{
1041    return haveC1Bytes? "windows-1255" : "ISO-8859-8";
1042}
1043
1044CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
1045{
1046    // nothing to do
1047}
1048
1049const char *CharsetRecog_8859_8_I_he::getName() const
1050{
1051    return haveC1Bytes? "windows-1255" : "ISO-8859-8-I";
1052}
1053
1054const char *CharsetRecog_8859_8_I_he::getLanguage() const
1055{
1056    return "he";
1057}
1058
1059int32_t CharsetRecog_8859_8_I_he::match(InputText *textIn)
1060{
1061    return match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
1062}
1063
1064CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
1065{
1066    // od ot gnihton
1067}
1068
1069const char *CharsetRecog_8859_8_he::getLanguage() const
1070{
1071    return "he";
1072}
1073
1074int32_t CharsetRecog_8859_8_he::match(InputText *textIn)
1075{
1076    return match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
1077}
1078
1079CharsetRecog_8859_9::~CharsetRecog_8859_9()
1080{
1081    // nothing to do
1082}
1083
1084const char *CharsetRecog_8859_9::getName() const
1085{
1086    return haveC1Bytes? "windows-1254" : "ISO-8859-9";
1087}
1088
1089CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
1090{
1091    // nothing to do
1092}
1093
1094const char *CharsetRecog_8859_9_tr::getLanguage() const
1095{
1096    return "tr";
1097}
1098
1099int32_t CharsetRecog_8859_9_tr::match(InputText *textIn)
1100{
1101    return match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
1102}
1103
1104CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
1105{
1106    // nothing to do
1107}
1108
1109const char *CharsetRecog_windows_1256::getName() const
1110{
1111    return  "windows-1256";
1112}
1113
1114const char *CharsetRecog_windows_1256::getLanguage() const
1115{
1116    return "ar";
1117}
1118
1119int32_t CharsetRecog_windows_1256::match(InputText *textIn)
1120{
1121    return match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
1122}
1123
1124CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
1125{
1126    // nothing to do
1127}
1128
1129const char *CharsetRecog_windows_1251::getName() const
1130{
1131    return  "windows-1251";
1132}
1133
1134const char *CharsetRecog_windows_1251::getLanguage() const
1135{
1136    return "ru";
1137}
1138
1139int32_t CharsetRecog_windows_1251::match(InputText *textIn)
1140{
1141    return match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
1142}
1143
1144CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
1145{
1146    // nothing to do
1147}
1148
1149const char *CharsetRecog_KOI8_R::getName() const
1150{
1151    return  "KOI8-R";
1152}
1153
1154const char *CharsetRecog_KOI8_R::getLanguage() const
1155{
1156    return "ru";
1157}
1158
1159int32_t CharsetRecog_KOI8_R::match(InputText *textIn)
1160{
1161    return match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
1162}
1163
1164CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
1165{
1166    // nothing to do
1167}
1168
1169const char *CharsetRecog_IBM424_he::getLanguage() const
1170{
1171    return "he";
1172}
1173
1174CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl()
1175{
1176    // nothing to do
1177}
1178
1179const char *CharsetRecog_IBM424_he_rtl::getName() const
1180{
1181    return  "IBM424_rtl";
1182}
1183
1184int32_t CharsetRecog_IBM424_he_rtl::match(InputText *textIn)
1185{
1186    return match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);
1187}
1188
1189CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr()
1190{
1191    // nothing to do
1192}
1193
1194const char *CharsetRecog_IBM424_he_ltr::getName() const
1195{
1196    return  "IBM424_ltr";
1197}
1198
1199int32_t CharsetRecog_IBM424_he_ltr::match(InputText *textIn)
1200{
1201    return match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);
1202}
1203
// Byte-to-byte lookup table with 256 entries, indexed by the byte value.
// CharsetRecog_IBM420_ar::unshape() replaces each buffer byte with its entry.
// Bytes 0x00-0x3F all map to 0x40; bytes 0xE0-0xFF map to themselves.
// Each row below covers the 16 byte values named in its trailing comment.
static const uint8_t unshapeMap_IBM420[] = {
    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00-0x0F
    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10-0x1F
    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x20-0x2F
    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x30-0x3F
    0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, // 0x40-0x4F
    0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, // 0x50-0x5F
    0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, // 0x60-0x6F
    0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, // 0x70-0x7F
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F, // 0x80-0x8F
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E, // 0x90-0x9F
    0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF, // 0xA0-0xAF
    0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF, // 0xB0-0xBF
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF, // 0xC0-0xCF
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF, // 0xD0-0xDF
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, // 0xE0-0xEF
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, // 0xF0-0xFF
};
1223
1224CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar()
1225{
1226    // nothing to do
1227}
1228
1229const char *CharsetRecog_IBM420_ar::getLanguage() const
1230{
1231    return "ar";
1232}
1233
1234void CharsetRecog_IBM420_ar::matchInit(InputText *textIn) {
1235    prev_fInputBytesLength = textIn->fInputLen;
1236    prev_fInputBytes = textIn->fInputBytes;
1237
1238    int32_t length = 0;
1239    uint8_t *bb = unshape(prev_fInputBytes, prev_fInputBytesLength, length);
1240
1241    if (bb != NULL) {
1242        textIn->fInputBytes = bb;
1243        textIn->fInputLen = length;
1244
1245        deleteBuffer = TRUE;
1246    } else {
1247        deleteBuffer = FALSE;
1248    }
1249}
1250
1251uint8_t *CharsetRecog_IBM420_ar::unshape(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length) {
1252    uint8_t *resultArray = unshapeLamAlef(inputBytes, inputBytesLength, length);
1253
1254    if (resultArray != NULL) {
1255        for (int32_t i = 0; i < inputBytesLength; i++) {
1256            resultArray[i] = unshapeMap_IBM420[resultArray[i]];
1257        }
1258    }
1259
1260    return resultArray;
1261}
1262
1263uint8_t *CharsetRecog_IBM420_ar::unshapeLamAlef(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length) {
1264    int32_t bigBufferLength = inputBytesLength * 2;
1265    uint8_t *bigBuffer = new uint8_t[bigBufferLength];
1266    uint8_t *resultBuffer = NULL;
1267
1268    if (bigBuffer != NULL) {
1269        int32_t bufferIndex;
1270        uint8_t unshapedLamAlef[] = { 0xb1, 0x56 };
1271
1272        for (int32_t i = bufferIndex = 0; i < inputBytesLength; i++) {
1273            if (isLamAlef(inputBytes[i])) {
1274                bigBuffer[bufferIndex++] = unshapedLamAlef[0];
1275                bigBuffer[bufferIndex++] = unshapedLamAlef[1];
1276            } else {
1277                bigBuffer[bufferIndex++] = inputBytes[i];
1278            }
1279        }
1280
1281        length = bufferIndex;
1282        resultBuffer = new uint8_t[length];
1283        if (resultBuffer != NULL) {
1284            uprv_memcpy(resultBuffer, bigBuffer, length);
1285        }
1286    }
1287
1288    if (bigBuffer != NULL) {
1289        delete [] bigBuffer;
1290    }
1291
1292    return resultBuffer;
1293}
1294
1295void CharsetRecog_IBM420_ar::matchFinish(InputText *textIn) {
1296    if (deleteBuffer) {
1297        delete [] textIn->fInputBytes;
1298
1299        textIn->fInputBytes = prev_fInputBytes;
1300        textIn->fInputLen = prev_fInputBytesLength;
1301    }
1302}
1303
1304UBool CharsetRecog_IBM420_ar::isLamAlef(uint8_t b) {
1305    uint8_t shapedLamAlef[] = {
1306        0xb2, 0xb3, 0xb4, 0xb5, 0xb7, 0xb8
1307    };
1308
1309    for (uint32_t i = 0; i < sizeof(shapedLamAlef); i++) {
1310        if (b == shapedLamAlef[i]) {
1311            return TRUE;
1312        }
1313    }
1314
1315    return FALSE;
1316}
1317
1318CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl()
1319{
1320    // nothing to do
1321}
1322
1323const char *CharsetRecog_IBM420_ar_rtl::getName() const
1324{
1325    return  "IBM420_rtl";
1326}
1327
1328int32_t CharsetRecog_IBM420_ar_rtl::match(InputText *textIn)
1329{
1330    return match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);
1331}
1332
1333CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr()
1334{
1335    // nothing to do
1336}
1337
1338const char *CharsetRecog_IBM420_ar_ltr::getName() const
1339{
1340    return  "IBM420_ltr";
1341}
1342
1343int32_t CharsetRecog_IBM420_ar_ltr::match(InputText *textIn)
1344{
1345    return match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
1346}
1347
1348U_NAMESPACE_END
1349#endif
1350
1351