1/* 2 ********************************************************************** 3 * Copyright (C) 2005-2012, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 8#include "unicode/utypes.h" 9 10#include "cmemory.h" 11 12#if !UCONFIG_NO_CONVERSION 13#include "csrsbcs.h" 14#include "csmatch.h" 15 16#define N_GRAM_SIZE 3 17#define N_GRAM_MASK 0xFFFFFF 18#define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) 19 20U_NAMESPACE_BEGIN 21 22NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap) 23 :byteIndex(0), ngram(0) 24{ 25 ngramList = theNgramList; 26 charMap = theCharMap; 27 28 ngramCount = hitCount = 0; 29} 30 31/* 32 * Binary search for value in table, which must have exactly 64 entries. 33 */ 34 35int32_t NGramParser::search(const int32_t *table, int32_t value) 36{ 37 int32_t index = 0; 38 39 if (table[index + 32] <= value) { 40 index += 32; 41 } 42 43 if (table[index + 16] <= value) { 44 index += 16; 45 } 46 47 if (table[index + 8] <= value) { 48 index += 8; 49 } 50 51 if (table[index + 4] <= value) { 52 index += 4; 53 } 54 55 if (table[index + 2] <= value) { 56 index += 2; 57 } 58 59 if (table[index + 1] <= value) { 60 index += 1; 61 } 62 63 if (table[index] > value) { 64 index -= 1; 65 } 66 67 if (index < 0 || table[index] != value) { 68 return -1; 69 } 70 71 return index; 72} 73 74void NGramParser::lookup(int32_t thisNgram) 75{ 76 ngramCount += 1; 77 78 if (search(ngramList, thisNgram) >= 0) { 79 hitCount += 1; 80 } 81 82} 83 84void NGramParser::addByte(int32_t b) 85{ 86 ngram = ((ngram << 8) + b) & N_GRAM_MASK; 87 lookup(ngram); 88} 89 90int32_t NGramParser::nextByte(InputText *det) 91{ 92 if (byteIndex >= det->fInputLen) { 93 return -1; 94 } 95 96 return det->fInputBytes[byteIndex++]; 97} 98 99int32_t NGramParser::parse(InputText *det) 100{ 101 int32_t b; 102 bool ignoreSpace = FALSE; 103 104 while ((b = nextByte(det)) >= 0) { 105 uint8_t mb = charMap[b]; 106 107 // TODO: 0x20 might not be a space in all character sets... 108 if (mb != 0) { 109 if (!(mb == 0x20 && ignoreSpace)) { 110 addByte(mb); 111 } 112 113 ignoreSpace = (mb == 0x20); 114 } 115 } 116 117 // TODO: Is this OK? The buffer could have ended in the middle of a word... 118 addByte(0x20); 119 120 double rawPercent = (double) hitCount / (double) ngramCount; 121 122 // if (rawPercent <= 2.0) { 123 // return 0; 124 // } 125 126 // TODO - This is a bit of a hack to take care of a case 127 // were we were getting a confidence of 135... 128 if (rawPercent > 0.33) { 129 return 98; 130 } 131 132 return (int32_t) (rawPercent * 300.0); 133} 134 135CharsetRecog_sbcs::CharsetRecog_sbcs() 136{ 137 // nothing else to do 138} 139 140CharsetRecog_sbcs::~CharsetRecog_sbcs() 141{ 142 // nothing to do 143} 144 145int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t byteMap[]) const 146{ 147 NGramParser parser(ngrams, byteMap); 148 int32_t result; 149 150 result = parser.parse(det); 151 152 return result; 153} 154 155static const uint8_t charMap_8859_1[] = { 156 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 157 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 158 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 159 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 160 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 161 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 162 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 163 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 164 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 165 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 166 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 167 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 168 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 169 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 170 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 171 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 172 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 173 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 174 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 175 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 176 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 177 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 178 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 179 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 180 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 181 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 182 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 183 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 184 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 185 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 186 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 187 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 188}; 189 190static const uint8_t charMap_8859_2[] = { 191 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 192 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 193 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 194 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 195 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 196 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 197 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 198 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 199 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 200 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 201 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 202 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 203 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 204 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 205 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 206 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 207 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 208 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 209 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 210 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 211 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20, 212 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 213 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, 214 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 215 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 216 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 217 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 218 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 219 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 220 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 221 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 222 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, 223}; 224 225static const uint8_t charMap_8859_5[] = { 226 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 227 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 228 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 229 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 230 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 231 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 232 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 233 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 234 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 235 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 236 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 237 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 238 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 239 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 240 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 241 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 242 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 243 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 244 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 245 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 246 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 247 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 248 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 249 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 250 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 251 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 252 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 253 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 254 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 255 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 256 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 257 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 258}; 259 260static const uint8_t charMap_8859_6[] = { 261 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 262 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 263 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 264 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 265 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 266 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 267 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 268 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 269 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 270 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 271 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 272 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 273 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 274 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 275 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 276 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 277 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 278 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 279 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 280 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 281 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 282 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 283 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 284 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 285 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 286 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 287 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 288 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20, 289 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 290 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20, 291 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 292 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 293}; 294 295static const uint8_t charMap_8859_7[] = { 296 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 297 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 298 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 299 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 300 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 301 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 302 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 303 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 304 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 305 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 306 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 307 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 308 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 309 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 310 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 311 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 312 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 313 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 314 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 315 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 316 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20, 317 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 318 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20, 319 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE, 320 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 321 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 322 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 323 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF, 324 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 325 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 326 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 327 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, 328}; 329 330static const uint8_t charMap_8859_8[] = { 331 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 332 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 333 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 334 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 335 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 336 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 337 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 338 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 339 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 340 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 341 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 342 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 343 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 344 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 345 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 346 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 347 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 348 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 349 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 350 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 351 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 352 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 353 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 354 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 355 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 356 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 357 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 358 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 359 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 360 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 361 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 362 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20, 363}; 364 365static const uint8_t charMap_8859_9[] = { 366 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 367 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 368 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 369 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 370 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 371 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 372 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 373 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 374 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 375 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 376 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 377 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 378 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 379 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 380 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 381 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 382 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 383 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 384 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 385 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 386 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 387 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 388 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 389 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 390 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 391 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 392 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 393 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF, 394 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 395 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 396 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 397 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 398}; 399 400static const int32_t ngrams_windows_1251[] = { 401 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, 402 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, 403 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, 404 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, 405}; 406 407static const uint8_t charMap_windows_1251[] = { 408 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 409 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 410 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 411 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 412 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 413 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 414 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 415 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 416 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 417 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 418 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 419 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 420 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 421 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 422 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 423 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 424 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 425 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 426 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 427 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 428 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, 429 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF, 430 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, 431 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF, 432 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 433 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 434 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 435 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 436 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 437 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 438 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 439 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 440}; 441 442static const int32_t ngrams_windows_1256[] = { 443 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, 444 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, 445 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, 446 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, 447}; 448 449static const uint8_t charMap_windows_1256[] = { 450 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 451 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 452 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 453 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 454 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 455 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 456 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 457 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 458 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 459 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 460 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 461 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 462 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 463 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 464 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 465 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 466 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 467 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F, 468 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 469 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F, 470 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 471 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 472 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 473 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 474 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 475 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 476 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, 477 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 478 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 479 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 480 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, 481 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF, 482}; 483 484static const int32_t ngrams_KOI8_R[] = { 485 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, 486 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, 487 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, 488 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, 489}; 490 491static const uint8_t charMap_KOI8_R[] = { 492 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 493 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 494 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 495 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 496 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 497 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 498 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 499 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 500 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 501 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 502 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 503 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 504 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 505 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 506 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 507 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 508 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 509 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 510 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 511 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 512 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 513 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 514 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 515 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 516 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 517 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 518 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 519 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 520 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 521 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 522 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 523 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 524}; 525 526static const int32_t ngrams_IBM424_he_rtl[] = { 527 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 528 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, 529 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, 530 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, 531}; 532 533static const int32_t ngrams_IBM424_he_ltr[] = { 534 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141, 535 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054, 536 0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940, 537 0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651, 538}; 539 540static const uint8_t charMap_IBM424_he[] = { 541/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ 542/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 543/* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 544/* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 545/* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 546/* 4- */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 547/* 5- */ 0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 548/* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 549/* 7- */ 0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40, 550/* 8- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 551/* 9- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 552/* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 553/* B- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 554/* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 555/* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 556/* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 557/* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 558}; 559 560static const int32_t ngrams_IBM420_ar_rtl[] = { 561 0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158, 562 0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB, 563 0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40, 564 0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40, 565}; 566 567static const int32_t ngrams_IBM420_ar_ltr[] = { 568 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, 569 0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD, 570 0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156, 571 0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156 572}; 573 574static const uint8_t charMap_IBM420_ar[]= { 575/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ 576/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 577/* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 578/* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 579/* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 580/* 4- */ 0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 581/* 5- */ 0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 582/* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 583/* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 584/* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 585/* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 586/* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 587/* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 588/* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, 589/* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 590/* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, 591/* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, 592}; 593 594//ISO-8859-1,2,5,6,7,8,9 Ngrams 595 596struct NGramsPlusLang { 597 const int32_t ngrams[64]; 598 const char * lang; 599}; 600 601static const NGramsPlusLang ngrams_8859_1[] = { 602 { 603 { 604 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, 605 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, 606 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, 607 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, 608 }, 609 "en" 610 }, 611 { 612 { 613 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, 614 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, 615 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, 616 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, 617 }, 618 "da" 619 }, 620 { 621 { 622 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, 623 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, 624 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, 625 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, 626 }, 627 "de" 628 }, 629 { 630 { 631 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, 632 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, 633 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, 634 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, 635 }, 636 "es" 637 }, 638 { 639 { 640 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E, 641 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20, 642 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420, 643 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220, 644 }, 645 "fr" 646 }, 647 { 648 { 649 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073, 650 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220, 651 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20, 652 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F, 653 }, 654 "it" 655 }, 656 { 657 { 658 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665, 659 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E, 660 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F, 661 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F, 662 }, 663 "nl" 664 }, 665 { 666 { 667 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469, 668 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474, 669 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65, 670 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572, 671 }, 672 "no" 673 }, 674 { 675 { 676 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, 677 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20, 678 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065, 679 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F, 680 }, 681 "pt" 682 }, 683 { 684 { 685 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469, 686 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220, 687 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20, 688 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220, 689 }, 690 "sv" 691 } 692}; 693 694 695static const NGramsPlusLang ngrams_8859_2[] = { 696 { 697 { 698 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F, 699 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465, 700 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865, 701 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564, 702 }, 703 "cs" 704 }, 705 { 706 { 707 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69, 708 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20, 709 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061, 710 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320, 711 }, 712 "hu" 713 }, 714 { 715 { 716 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779, 717 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20, 718 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769, 719 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720, 720 }, 721 "pl" 722 }, 723 { 724 { 725 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69, 726 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070, 727 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72, 728 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20, 729 }, 730 "ro" 731 } 732}; 733 734static const int32_t ngrams_8859_5_ru[] = { 735 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE, 736 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD, 737 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2, 738 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520, 739}; 740 741static const int32_t ngrams_8859_6_ar[] = { 742 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8, 743 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1, 744 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20, 745 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620, 746}; 747 748static const int32_t ngrams_8859_7_el[] = { 749 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7, 750 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120, 751 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5, 752 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20, 753}; 754 755static const int32_t ngrams_8859_8_I_he[] = { 756 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0, 757 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4, 758 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE, 759 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9, 760}; 761 762static const int32_t ngrams_8859_8_he[] = { 763 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0, 764 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC, 765 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920, 766 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9, 767}; 768 769static const int32_t ngrams_8859_9_tr[] = { 770 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961, 771 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062, 772 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062, 773 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD, 774}; 775 776CharsetRecog_8859_1::~CharsetRecog_8859_1() 777{ 778 // nothing to do 779} 780 781UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const { 782 const char *name = textIn->fC1Bytes? "windows-1252" : "ISO-8859-1"; 783 uint32_t i; 784 int32_t bestConfidenceSoFar = -1; 785 for (i=0; i < ARRAY_SIZE(ngrams_8859_1) ; i++) { 786 const int32_t *ngrams = ngrams_8859_1[i].ngrams; 787 const char *lang = ngrams_8859_1[i].lang; 788 int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_1); 789 if (confidence > bestConfidenceSoFar) { 790 results->set(textIn, this, confidence, name, lang); 791 bestConfidenceSoFar = confidence; 792 } 793 } 794 return (bestConfidenceSoFar > 0); 795} 796 797const char *CharsetRecog_8859_1::getName() const 798{ 799 return "ISO-8859-1"; 800} 801 802 803CharsetRecog_8859_2::~CharsetRecog_8859_2() 804{ 805 // nothing to do 806} 807 808UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const { 809 const char *name = textIn->fC1Bytes? "windows-1250" : "ISO-8859-2"; 810 uint32_t i; 811 int32_t bestConfidenceSoFar = -1; 812 for (i=0; i < ARRAY_SIZE(ngrams_8859_2) ; i++) { 813 const int32_t *ngrams = ngrams_8859_2[i].ngrams; 814 const char *lang = ngrams_8859_2[i].lang; 815 int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_2); 816 if (confidence > bestConfidenceSoFar) { 817 results->set(textIn, this, confidence, name, lang); 818 bestConfidenceSoFar = confidence; 819 } 820 } 821 return (bestConfidenceSoFar > 0); 822} 823 824const char *CharsetRecog_8859_2::getName() const 825{ 826 return "ISO-8859-2"; 827} 828 829 830CharsetRecog_8859_5::~CharsetRecog_8859_5() 831{ 832 // nothing to do 833} 834 835const char *CharsetRecog_8859_5::getName() const 836{ 837 return "ISO-8859-5"; 838} 839 840CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru() 841{ 842 // nothing to do 843} 844 845const char *CharsetRecog_8859_5_ru::getLanguage() const 846{ 847 return "ru"; 848} 849 850UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const 851{ 852 int32_t confidence = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5); 853 results->set(textIn, this, confidence); 854 return (confidence > 0); 855} 856 857CharsetRecog_8859_6::~CharsetRecog_8859_6() 858{ 859 // nothing to do 860} 861 862const char *CharsetRecog_8859_6::getName() const 863{ 864 return "ISO-8859-6"; 865} 866 867CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar() 868{ 869 // nothing to do 870} 871 872const char *CharsetRecog_8859_6_ar::getLanguage() const 873{ 874 return "ar"; 875} 876 877UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const 878{ 879 int32_t confidence = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6); 880 results->set(textIn, this, confidence); 881 return (confidence > 0); 882} 883 884CharsetRecog_8859_7::~CharsetRecog_8859_7() 885{ 886 // nothing to do 887} 888 889const char *CharsetRecog_8859_7::getName() const 890{ 891 return "ISO-8859-7"; 892} 893 894CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el() 895{ 896 // nothing to do 897} 898 899const char *CharsetRecog_8859_7_el::getLanguage() const 900{ 901 return "el"; 902} 903 904UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const 905{ 906 const char *name = textIn->fC1Bytes? "windows-1253" : "ISO-8859-7"; 907 int32_t confidence = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7); 908 results->set(textIn, this, confidence, name, "el"); 909 return (confidence > 0); 910} 911 912CharsetRecog_8859_8::~CharsetRecog_8859_8() 913{ 914 // nothing to do 915} 916 917const char *CharsetRecog_8859_8::getName() const 918{ 919 return "ISO-8859-8"; 920} 921 922CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he () 923{ 924 // nothing to do 925} 926 927const char *CharsetRecog_8859_8_I_he::getName() const 928{ 929 return "ISO-8859-8-I"; 930} 931 932const char *CharsetRecog_8859_8_I_he::getLanguage() const 933{ 934 return "he"; 935} 936 937UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const 938{ 939 const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8-I"; 940 int32_t confidence = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8); 941 results->set(textIn, this, confidence, name, "he"); 942 return (confidence > 0); 943} 944 945CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he() 946{ 947 // od ot gnihton 948} 949 950const char *CharsetRecog_8859_8_he::getLanguage() const 951{ 952 return "he"; 953} 954 955UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const 956{ 957 const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8"; 958 int32_t confidence = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8); 959 results->set(textIn, this, confidence, name, "he"); 960 return (confidence > 0); 961} 962 963CharsetRecog_8859_9::~CharsetRecog_8859_9() 964{ 965 // nothing to do 966} 967 968const char *CharsetRecog_8859_9::getName() const 969{ 970 return "ISO-8859-9"; 971} 972 973CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr () 974{ 975 // nothing to do 976} 977 978const char *CharsetRecog_8859_9_tr::getLanguage() const 979{ 980 return "tr"; 981} 982 983UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const 984{ 985 const char *name = textIn->fC1Bytes? "windows-1254" : "ISO-8859-9"; 986 int32_t confidence = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9); 987 results->set(textIn, this, confidence, name, "tr"); 988 return (confidence > 0); 989} 990 991CharsetRecog_windows_1256::~CharsetRecog_windows_1256() 992{ 993 // nothing to do 994} 995 996const char *CharsetRecog_windows_1256::getName() const 997{ 998 return "windows-1256"; 999} 1000 1001const char *CharsetRecog_windows_1256::getLanguage() const 1002{ 1003 return "ar"; 1004} 1005 1006UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const 1007{ 1008 int32_t confidence = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256); 1009 results->set(textIn, this, confidence); 1010 return (confidence > 0); 1011} 1012 1013CharsetRecog_windows_1251::~CharsetRecog_windows_1251() 1014{ 1015 // nothing to do 1016} 1017 1018const char *CharsetRecog_windows_1251::getName() const 1019{ 1020 return "windows-1251"; 1021} 1022 1023const char *CharsetRecog_windows_1251::getLanguage() const 1024{ 1025 return "ru"; 1026} 1027 1028UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const 1029{ 1030 int32_t confidence = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251); 1031 results->set(textIn, this, confidence); 1032 return (confidence > 0); 1033} 1034 1035CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R() 1036{ 1037 // nothing to do 1038} 1039 1040const char *CharsetRecog_KOI8_R::getName() const 1041{ 1042 return "KOI8-R"; 1043} 1044 1045const char *CharsetRecog_KOI8_R::getLanguage() const 1046{ 1047 return "ru"; 1048} 1049 1050UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const 1051{ 1052 int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R); 1053 results->set(textIn, this, confidence); 1054 return (confidence > 0); 1055} 1056 1057CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() 1058{ 1059 // nothing to do 1060} 1061 1062const char *CharsetRecog_IBM424_he::getLanguage() const 1063{ 1064 return "he"; 1065} 1066 1067CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl() 1068{ 1069 // nothing to do 1070} 1071 1072const char *CharsetRecog_IBM424_he_rtl::getName() const 1073{ 1074 return "IBM424_rtl"; 1075} 1076 1077UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const 1078{ 1079 int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he); 1080 results->set(textIn, this, confidence); 1081 return (confidence > 0); 1082} 1083 1084CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr() 1085{ 1086 // nothing to do 1087} 1088 1089const char *CharsetRecog_IBM424_he_ltr::getName() const 1090{ 1091 return "IBM424_ltr"; 1092} 1093 1094UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const 1095{ 1096 int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he); 1097 results->set(textIn, this, confidence); 1098 return (confidence > 0); 1099} 1100 1101static const uint8_t unshapeMap_IBM420[] = { 1102/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ 1103/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 1104/* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 1105/* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 1106/* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 1107/* 4- */ 0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 1108/* 5- */ 0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 1109/* 6- */ 0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 1110/* 7- */ 0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 1111/* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F, 1112/* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E, 1113/* A- */ 0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF, 1114/* B- */ 0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF, 1115/* C- */ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF, 1116/* D- */ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF, 1117/* E- */ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 1118/* F- */ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 1119}; 1120 1121CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar() 1122{ 1123 // nothing to do 1124} 1125 1126const char *CharsetRecog_IBM420_ar::getLanguage() const 1127{ 1128 return "ar"; 1129} 1130 1131void CharsetRecog_IBM420_ar::matchInit(InputText *textIn) { 1132 prev_fInputBytesLength = textIn->fInputLen; 1133 prev_fInputBytes = textIn->fInputBytes; 1134 1135 int32_t length = 0; 1136 uint8_t *bb = unshape(prev_fInputBytes, prev_fInputBytesLength, length); 1137 1138 if (bb != NULL) { 1139 textIn->fInputBytes = bb; 1140 textIn->fInputLen = length; 1141 1142 deleteBuffer = TRUE; 1143 } else { 1144 deleteBuffer = FALSE; 1145 } 1146} 1147 1148uint8_t *CharsetRecog_IBM420_ar::unshape(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length) { 1149 uint8_t *resultArray = unshapeLamAlef(inputBytes, inputBytesLength, length); 1150 1151 if (resultArray != NULL) { 1152 for (int32_t i = 0; i < inputBytesLength; i++) { 1153 resultArray[i] = unshapeMap_IBM420[resultArray[i]]; 1154 } 1155 } 1156 1157 return resultArray; 1158} 1159 1160uint8_t *CharsetRecog_IBM420_ar::unshapeLamAlef(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length) { 1161 int32_t bigBufferLength = inputBytesLength * 2; 1162 uint8_t *bigBuffer = (uint8_t *)uprv_malloc(bigBufferLength); 1163 uint8_t *resultBuffer = NULL; 1164 1165 if (bigBuffer != NULL) { 1166 int32_t bufferIndex; 1167 static const uint8_t unshapedLamAlef[] = { 0xb1, 0x56 }; 1168 1169 for (int32_t i = bufferIndex = 0; i < inputBytesLength; i++) { 1170 if (isLamAlef(inputBytes[i])) { 1171 bigBuffer[bufferIndex++] = unshapedLamAlef[0]; 1172 bigBuffer[bufferIndex++] = unshapedLamAlef[1]; 1173 } else { 1174 bigBuffer[bufferIndex++] = inputBytes[i]; 1175 } 1176 } 1177 1178 length = bufferIndex; 1179 resultBuffer = (uint8_t *)uprv_malloc(length); 1180 if (resultBuffer != NULL) { 1181 uprv_memcpy(resultBuffer, bigBuffer, length); 1182 } 1183 } 1184 1185 if (bigBuffer != NULL) { 1186 uprv_free(bigBuffer); 1187 } 1188 1189 return resultBuffer; 1190} 1191 1192void CharsetRecog_IBM420_ar::matchFinish(InputText *textIn) { 1193 if (deleteBuffer) { 1194 uprv_free(textIn->fInputBytes); 1195 1196 textIn->fInputBytes = prev_fInputBytes; 1197 textIn->fInputLen = prev_fInputBytesLength; 1198 } 1199} 1200 1201UBool CharsetRecog_IBM420_ar::isLamAlef(uint8_t b) { 1202 static const uint8_t shapedLamAlef[] = { 1203 0xb2, 0xb3, 0xb4, 0xb5, 0xb7, 0xb8 1204 }; 1205 1206 for (uint32_t i = 0; i < sizeof(shapedLamAlef); i++) { 1207 if (b == shapedLamAlef[i]) { 1208 return TRUE; 1209 } 1210 } 1211 1212 return FALSE; 1213} 1214 1215CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl() 1216{ 1217 // nothing to do 1218} 1219 1220const char *CharsetRecog_IBM420_ar_rtl::getName() const 1221{ 1222 return "IBM420_rtl"; 1223} 1224 1225UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const 1226{ 1227 int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar); 1228 results->set(textIn, this, confidence); 1229 return (confidence > 0); 1230} 1231 1232CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr() 1233{ 1234 // nothing to do 1235} 1236 1237const char *CharsetRecog_IBM420_ar_ltr::getName() const 1238{ 1239 return "IBM420_ltr"; 1240} 1241 1242UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const 1243{ 1244 int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar); 1245 results->set(textIn, this, confidence); 1246 return (confidence > 0); 1247} 1248 1249U_NAMESPACE_END 1250#endif 1251 1252