1/* 2 ********************************************************************** 3 * Copyright (C) 2005-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 8#ifndef __CSRSBCS_H 9#define __CSRSBCS_H 10 11#include "unicode/uobject.h" 12 13#if !UCONFIG_NO_CONVERSION 14 15#include "csrecog.h" 16 17U_NAMESPACE_BEGIN 18 19class NGramParser : public UMemory 20{ 21private: 22 int32_t ngram; 23 const int32_t *ngramList; 24 25 int32_t ngramCount; 26 int32_t hitCount; 27 28protected: 29 int32_t byteIndex; 30 const uint8_t *charMap; 31 32 void addByte(int32_t b); 33 34public: 35 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap); 36 virtual ~NGramParser() // Google patch: declare virtual destructor 37 { 38 } 39 40private: 41 /* 42 * Binary search for value in table, which must have exactly 64 entries. 43 */ 44 int32_t search(const int32_t *table, int32_t value); 45 46 void lookup(int32_t thisNgram); 47 48 virtual int32_t nextByte(InputText *det); 49 virtual void parseCharacters(InputText *det); 50 51public: 52 int32_t parse(InputText *det); 53 54}; 55 56class NGramParser_IBM420 : public NGramParser 57{ 58private: 59 int32_t alef; 60 int32_t isLamAlef(int32_t b); 61 int32_t nextByte(InputText *det); 62 void parseCharacters(InputText *det); 63 64public: 65 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); 66}; 67 68 69class CharsetRecog_sbcs : public CharsetRecognizer 70{ 71public: 72 CharsetRecog_sbcs(); 73 virtual ~CharsetRecog_sbcs(); 74 virtual const char *getName() const = 0; 75 virtual UBool match(InputText *det, CharsetMatch *results) const = 0; 76 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; 77}; 78 79class CharsetRecog_8859_1 : public CharsetRecog_sbcs 80{ 81public: 82 virtual ~CharsetRecog_8859_1(); 83 const char *getName() const; 84 virtual UBool match(InputText *det, CharsetMatch *results) const; 85}; 86 87class CharsetRecog_8859_2 : public CharsetRecog_sbcs 88{ 89public: 90 virtual ~CharsetRecog_8859_2(); 91 const char *getName() const; 92 virtual UBool match(InputText *det, CharsetMatch *results) const; 93}; 94 95class CharsetRecog_8859_5 : public CharsetRecog_sbcs 96{ 97public: 98 virtual ~CharsetRecog_8859_5(); 99 const char *getName() const; 100}; 101 102class CharsetRecog_8859_6 : public CharsetRecog_sbcs 103{ 104public: 105 virtual ~CharsetRecog_8859_6(); 106 107 const char *getName() const; 108}; 109 110class CharsetRecog_8859_7 : public CharsetRecog_sbcs 111{ 112public: 113 virtual ~CharsetRecog_8859_7(); 114 115 const char *getName() const; 116}; 117 118class CharsetRecog_8859_8 : public CharsetRecog_sbcs 119{ 120public: 121 virtual ~CharsetRecog_8859_8(); 122 123 virtual const char *getName() const; 124}; 125 126class CharsetRecog_8859_9 : public CharsetRecog_sbcs 127{ 128public: 129 virtual ~CharsetRecog_8859_9(); 130 131 const char *getName() const; 132}; 133 134 135 136class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5 137{ 138public: 139 virtual ~CharsetRecog_8859_5_ru(); 140 141 const char *getLanguage() const; 142 143 virtual UBool match(InputText *det, CharsetMatch *results) const; 144}; 145 146class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6 147{ 148public: 149 virtual ~CharsetRecog_8859_6_ar(); 150 151 const char *getLanguage() const; 152 153 virtual UBool match(InputText *det, CharsetMatch *results) const; 154}; 155 156class CharsetRecog_8859_7_el : public CharsetRecog_8859_7 157{ 158public: 159 virtual ~CharsetRecog_8859_7_el(); 160 161 const char *getLanguage() const; 162 163 virtual UBool match(InputText *det, CharsetMatch *results) const; 164}; 165 166class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8 167{ 168public: 169 virtual ~CharsetRecog_8859_8_I_he(); 170 171 const char *getName() const; 172 173 const char *getLanguage() const; 174 175 virtual UBool match(InputText *det, CharsetMatch *results) const; 176}; 177 178class CharsetRecog_8859_8_he : public CharsetRecog_8859_8 179{ 180public: 181 virtual ~CharsetRecog_8859_8_he (); 182 183 const char *getLanguage() const; 184 185 virtual UBool match(InputText *det, CharsetMatch *results) const; 186}; 187 188class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9 189{ 190public: 191 virtual ~CharsetRecog_8859_9_tr (); 192 193 const char *getLanguage() const; 194 195 virtual UBool match(InputText *det, CharsetMatch *results) const; 196}; 197 198class CharsetRecog_windows_1256 : public CharsetRecog_sbcs 199{ 200public: 201 virtual ~CharsetRecog_windows_1256(); 202 203 const char *getName() const; 204 205 const char *getLanguage() const; 206 207 virtual UBool match(InputText *det, CharsetMatch *results) const; 208}; 209 210class CharsetRecog_windows_1251 : public CharsetRecog_sbcs 211{ 212public: 213 virtual ~CharsetRecog_windows_1251(); 214 215 const char *getName() const; 216 217 const char *getLanguage() const; 218 219 virtual UBool match(InputText *det, CharsetMatch *results) const; 220}; 221 222 223class CharsetRecog_KOI8_R : public CharsetRecog_sbcs 224{ 225public: 226 virtual ~CharsetRecog_KOI8_R(); 227 228 const char *getName() const; 229 230 const char *getLanguage() const; 231 232 virtual UBool match(InputText *det, CharsetMatch *results) const; 233}; 234 235class CharsetRecog_IBM424_he : public CharsetRecog_sbcs 236{ 237public: 238 virtual ~CharsetRecog_IBM424_he(); 239 240 const char *getLanguage() const; 241}; 242 243class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he { 244public: 245 virtual ~CharsetRecog_IBM424_he_rtl(); 246 247 const char *getName() const; 248 249 virtual UBool match(InputText *det, CharsetMatch *results) const; 250}; 251 252class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he { 253 virtual ~CharsetRecog_IBM424_he_ltr(); 254 255 const char *getName() const; 256 257 virtual UBool match(InputText *det, CharsetMatch *results) const; 258}; 259 260class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs 261{ 262public: 263 virtual ~CharsetRecog_IBM420_ar(); 264 265 const char *getLanguage() const; 266 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; 267 268}; 269 270class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar { 271public: 272 virtual ~CharsetRecog_IBM420_ar_rtl(); 273 274 const char *getName() const; 275 276 virtual UBool match(InputText *det, CharsetMatch *results) const; 277}; 278 279class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar { 280 virtual ~CharsetRecog_IBM420_ar_ltr(); 281 282 const char *getName() const; 283 284 virtual UBool match(InputText *det, CharsetMatch *results) const; 285}; 286 287U_NAMESPACE_END 288 289#endif /* !UCONFIG_NO_CONVERSION */ 290#endif /* __CSRSBCS_H */ 291