1/*
2 **********************************************************************
3 *   Copyright (C) 2005-2013, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7
8#ifndef __CSRSBCS_H
9#define __CSRSBCS_H
10
11#include "unicode/uobject.h"
12
13#if !UCONFIG_NO_CONVERSION
14
15#include "csrecog.h"
16
17U_NAMESPACE_BEGIN
18
19class NGramParser : public UMemory
20{
21private:
22    int32_t ngram;
23    const int32_t *ngramList;
24
25    int32_t ngramCount;
26    int32_t hitCount;
27
28protected:
29	int32_t byteIndex;
30    const uint8_t *charMap;
31
32	void addByte(int32_t b);
33
34public:
35    NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
36    virtual ~NGramParser()   // Google patch: declare virtual destructor
37      {
38      }
39
40private:
41    /*
42    * Binary search for value in table, which must have exactly 64 entries.
43    */
44    int32_t search(const int32_t *table, int32_t value);
45
46    void lookup(int32_t thisNgram);
47
48    virtual int32_t nextByte(InputText *det);
49	virtual void parseCharacters(InputText *det);
50
51public:
52    int32_t parse(InputText *det);
53
54};
55
56class NGramParser_IBM420 : public NGramParser
57{
58private:
59	int32_t alef;
60	int32_t isLamAlef(int32_t b);
61	int32_t nextByte(InputText *det);
62	void parseCharacters(InputText *det);
63
64public:
65    NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
66};
67
68
69class CharsetRecog_sbcs : public CharsetRecognizer
70{
71public:
72    CharsetRecog_sbcs();
73    virtual ~CharsetRecog_sbcs();
74    virtual const char *getName() const = 0;
75    virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
76    virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
77};
78
79class CharsetRecog_8859_1 : public CharsetRecog_sbcs
80{
81public:
82    virtual ~CharsetRecog_8859_1();
83    const char *getName() const;
84    virtual UBool match(InputText *det, CharsetMatch *results) const;
85};
86
87class CharsetRecog_8859_2 : public CharsetRecog_sbcs
88{
89public:
90    virtual ~CharsetRecog_8859_2();
91    const char *getName() const;
92    virtual UBool match(InputText *det, CharsetMatch *results) const;
93};
94
95class CharsetRecog_8859_5 : public CharsetRecog_sbcs
96{
97public:
98    virtual ~CharsetRecog_8859_5();
99    const char *getName() const;
100};
101
102class CharsetRecog_8859_6 : public CharsetRecog_sbcs
103{
104public:
105    virtual ~CharsetRecog_8859_6();
106
107    const char *getName() const;
108};
109
110class CharsetRecog_8859_7 : public CharsetRecog_sbcs
111{
112public:
113    virtual ~CharsetRecog_8859_7();
114
115    const char *getName() const;
116};
117
118class CharsetRecog_8859_8 : public CharsetRecog_sbcs
119{
120public:
121    virtual ~CharsetRecog_8859_8();
122
123    virtual const char *getName() const;
124};
125
126class CharsetRecog_8859_9 : public CharsetRecog_sbcs
127{
128public:
129    virtual ~CharsetRecog_8859_9();
130
131    const char *getName() const;
132};
133
134
135
136class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
137{
138public:
139    virtual ~CharsetRecog_8859_5_ru();
140
141    const char *getLanguage() const;
142
143    virtual UBool match(InputText *det, CharsetMatch *results) const;
144};
145
146class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
147{
148public:
149    virtual ~CharsetRecog_8859_6_ar();
150
151    const char *getLanguage() const;
152
153    virtual UBool match(InputText *det, CharsetMatch *results) const;
154};
155
156class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
157{
158public:
159    virtual ~CharsetRecog_8859_7_el();
160
161    const char *getLanguage() const;
162
163    virtual UBool match(InputText *det, CharsetMatch *results) const;
164};
165
166class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
167{
168public:
169    virtual ~CharsetRecog_8859_8_I_he();
170
171    const char *getName() const;
172
173    const char *getLanguage() const;
174
175    virtual UBool match(InputText *det, CharsetMatch *results) const;
176};
177
178class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
179{
180public:
181    virtual ~CharsetRecog_8859_8_he ();
182
183    const char *getLanguage() const;
184
185    virtual UBool match(InputText *det, CharsetMatch *results) const;
186};
187
188class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
189{
190public:
191    virtual ~CharsetRecog_8859_9_tr ();
192
193    const char *getLanguage() const;
194
195    virtual UBool match(InputText *det, CharsetMatch *results) const;
196};
197
198class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
199{
200public:
201    virtual ~CharsetRecog_windows_1256();
202
203    const char *getName() const;
204
205    const char *getLanguage() const;
206
207    virtual UBool match(InputText *det, CharsetMatch *results) const;
208};
209
210class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
211{
212public:
213    virtual ~CharsetRecog_windows_1251();
214
215    const char *getName() const;
216
217    const char *getLanguage() const;
218
219    virtual UBool match(InputText *det, CharsetMatch *results) const;
220};
221
222
223class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
224{
225public:
226    virtual ~CharsetRecog_KOI8_R();
227
228    const char *getName() const;
229
230    const char *getLanguage() const;
231
232    virtual UBool match(InputText *det, CharsetMatch *results) const;
233};
234
235class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
236{
237public:
238    virtual ~CharsetRecog_IBM424_he();
239
240    const char *getLanguage() const;
241};
242
243class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
244public:
245    virtual ~CharsetRecog_IBM424_he_rtl();
246
247    const char *getName() const;
248
249    virtual UBool match(InputText *det, CharsetMatch *results) const;
250};
251
252class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
253    virtual ~CharsetRecog_IBM424_he_ltr();
254
255    const char *getName() const;
256
257    virtual UBool match(InputText *det, CharsetMatch *results) const;
258};
259
260class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
261{
262public:
263    virtual ~CharsetRecog_IBM420_ar();
264
265    const char *getLanguage() const;
266	int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
267
268};
269
270class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
271public:
272    virtual ~CharsetRecog_IBM420_ar_rtl();
273
274    const char *getName() const;
275
276    virtual UBool match(InputText *det, CharsetMatch *results) const;
277};
278
279class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
280    virtual ~CharsetRecog_IBM420_ar_ltr();
281
282    const char *getName() const;
283
284    virtual UBool match(InputText *det, CharsetMatch *results) const;
285};
286
287U_NAMESPACE_END
288
289#endif /* !UCONFIG_NO_CONVERSION */
290#endif /* __CSRSBCS_H */
291