1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ********************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (C) 2005-2008, International Business Machines 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Corporation and others. All Rights Reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ********************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef __CSRMBCS_H 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define __CSRMBCS_H 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_CONVERSION 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "csrecog.h" 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// "Character" iterated character class. 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Recognizers for specific mbcs encodings make their "characters" available 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// by providing a nextChar() function that fills in an instance of IteratedChar 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// with the next char from the input. 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The returned characters are not converted to Unicode, but remain as the raw 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// bytes (concatenated into an int) from the codepage data. 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// For Asian charsets, use the raw input rather than the input that has been 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// stripped of markup. Detection only considers multi-byte chars, effectively 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// stripping markup anyway, and double byte chars do occur in markup too. 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class IteratedChar : public UMemory 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t charValue; // 1-4 bytes from the raw input data 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t index; 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nextIndex; 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool error; 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool done; 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) IteratedChar(); 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //void reset(); 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nextByte(InputText* det); 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class CharsetRecog_mbcs : public CharsetRecognizer { 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)protected: 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test the match of this charset with the input text data 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * which is obtained via the CharsetDetector object. 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param det The CharsetDetector, which contains the input text 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * to be checked for being in this charset. 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return Two values packed into one int (Damn java, anyhow) 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <br/> 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * bits 0-7: the match confidence, ranging from 0-100 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <br/> 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * bits 8-15: The match reason, an enum-like value. 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match_mbcs(InputText* det, const uint16_t commonChars[], int32_t commonCharsLen); 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~CharsetRecog_mbcs(); 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the IANA name of this charset. 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return the charset name. 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getName() const = 0; 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getLanguage() const = 0; 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match(InputText* det) = 0; 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the next character (however many bytes it is) from the input data 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Subclasses for specific charset encodings must implement this function 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * to get characters according to the rules of their encoding scheme. 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This function is not a method of class IteratedChar only because 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that would require a lot of extra derived classes, which is awkward. 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param it The IteratedChar "struct" into which the returned char is placed. 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param det The charset detector, which is needed to get at the input byte data 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * being iterated over. 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return True if a character was returned, false at end of input. 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual UBool nextChar(IteratedChar *it, InputText *textIn) = 0; 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Shift-JIS charset recognizer. 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class CharsetRecog_sjis : public CharsetRecog_mbcs { 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~CharsetRecog_sjis(); 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool nextChar(IteratedChar *it, InputText *det); 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match(InputText *det); 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getName() const; 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getLanguage() const; 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * EUC charset recognizers. One abstract class that provides the common function 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for getting the next character according to the EUC encoding scheme, 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and nested derived classes for EUC_KR, EUC_JP, EUC_CN. 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class CharsetRecog_euc : public CharsetRecog_mbcs 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~CharsetRecog_euc(); 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getName() const = 0; 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getLanguage() const = 0; 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match(InputText* det) = 0; 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (non-Javadoc) 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the next character value for EUC based encodings. 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Character "value" is simply the raw bytes that make up the character 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * packed into an int. 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool nextChar(IteratedChar *it, InputText *det); 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The charset recognize for EUC-JP. A singleton instance of this class 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * is created and kept by the public CharsetDetector class 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class CharsetRecog_euc_jp : public CharsetRecog_euc 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~CharsetRecog_euc_jp(); 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getName() const; 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getLanguage() const; 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match(InputText *det); 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The charset recognize for EUC-KR. A singleton instance of this class 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * is created and kept by the public CharsetDetector class 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class CharsetRecog_euc_kr : public CharsetRecog_euc 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~CharsetRecog_euc_kr(); 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getName() const; 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getLanguage() const; 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match(InputText *det); 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Big5 charset recognizer. 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class CharsetRecog_big5 : public CharsetRecog_mbcs 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~CharsetRecog_big5(); 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool nextChar(IteratedChar* it, InputText* det); 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getName() const; 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getLanguage() const; 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match(InputText *det); 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * GB-18030 recognizer. Uses simplified Chinese statistics. 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class CharsetRecog_gb_18030 : public CharsetRecog_mbcs 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~CharsetRecog_gb_18030(); 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool nextChar(IteratedChar* it, InputText* det); 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getName() const; 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *getLanguage() const; 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match(InputText *det); 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* __CSRMBCS_H */ 206