1/* 2 ********************************************************************** 3 * Copyright (C) 2005-2015, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 8#ifndef __CSR2022_H 9#define __CSR2022_H 10 11#include "unicode/utypes.h" 12 13#if !UCONFIG_NO_CONVERSION 14 15#include "csrecog.h" 16 17U_NAMESPACE_BEGIN 18 19class CharsetMatch; 20 21/** 22 * class CharsetRecog_2022 part of the ICU charset detection imlementation. 23 * This is a superclass for the individual detectors for 24 * each of the detectable members of the ISO 2022 family 25 * of encodings. 26 * 27 * The separate classes are nested within this class. 28 * 29 * @internal 30 */ 31class CharsetRecog_2022 : public CharsetRecognizer 32{ 33 34public: 35 virtual ~CharsetRecog_2022() = 0; 36 37protected: 38 39 /** 40 * Matching function shared among the 2022 detectors JP, CN and KR 41 * Counts up the number of legal an unrecognized escape sequences in 42 * the sample of text, and computes a score based on the total number & 43 * the proportion that fit the encoding. 44 * 45 * 46 * @param text the byte buffer containing text to analyse 47 * @param textLen the size of the text in the byte. 48 * @param escapeSequences the byte escape sequences to test for. 49 * @return match quality, in the range of 0-100. 50 */ 51 int32_t match_2022(const uint8_t *text, 52 int32_t textLen, 53 const uint8_t escapeSequences[][5], 54 int32_t escapeSequences_length) const; 55 56}; 57 58class CharsetRecog_2022JP :public CharsetRecog_2022 59{ 60public: 61 virtual ~CharsetRecog_2022JP(); 62 63 const char *getName() const; 64 65 UBool match(InputText *textIn, CharsetMatch *results) const; 66}; 67 68#if !UCONFIG_ONLY_HTML_CONVERSION 69class CharsetRecog_2022KR :public CharsetRecog_2022 { 70public: 71 virtual ~CharsetRecog_2022KR(); 72 73 const char *getName() const; 74 75 UBool match(InputText *textIn, CharsetMatch *results) const; 76 77}; 78 79class CharsetRecog_2022CN :public CharsetRecog_2022 80{ 81public: 82 virtual ~CharsetRecog_2022CN(); 83 84 const char* getName() const; 85 86 UBool match(InputText *textIn, CharsetMatch *results) const; 87}; 88#endif 89 90U_NAMESPACE_END 91 92#endif 93#endif /* __CSR2022_H */ 94