1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************** 359d709d503bab6e2b61931737e662dd293b40578ccornelius * Copyright (C) 2005-2013, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef __CSDETECT_H 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define __CSDETECT_H 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass InputText; 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass CharsetRecognizer; 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass CharsetMatch; 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass CharsetDetector : public UMemory 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru InputText *textIn; 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CharsetMatch **resultArray; 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t resultCount; 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool fStripTags; // If true, setText() will strip tags from input text. 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool fFreshTextSet; 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static void setRecognizers(UErrorCode &status); 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3159d709d503bab6e2b61931737e662dd293b40578ccornelius UBool *fEnabledRecognizers; // If not null, active set of charset recognizers had 3259d709d503bab6e2b61931737e662dd293b40578ccornelius // been changed from the default. The array index is 3359d709d503bab6e2b61931737e662dd293b40578ccornelius // corresponding to fCSRecognizers. See setDetectableCharset(). 3459d709d503bab6e2b61931737e662dd293b40578ccornelius 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CharsetDetector(UErrorCode &status); 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~CharsetDetector(); 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void setText(const char *in, int32_t len); 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const CharsetMatch * const *detectAll(int32_t &maxMatchesFound, UErrorCode &status); 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const CharsetMatch *detect(UErrorCode& status); 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void setDeclaredEncoding(const char *encoding, int32_t len) const; 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool setStripTagsFlag(UBool flag); 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool getStripTagsFlag() const; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// const char *getCharsetName(int32_t index, UErrorCode& status) const; 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 5459d709d503bab6e2b61931737e662dd293b40578ccornelius static int32_t getDetectableCount(); 5559d709d503bab6e2b61931737e662dd293b40578ccornelius 5659d709d503bab6e2b61931737e662dd293b40578ccornelius 5759d709d503bab6e2b61931737e662dd293b40578ccornelius static UEnumeration * getAllDetectableCharsets(UErrorCode &status); 5859d709d503bab6e2b61931737e662dd293b40578ccornelius UEnumeration * getDetectableCharsets(UErrorCode &status) const; 5959d709d503bab6e2b61931737e662dd293b40578ccornelius void setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status); 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* __CSDETECT_H */ 66