1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************** 359d709d503bab6e2b61931737e662dd293b40578ccornelius * Copyright (C) 2005-2013, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucsdet.h" 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csdetect.h" 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csmatch.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uenumimp.h" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "umutex.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucln_in.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uarrsort.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "inputext.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrsbcs.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrmbcs.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrutf8.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrucode.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csr2022.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type)) 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define DELETE_ARRAY(array) uprv_free((void *) (array)) 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3559d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_BEGIN 3659d709d503bab6e2b61931737e662dd293b40578ccornelius 3759d709d503bab6e2b61931737e662dd293b40578ccorneliusstruct CSRecognizerInfo : public UMemory { 3859d709d503bab6e2b61931737e662dd293b40578ccornelius CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled) 3959d709d503bab6e2b61931737e662dd293b40578ccornelius : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {}; 4059d709d503bab6e2b61931737e662dd293b40578ccornelius 4159d709d503bab6e2b61931737e662dd293b40578ccornelius ~CSRecognizerInfo() {delete recognizer;}; 4259d709d503bab6e2b61931737e662dd293b40578ccornelius 4359d709d503bab6e2b61931737e662dd293b40578ccornelius CharsetRecognizer *recognizer; 4459d709d503bab6e2b61931737e662dd293b40578ccornelius UBool isDefaultEnabled; 4559d709d503bab6e2b61931737e662dd293b40578ccornelius}; 4659d709d503bab6e2b61931737e662dd293b40578ccornelius 4759d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_END 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 4959d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic icu::CSRecognizerInfo **fCSRecognizers = NULL; 5059d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic icu::UInitOnce gCSRecognizersInitOnce; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t fCSRecognizers_size = 0; 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 5359d709d503bab6e2b61931737e662dd293b40578ccorneliusU_CDECL_BEGIN 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV csdet_cleanup(void) 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 5659d709d503bab6e2b61931737e662dd293b40578ccornelius U_NAMESPACE_USE 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fCSRecognizers != NULL) { 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(int32_t r = 0; r < fCSRecognizers_size; r += 1) { 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete fCSRecognizers[r]; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCSRecognizers[r] = NULL; 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru DELETE_ARRAY(fCSRecognizers); 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCSRecognizers = NULL; 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCSRecognizers_size = 0; 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 6759d709d503bab6e2b61931737e662dd293b40578ccornelius gCSRecognizersInitOnce.reset(); 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharsetMatchComparator(const void * /*context*/, const void *left, const void *right) 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_NAMESPACE_USE 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const CharsetMatch **csm_l = (const CharsetMatch **) left; 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const CharsetMatch **csm_r = (const CharsetMatch **) right; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // NOTE: compare is backwards to sort from highest to lowest. 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (*csm_r)->getConfidence() - (*csm_l)->getConfidence(); 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8459d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic void U_CALLCONV initRecognizers(UErrorCode &status) { 8559d709d503bab6e2b61931737e662dd293b40578ccornelius U_NAMESPACE_USE 8659d709d503bab6e2b61931737e662dd293b40578ccornelius ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup); 8759d709d503bab6e2b61931737e662dd293b40578ccornelius CSRecognizerInfo *tempArray[] = { 8859d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE), 8959d709d503bab6e2b61931737e662dd293b40578ccornelius 9059d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE), 9159d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE), 9259d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE), 9359d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE), 9459d709d503bab6e2b61931737e662dd293b40578ccornelius 9559d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE), 9659d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE), 9759d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), TRUE), 9859d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), TRUE), 9959d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_7_el(), TRUE), 10059d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), TRUE), 10159d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_8_he(), TRUE), 10259d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_windows_1251(), TRUE), 10359d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_windows_1256(), TRUE), 10459d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_KOI8_R(), TRUE), 10559d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), TRUE), 10659d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_sjis(), TRUE), 10759d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_gb_18030(), TRUE), 10859d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_euc_jp(), TRUE), 10959d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_euc_kr(), TRUE), 11059d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), 11159d709d503bab6e2b61931737e662dd293b40578ccornelius 11259d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), 11359d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), 11459d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), 11559d709d503bab6e2b61931737e662dd293b40578ccornelius 11659d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), FALSE), 11759d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), 11859d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), 11959d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) 12059d709d503bab6e2b61931737e662dd293b40578ccornelius }; 12159d709d503bab6e2b61931737e662dd293b40578ccornelius int32_t rCount = ARRAY_SIZE(tempArray); 12259d709d503bab6e2b61931737e662dd293b40578ccornelius 12359d709d503bab6e2b61931737e662dd293b40578ccornelius fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount); 12459d709d503bab6e2b61931737e662dd293b40578ccornelius 12559d709d503bab6e2b61931737e662dd293b40578ccornelius if (fCSRecognizers == NULL) { 12659d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 12759d709d503bab6e2b61931737e662dd293b40578ccornelius } 12859d709d503bab6e2b61931737e662dd293b40578ccornelius else { 12959d709d503bab6e2b61931737e662dd293b40578ccornelius fCSRecognizers_size = rCount; 13059d709d503bab6e2b61931737e662dd293b40578ccornelius for (int32_t r = 0; r < rCount; r += 1) { 13159d709d503bab6e2b61931737e662dd293b40578ccornelius fCSRecognizers[r] = tempArray[r]; 13259d709d503bab6e2b61931737e662dd293b40578ccornelius if (fCSRecognizers[r] == NULL) { 13359d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 13459d709d503bab6e2b61931737e662dd293b40578ccornelius } 13559d709d503bab6e2b61931737e662dd293b40578ccornelius } 13659d709d503bab6e2b61931737e662dd293b40578ccornelius } 13759d709d503bab6e2b61931737e662dd293b40578ccornelius} 13859d709d503bab6e2b61931737e662dd293b40578ccornelius 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setRecognizers(UErrorCode &status) 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 14559d709d503bab6e2b61931737e662dd293b40578ccornelius umtx_initOnce(gCSRecognizersInitOnce, &initRecognizers, status); 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCharsetDetector::CharsetDetector(UErrorCode &status) 14985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho : textIn(new InputText(status)), resultArray(NULL), 15059d709d503bab6e2b61931737e662dd293b40578ccornelius resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE), 15159d709d503bab6e2b61931737e662dd293b40578ccornelius fEnabledRecognizers(NULL) 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setRecognizers(status); 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru resultArray = (CharsetMatch **)uprv_malloc(sizeof(CharsetMatch *)*fCSRecognizers_size); 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (resultArray == NULL) { 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(int32_t i = 0; i < fCSRecognizers_size; i += 1) { 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru resultArray[i] = new CharsetMatch(); 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (resultArray[i] == NULL) { 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCharsetDetector::~CharsetDetector() 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete textIn; 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(int32_t i = 0; i < fCSRecognizers_size; i += 1) { 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete resultArray[i]; 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(resultArray); 18959d709d503bab6e2b61931737e662dd293b40578ccornelius 19059d709d503bab6e2b61931737e662dd293b40578ccornelius if (fEnabledRecognizers) { 19159d709d503bab6e2b61931737e662dd293b40578ccornelius uprv_free(fEnabledRecognizers); 19259d709d503bab6e2b61931737e662dd293b40578ccornelius } 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setText(const char *in, int32_t len) 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textIn->setText(in, len); 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFreshTextSet = TRUE; 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool CharsetDetector::setStripTagsFlag(UBool flag) 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool temp = fStripTags; 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fStripTags = flag; 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFreshTextSet = TRUE; 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return temp; 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool CharsetDetector::getStripTagsFlag() const 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fStripTags; 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setDeclaredEncoding(const char *encoding, int32_t len) const 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textIn->setDeclaredEncoding(encoding,len); 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t CharsetDetector::getDetectableCount() 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setRecognizers(status); 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fCSRecognizers_size; 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CharsetMatch *CharsetDetector::detect(UErrorCode &status) 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t maxMatchesFound = 0; 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru detectAll(maxMatchesFound, status); 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(maxMatchesFound > 0) { 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return resultArray[0]; 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, UErrorCode &status) 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!textIn->isSet()) { 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_MISSING_RESOURCE_ERROR;// TODO: Need to set proper status code for input text not set 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 24754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } else if (fFreshTextSet) { 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CharsetRecognizer *csr; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textIn->MungeInput(fStripTags); 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Iterate over all possible charsets, remember all that 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // give a match quality > 0. 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru resultCount = 0; 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i = 0; i < fCSRecognizers_size; i += 1) { 25759d709d503bab6e2b61931737e662dd293b40578ccornelius csr = fCSRecognizers[i]->recognizer; 25854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (csr->match(textIn, resultArray[resultCount])) { 25954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius resultCount++; 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (resultCount > 1) { 26454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius uprv_sortArray(resultArray, resultCount, sizeof resultArray[0], charsetMatchComparator, NULL, TRUE, &status); 26585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFreshTextSet = FALSE; 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxMatchesFound = resultCount; 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return resultArray; 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 27459d709d503bab6e2b61931737e662dd293b40578ccorneliusvoid CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status) 27559d709d503bab6e2b61931737e662dd293b40578ccornelius{ 27659d709d503bab6e2b61931737e662dd293b40578ccornelius if (U_FAILURE(status)) { 27759d709d503bab6e2b61931737e662dd293b40578ccornelius return; 27859d709d503bab6e2b61931737e662dd293b40578ccornelius } 27959d709d503bab6e2b61931737e662dd293b40578ccornelius 28059d709d503bab6e2b61931737e662dd293b40578ccornelius int32_t modIdx = -1; 28159d709d503bab6e2b61931737e662dd293b40578ccornelius UBool isDefaultVal = FALSE; 28259d709d503bab6e2b61931737e662dd293b40578ccornelius for (int32_t i = 0; i < fCSRecognizers_size; i++) { 28359d709d503bab6e2b61931737e662dd293b40578ccornelius CSRecognizerInfo *csrinfo = fCSRecognizers[i]; 28459d709d503bab6e2b61931737e662dd293b40578ccornelius if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) { 28559d709d503bab6e2b61931737e662dd293b40578ccornelius modIdx = i; 28659d709d503bab6e2b61931737e662dd293b40578ccornelius isDefaultVal = (csrinfo->isDefaultEnabled == enabled); 28759d709d503bab6e2b61931737e662dd293b40578ccornelius break; 28859d709d503bab6e2b61931737e662dd293b40578ccornelius } 28959d709d503bab6e2b61931737e662dd293b40578ccornelius } 29059d709d503bab6e2b61931737e662dd293b40578ccornelius if (modIdx < 0) { 29159d709d503bab6e2b61931737e662dd293b40578ccornelius // No matching encoding found 29259d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_ILLEGAL_ARGUMENT_ERROR; 29359d709d503bab6e2b61931737e662dd293b40578ccornelius return; 29459d709d503bab6e2b61931737e662dd293b40578ccornelius } 29559d709d503bab6e2b61931737e662dd293b40578ccornelius 29659d709d503bab6e2b61931737e662dd293b40578ccornelius if (fEnabledRecognizers == NULL && !isDefaultVal) { 29759d709d503bab6e2b61931737e662dd293b40578ccornelius // Create an array storing the non default setting 29859d709d503bab6e2b61931737e662dd293b40578ccornelius fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size); 29959d709d503bab6e2b61931737e662dd293b40578ccornelius if (fEnabledRecognizers == NULL) { 30059d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 30159d709d503bab6e2b61931737e662dd293b40578ccornelius return; 30259d709d503bab6e2b61931737e662dd293b40578ccornelius } 30359d709d503bab6e2b61931737e662dd293b40578ccornelius // Initialize the array with default info 30459d709d503bab6e2b61931737e662dd293b40578ccornelius for (int32_t i = 0; i < fCSRecognizers_size; i++) { 30559d709d503bab6e2b61931737e662dd293b40578ccornelius fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled; 30659d709d503bab6e2b61931737e662dd293b40578ccornelius } 30759d709d503bab6e2b61931737e662dd293b40578ccornelius } 30859d709d503bab6e2b61931737e662dd293b40578ccornelius 30959d709d503bab6e2b61931737e662dd293b40578ccornelius if (fEnabledRecognizers != NULL) { 31059d709d503bab6e2b61931737e662dd293b40578ccornelius fEnabledRecognizers[modIdx] = enabled; 31159d709d503bab6e2b61931737e662dd293b40578ccornelius } 31259d709d503bab6e2b61931737e662dd293b40578ccornelius} 31359d709d503bab6e2b61931737e662dd293b40578ccornelius 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( index > fCSRecognizers_size-1 || index < 0) { 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_INDEX_OUTOFBOUNDS_ERROR; 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fCSRecognizers[index]->getName(); 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct { 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t currIndex; 33059d709d503bab6e2b61931737e662dd293b40578ccornelius UBool all; 33159d709d503bab6e2b61931737e662dd293b40578ccornelius UBool *enabledRecognizers; 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} Context; 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumClose(UEnumeration *en) { 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(en->context != NULL) { 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru DELETE_ARRAY(en->context); 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru DELETE_ARRAY(en); 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 34659d709d503bab6e2b61931737e662dd293b40578ccorneliusenumCount(UEnumeration *en, UErrorCode *) { 34759d709d503bab6e2b61931737e662dd293b40578ccornelius if (((Context *)en->context)->all) { 34859d709d503bab6e2b61931737e662dd293b40578ccornelius // ucsdet_getAllDetectableCharsets, all charset detector names 34959d709d503bab6e2b61931737e662dd293b40578ccornelius return fCSRecognizers_size; 35059d709d503bab6e2b61931737e662dd293b40578ccornelius } 35159d709d503bab6e2b61931737e662dd293b40578ccornelius 35259d709d503bab6e2b61931737e662dd293b40578ccornelius // Otherwise, ucsdet_getDetectableCharsets - only enabled ones 35359d709d503bab6e2b61931737e662dd293b40578ccornelius int32_t count = 0; 35459d709d503bab6e2b61931737e662dd293b40578ccornelius UBool *enabledArray = ((Context *)en->context)->enabledRecognizers; 35559d709d503bab6e2b61931737e662dd293b40578ccornelius if (enabledArray != NULL) { 35659d709d503bab6e2b61931737e662dd293b40578ccornelius // custom set 35759d709d503bab6e2b61931737e662dd293b40578ccornelius for (int32_t i = 0; i < fCSRecognizers_size; i++) { 35859d709d503bab6e2b61931737e662dd293b40578ccornelius if (enabledArray[i]) { 35959d709d503bab6e2b61931737e662dd293b40578ccornelius count++; 36059d709d503bab6e2b61931737e662dd293b40578ccornelius } 36159d709d503bab6e2b61931737e662dd293b40578ccornelius } 36259d709d503bab6e2b61931737e662dd293b40578ccornelius } else { 36359d709d503bab6e2b61931737e662dd293b40578ccornelius // default set 36459d709d503bab6e2b61931737e662dd293b40578ccornelius for (int32_t i = 0; i < fCSRecognizers_size; i++) { 36559d709d503bab6e2b61931737e662dd293b40578ccornelius if (fCSRecognizers[i]->isDefaultEnabled) { 36659d709d503bab6e2b61931737e662dd293b40578ccornelius count++; 36759d709d503bab6e2b61931737e662dd293b40578ccornelius } 36859d709d503bab6e2b61931737e662dd293b40578ccornelius } 36959d709d503bab6e2b61931737e662dd293b40578ccornelius } 37059d709d503bab6e2b61931737e662dd293b40578ccornelius return count; 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* U_CALLCONV 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) { 37559d709d503bab6e2b61931737e662dd293b40578ccornelius const char *currName = NULL; 37659d709d503bab6e2b61931737e662dd293b40578ccornelius 37759d709d503bab6e2b61931737e662dd293b40578ccornelius if (((Context *)en->context)->currIndex < fCSRecognizers_size) { 37859d709d503bab6e2b61931737e662dd293b40578ccornelius if (((Context *)en->context)->all) { 37959d709d503bab6e2b61931737e662dd293b40578ccornelius // ucsdet_getAllDetectableCharsets, all charset detector names 38059d709d503bab6e2b61931737e662dd293b40578ccornelius currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName(); 38159d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context *)en->context)->currIndex++; 38259d709d503bab6e2b61931737e662dd293b40578ccornelius } else { 38359d709d503bab6e2b61931737e662dd293b40578ccornelius // ucsdet_getDetectableCharsets 38459d709d503bab6e2b61931737e662dd293b40578ccornelius UBool *enabledArray = ((Context *)en->context)->enabledRecognizers; 38559d709d503bab6e2b61931737e662dd293b40578ccornelius if (enabledArray != NULL) { 38659d709d503bab6e2b61931737e662dd293b40578ccornelius // custome set 38759d709d503bab6e2b61931737e662dd293b40578ccornelius while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) { 38859d709d503bab6e2b61931737e662dd293b40578ccornelius if (enabledArray[((Context *)en->context)->currIndex]) { 38959d709d503bab6e2b61931737e662dd293b40578ccornelius currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName(); 39059d709d503bab6e2b61931737e662dd293b40578ccornelius } 39159d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context *)en->context)->currIndex++; 39259d709d503bab6e2b61931737e662dd293b40578ccornelius } 39359d709d503bab6e2b61931737e662dd293b40578ccornelius } else { 39459d709d503bab6e2b61931737e662dd293b40578ccornelius // default set 39559d709d503bab6e2b61931737e662dd293b40578ccornelius while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) { 39659d709d503bab6e2b61931737e662dd293b40578ccornelius if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) { 39759d709d503bab6e2b61931737e662dd293b40578ccornelius currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName(); 39859d709d503bab6e2b61931737e662dd293b40578ccornelius } 39959d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context *)en->context)->currIndex++; 40059d709d503bab6e2b61931737e662dd293b40578ccornelius } 40159d709d503bab6e2b61931737e662dd293b40578ccornelius } 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 40459d709d503bab6e2b61931737e662dd293b40578ccornelius 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(resultLength != NULL) { 40659d709d503bab6e2b61931737e662dd293b40578ccornelius *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName); 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return currName; 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 41259d709d503bab6e2b61931737e662dd293b40578ccornelius 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumReset(UEnumeration *en, UErrorCode *) { 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((Context *)en->context)->currIndex = 0; 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UEnumeration gCSDetEnumeration = { 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enumClose, 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enumCount, 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uenum_unextDefault, 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enumNext, 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enumReset 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 42859d709d503bab6e2b61931737e662dd293b40578ccorneliusU_CDECL_END 42959d709d503bab6e2b61931737e662dd293b40578ccornelius 43059d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_BEGIN 43159d709d503bab6e2b61931737e662dd293b40578ccornelius 43259d709d503bab6e2b61931737e662dd293b40578ccorneliusUEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status) 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43559d709d503bab6e2b61931737e662dd293b40578ccornelius /* Initialize recognized charsets. */ 43659d709d503bab6e2b61931737e662dd293b40578ccornelius setRecognizers(status); 43759d709d503bab6e2b61931737e662dd293b40578ccornelius 43859d709d503bab6e2b61931737e662dd293b40578ccornelius if(U_FAILURE(status)) { 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44259d709d503bab6e2b61931737e662dd293b40578ccornelius UEnumeration *en = NEW_ARRAY(UEnumeration, 1); 44359d709d503bab6e2b61931737e662dd293b40578ccornelius if (en == NULL) { 44459d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 44559d709d503bab6e2b61931737e662dd293b40578ccornelius return 0; 44659d709d503bab6e2b61931737e662dd293b40578ccornelius } 44759d709d503bab6e2b61931737e662dd293b40578ccornelius memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration)); 44859d709d503bab6e2b61931737e662dd293b40578ccornelius en->context = (void*)NEW_ARRAY(Context, 1); 44959d709d503bab6e2b61931737e662dd293b40578ccornelius if (en->context == NULL) { 45059d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 45159d709d503bab6e2b61931737e662dd293b40578ccornelius DELETE_ARRAY(en); 45259d709d503bab6e2b61931737e662dd293b40578ccornelius return 0; 45359d709d503bab6e2b61931737e662dd293b40578ccornelius } 45459d709d503bab6e2b61931737e662dd293b40578ccornelius uprv_memset(en->context, 0, sizeof(Context)); 45559d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context*)en->context)->all = TRUE; 45659d709d503bab6e2b61931737e662dd293b40578ccornelius return en; 45759d709d503bab6e2b61931737e662dd293b40578ccornelius} 45859d709d503bab6e2b61931737e662dd293b40578ccornelius 45959d709d503bab6e2b61931737e662dd293b40578ccorneliusUEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const 46059d709d503bab6e2b61931737e662dd293b40578ccornelius{ 46159d709d503bab6e2b61931737e662dd293b40578ccornelius if(U_FAILURE(status)) { 46259d709d503bab6e2b61931737e662dd293b40578ccornelius return 0; 46359d709d503bab6e2b61931737e662dd293b40578ccornelius } 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UEnumeration *en = NEW_ARRAY(UEnumeration, 1); 46659d709d503bab6e2b61931737e662dd293b40578ccornelius if (en == NULL) { 46759d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 46859d709d503bab6e2b61931737e662dd293b40578ccornelius return 0; 46959d709d503bab6e2b61931737e662dd293b40578ccornelius } 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration)); 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru en->context = (void*)NEW_ARRAY(Context, 1); 47259d709d503bab6e2b61931737e662dd293b40578ccornelius if (en->context == NULL) { 47359d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 47459d709d503bab6e2b61931737e662dd293b40578ccornelius DELETE_ARRAY(en); 47559d709d503bab6e2b61931737e662dd293b40578ccornelius return 0; 47659d709d503bab6e2b61931737e662dd293b40578ccornelius } 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(en->context, 0, sizeof(Context)); 47859d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context*)en->context)->all = FALSE; 47959d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context*)en->context)->enabledRecognizers = fEnabledRecognizers; 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return en; 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48359d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_END 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48559d709d503bab6e2b61931737e662dd293b40578ccornelius#endif 486