10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************** 58de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert * Copyright (C) 2005-2016, International Business Machines 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Corporation and others. All Rights Reserved. 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucsdet.h" 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csdetect.h" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csmatch.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uenumimp.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "umutex.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucln_in.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uarrsort.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "inputext.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrsbcs.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrmbcs.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrutf8.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrucode.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csr2022.h" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type)) 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define DELETE_ARRAY(array) uprv_free((void *) (array)) 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3559d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_BEGIN 3659d709d503bab6e2b61931737e662dd293b40578ccornelius 3759d709d503bab6e2b61931737e662dd293b40578ccorneliusstruct CSRecognizerInfo : public UMemory { 3859d709d503bab6e2b61931737e662dd293b40578ccornelius CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled) 3959d709d503bab6e2b61931737e662dd293b40578ccornelius : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {}; 4059d709d503bab6e2b61931737e662dd293b40578ccornelius 4159d709d503bab6e2b61931737e662dd293b40578ccornelius ~CSRecognizerInfo() {delete recognizer;}; 4259d709d503bab6e2b61931737e662dd293b40578ccornelius 4359d709d503bab6e2b61931737e662dd293b40578ccornelius CharsetRecognizer *recognizer; 4459d709d503bab6e2b61931737e662dd293b40578ccornelius UBool isDefaultEnabled; 4559d709d503bab6e2b61931737e662dd293b40578ccornelius}; 4659d709d503bab6e2b61931737e662dd293b40578ccornelius 4759d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_END 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 4959d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic icu::CSRecognizerInfo **fCSRecognizers = NULL; 5059d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic icu::UInitOnce gCSRecognizersInitOnce; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t fCSRecognizers_size = 0; 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 5359d709d503bab6e2b61931737e662dd293b40578ccorneliusU_CDECL_BEGIN 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV csdet_cleanup(void) 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 5659d709d503bab6e2b61931737e662dd293b40578ccornelius U_NAMESPACE_USE 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fCSRecognizers != NULL) { 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(int32_t r = 0; r < fCSRecognizers_size; r += 1) { 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete fCSRecognizers[r]; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCSRecognizers[r] = NULL; 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru DELETE_ARRAY(fCSRecognizers); 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCSRecognizers = NULL; 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCSRecognizers_size = 0; 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 6759d709d503bab6e2b61931737e662dd293b40578ccornelius gCSRecognizersInitOnce.reset(); 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharsetMatchComparator(const void * /*context*/, const void *left, const void *right) 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_NAMESPACE_USE 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const CharsetMatch **csm_l = (const CharsetMatch **) left; 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const CharsetMatch **csm_r = (const CharsetMatch **) right; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // NOTE: compare is backwards to sort from highest to lowest. 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (*csm_r)->getConfidence() - (*csm_l)->getConfidence(); 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8459d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic void U_CALLCONV initRecognizers(UErrorCode &status) { 8559d709d503bab6e2b61931737e662dd293b40578ccornelius U_NAMESPACE_USE 8659d709d503bab6e2b61931737e662dd293b40578ccornelius ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup); 8759d709d503bab6e2b61931737e662dd293b40578ccornelius CSRecognizerInfo *tempArray[] = { 8859d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE), 8959d709d503bab6e2b61931737e662dd293b40578ccornelius 9059d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE), 9159d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE), 9259d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE), 9359d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE), 9459d709d503bab6e2b61931737e662dd293b40578ccornelius 9559d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE), 9659d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE), 9759d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), TRUE), 9859d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), TRUE), 9959d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_7_el(), TRUE), 10059d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), TRUE), 10159d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_8_he(), TRUE), 10259d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_windows_1251(), TRUE), 10359d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_windows_1256(), TRUE), 10459d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_KOI8_R(), TRUE), 10559d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), TRUE), 10659d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_sjis(), TRUE), 10759d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_gb_18030(), TRUE), 10859d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_euc_jp(), TRUE), 10959d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_euc_kr(), TRUE), 11059d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), 11159d709d503bab6e2b61931737e662dd293b40578ccornelius 11259d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), 1131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert#if !UCONFIG_ONLY_HTML_CONVERSION 11459d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), 11559d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), 11659d709d503bab6e2b61931737e662dd293b40578ccornelius 11759d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), FALSE), 11859d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), 11959d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), 12059d709d503bab6e2b61931737e662dd293b40578ccornelius new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) 1211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert#endif 12259d709d503bab6e2b61931737e662dd293b40578ccornelius }; 1238de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert int32_t rCount = UPRV_LENGTHOF(tempArray); 12459d709d503bab6e2b61931737e662dd293b40578ccornelius 12559d709d503bab6e2b61931737e662dd293b40578ccornelius fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount); 12659d709d503bab6e2b61931737e662dd293b40578ccornelius 12759d709d503bab6e2b61931737e662dd293b40578ccornelius if (fCSRecognizers == NULL) { 12859d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 12959d709d503bab6e2b61931737e662dd293b40578ccornelius } 13059d709d503bab6e2b61931737e662dd293b40578ccornelius else { 13159d709d503bab6e2b61931737e662dd293b40578ccornelius fCSRecognizers_size = rCount; 13259d709d503bab6e2b61931737e662dd293b40578ccornelius for (int32_t r = 0; r < rCount; r += 1) { 13359d709d503bab6e2b61931737e662dd293b40578ccornelius fCSRecognizers[r] = tempArray[r]; 13459d709d503bab6e2b61931737e662dd293b40578ccornelius if (fCSRecognizers[r] == NULL) { 13559d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 13659d709d503bab6e2b61931737e662dd293b40578ccornelius } 13759d709d503bab6e2b61931737e662dd293b40578ccornelius } 13859d709d503bab6e2b61931737e662dd293b40578ccornelius } 13959d709d503bab6e2b61931737e662dd293b40578ccornelius} 14059d709d503bab6e2b61931737e662dd293b40578ccornelius 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setRecognizers(UErrorCode &status) 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 14759d709d503bab6e2b61931737e662dd293b40578ccornelius umtx_initOnce(gCSRecognizersInitOnce, &initRecognizers, status); 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCharsetDetector::CharsetDetector(UErrorCode &status) 15185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho : textIn(new InputText(status)), resultArray(NULL), 15259d709d503bab6e2b61931737e662dd293b40578ccornelius resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE), 15359d709d503bab6e2b61931737e662dd293b40578ccornelius fEnabledRecognizers(NULL) 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setRecognizers(status); 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru resultArray = (CharsetMatch **)uprv_malloc(sizeof(CharsetMatch *)*fCSRecognizers_size); 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (resultArray == NULL) { 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(int32_t i = 0; i < fCSRecognizers_size; i += 1) { 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru resultArray[i] = new CharsetMatch(); 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (resultArray[i] == NULL) { 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCharsetDetector::~CharsetDetector() 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete textIn; 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(int32_t i = 0; i < fCSRecognizers_size; i += 1) { 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete resultArray[i]; 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(resultArray); 19159d709d503bab6e2b61931737e662dd293b40578ccornelius 19259d709d503bab6e2b61931737e662dd293b40578ccornelius if (fEnabledRecognizers) { 19359d709d503bab6e2b61931737e662dd293b40578ccornelius uprv_free(fEnabledRecognizers); 19459d709d503bab6e2b61931737e662dd293b40578ccornelius } 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setText(const char *in, int32_t len) 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textIn->setText(in, len); 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFreshTextSet = TRUE; 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool CharsetDetector::setStripTagsFlag(UBool flag) 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool temp = fStripTags; 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fStripTags = flag; 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFreshTextSet = TRUE; 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return temp; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool CharsetDetector::getStripTagsFlag() const 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fStripTags; 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setDeclaredEncoding(const char *encoding, int32_t len) const 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textIn->setDeclaredEncoding(encoding,len); 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t CharsetDetector::getDetectableCount() 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setRecognizers(status); 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fCSRecognizers_size; 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CharsetMatch *CharsetDetector::detect(UErrorCode &status) 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t maxMatchesFound = 0; 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru detectAll(maxMatchesFound, status); 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(maxMatchesFound > 0) { 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return resultArray[0]; 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, UErrorCode &status) 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!textIn->isSet()) { 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_MISSING_RESOURCE_ERROR;// TODO: Need to set proper status code for input text not set 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 24954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } else if (fFreshTextSet) { 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CharsetRecognizer *csr; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textIn->MungeInput(fStripTags); 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Iterate over all possible charsets, remember all that 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // give a match quality > 0. 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru resultCount = 0; 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i = 0; i < fCSRecognizers_size; i += 1) { 25959d709d503bab6e2b61931737e662dd293b40578ccornelius csr = fCSRecognizers[i]->recognizer; 26054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (csr->match(textIn, resultArray[resultCount])) { 26154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius resultCount++; 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (resultCount > 1) { 26654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius uprv_sortArray(resultArray, resultCount, sizeof resultArray[0], charsetMatchComparator, NULL, TRUE, &status); 26785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFreshTextSet = FALSE; 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxMatchesFound = resultCount; 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return resultArray; 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 27659d709d503bab6e2b61931737e662dd293b40578ccorneliusvoid CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status) 27759d709d503bab6e2b61931737e662dd293b40578ccornelius{ 27859d709d503bab6e2b61931737e662dd293b40578ccornelius if (U_FAILURE(status)) { 27959d709d503bab6e2b61931737e662dd293b40578ccornelius return; 28059d709d503bab6e2b61931737e662dd293b40578ccornelius } 28159d709d503bab6e2b61931737e662dd293b40578ccornelius 28259d709d503bab6e2b61931737e662dd293b40578ccornelius int32_t modIdx = -1; 28359d709d503bab6e2b61931737e662dd293b40578ccornelius UBool isDefaultVal = FALSE; 28459d709d503bab6e2b61931737e662dd293b40578ccornelius for (int32_t i = 0; i < fCSRecognizers_size; i++) { 28559d709d503bab6e2b61931737e662dd293b40578ccornelius CSRecognizerInfo *csrinfo = fCSRecognizers[i]; 28659d709d503bab6e2b61931737e662dd293b40578ccornelius if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) { 28759d709d503bab6e2b61931737e662dd293b40578ccornelius modIdx = i; 28859d709d503bab6e2b61931737e662dd293b40578ccornelius isDefaultVal = (csrinfo->isDefaultEnabled == enabled); 28959d709d503bab6e2b61931737e662dd293b40578ccornelius break; 29059d709d503bab6e2b61931737e662dd293b40578ccornelius } 29159d709d503bab6e2b61931737e662dd293b40578ccornelius } 29259d709d503bab6e2b61931737e662dd293b40578ccornelius if (modIdx < 0) { 29359d709d503bab6e2b61931737e662dd293b40578ccornelius // No matching encoding found 29459d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_ILLEGAL_ARGUMENT_ERROR; 29559d709d503bab6e2b61931737e662dd293b40578ccornelius return; 29659d709d503bab6e2b61931737e662dd293b40578ccornelius } 29759d709d503bab6e2b61931737e662dd293b40578ccornelius 29859d709d503bab6e2b61931737e662dd293b40578ccornelius if (fEnabledRecognizers == NULL && !isDefaultVal) { 29959d709d503bab6e2b61931737e662dd293b40578ccornelius // Create an array storing the non default setting 30059d709d503bab6e2b61931737e662dd293b40578ccornelius fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size); 30159d709d503bab6e2b61931737e662dd293b40578ccornelius if (fEnabledRecognizers == NULL) { 30259d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 30359d709d503bab6e2b61931737e662dd293b40578ccornelius return; 30459d709d503bab6e2b61931737e662dd293b40578ccornelius } 30559d709d503bab6e2b61931737e662dd293b40578ccornelius // Initialize the array with default info 30659d709d503bab6e2b61931737e662dd293b40578ccornelius for (int32_t i = 0; i < fCSRecognizers_size; i++) { 30759d709d503bab6e2b61931737e662dd293b40578ccornelius fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled; 30859d709d503bab6e2b61931737e662dd293b40578ccornelius } 30959d709d503bab6e2b61931737e662dd293b40578ccornelius } 31059d709d503bab6e2b61931737e662dd293b40578ccornelius 31159d709d503bab6e2b61931737e662dd293b40578ccornelius if (fEnabledRecognizers != NULL) { 31259d709d503bab6e2b61931737e662dd293b40578ccornelius fEnabledRecognizers[modIdx] = enabled; 31359d709d503bab6e2b61931737e662dd293b40578ccornelius } 31459d709d503bab6e2b61931737e662dd293b40578ccornelius} 31559d709d503bab6e2b61931737e662dd293b40578ccornelius 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( index > fCSRecognizers_size-1 || index < 0) { 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_INDEX_OUTOFBOUNDS_ERROR; 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fCSRecognizers[index]->getName(); 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct { 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t currIndex; 33259d709d503bab6e2b61931737e662dd293b40578ccornelius UBool all; 33359d709d503bab6e2b61931737e662dd293b40578ccornelius UBool *enabledRecognizers; 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} Context; 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumClose(UEnumeration *en) { 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(en->context != NULL) { 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru DELETE_ARRAY(en->context); 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru DELETE_ARRAY(en); 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 34859d709d503bab6e2b61931737e662dd293b40578ccorneliusenumCount(UEnumeration *en, UErrorCode *) { 34959d709d503bab6e2b61931737e662dd293b40578ccornelius if (((Context *)en->context)->all) { 35059d709d503bab6e2b61931737e662dd293b40578ccornelius // ucsdet_getAllDetectableCharsets, all charset detector names 35159d709d503bab6e2b61931737e662dd293b40578ccornelius return fCSRecognizers_size; 35259d709d503bab6e2b61931737e662dd293b40578ccornelius } 35359d709d503bab6e2b61931737e662dd293b40578ccornelius 35459d709d503bab6e2b61931737e662dd293b40578ccornelius // Otherwise, ucsdet_getDetectableCharsets - only enabled ones 35559d709d503bab6e2b61931737e662dd293b40578ccornelius int32_t count = 0; 35659d709d503bab6e2b61931737e662dd293b40578ccornelius UBool *enabledArray = ((Context *)en->context)->enabledRecognizers; 35759d709d503bab6e2b61931737e662dd293b40578ccornelius if (enabledArray != NULL) { 35859d709d503bab6e2b61931737e662dd293b40578ccornelius // custom set 35959d709d503bab6e2b61931737e662dd293b40578ccornelius for (int32_t i = 0; i < fCSRecognizers_size; i++) { 36059d709d503bab6e2b61931737e662dd293b40578ccornelius if (enabledArray[i]) { 36159d709d503bab6e2b61931737e662dd293b40578ccornelius count++; 36259d709d503bab6e2b61931737e662dd293b40578ccornelius } 36359d709d503bab6e2b61931737e662dd293b40578ccornelius } 36459d709d503bab6e2b61931737e662dd293b40578ccornelius } else { 36559d709d503bab6e2b61931737e662dd293b40578ccornelius // default set 36659d709d503bab6e2b61931737e662dd293b40578ccornelius for (int32_t i = 0; i < fCSRecognizers_size; i++) { 36759d709d503bab6e2b61931737e662dd293b40578ccornelius if (fCSRecognizers[i]->isDefaultEnabled) { 36859d709d503bab6e2b61931737e662dd293b40578ccornelius count++; 36959d709d503bab6e2b61931737e662dd293b40578ccornelius } 37059d709d503bab6e2b61931737e662dd293b40578ccornelius } 37159d709d503bab6e2b61931737e662dd293b40578ccornelius } 37259d709d503bab6e2b61931737e662dd293b40578ccornelius return count; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* U_CALLCONV 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) { 37759d709d503bab6e2b61931737e662dd293b40578ccornelius const char *currName = NULL; 37859d709d503bab6e2b61931737e662dd293b40578ccornelius 37959d709d503bab6e2b61931737e662dd293b40578ccornelius if (((Context *)en->context)->currIndex < fCSRecognizers_size) { 38059d709d503bab6e2b61931737e662dd293b40578ccornelius if (((Context *)en->context)->all) { 38159d709d503bab6e2b61931737e662dd293b40578ccornelius // ucsdet_getAllDetectableCharsets, all charset detector names 38259d709d503bab6e2b61931737e662dd293b40578ccornelius currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName(); 38359d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context *)en->context)->currIndex++; 38459d709d503bab6e2b61931737e662dd293b40578ccornelius } else { 38559d709d503bab6e2b61931737e662dd293b40578ccornelius // ucsdet_getDetectableCharsets 38659d709d503bab6e2b61931737e662dd293b40578ccornelius UBool *enabledArray = ((Context *)en->context)->enabledRecognizers; 38759d709d503bab6e2b61931737e662dd293b40578ccornelius if (enabledArray != NULL) { 38859d709d503bab6e2b61931737e662dd293b40578ccornelius // custome set 38959d709d503bab6e2b61931737e662dd293b40578ccornelius while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) { 39059d709d503bab6e2b61931737e662dd293b40578ccornelius if (enabledArray[((Context *)en->context)->currIndex]) { 39159d709d503bab6e2b61931737e662dd293b40578ccornelius currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName(); 39259d709d503bab6e2b61931737e662dd293b40578ccornelius } 39359d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context *)en->context)->currIndex++; 39459d709d503bab6e2b61931737e662dd293b40578ccornelius } 39559d709d503bab6e2b61931737e662dd293b40578ccornelius } else { 39659d709d503bab6e2b61931737e662dd293b40578ccornelius // default set 39759d709d503bab6e2b61931737e662dd293b40578ccornelius while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) { 39859d709d503bab6e2b61931737e662dd293b40578ccornelius if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) { 39959d709d503bab6e2b61931737e662dd293b40578ccornelius currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName(); 40059d709d503bab6e2b61931737e662dd293b40578ccornelius } 40159d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context *)en->context)->currIndex++; 40259d709d503bab6e2b61931737e662dd293b40578ccornelius } 40359d709d503bab6e2b61931737e662dd293b40578ccornelius } 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 40659d709d503bab6e2b61931737e662dd293b40578ccornelius 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(resultLength != NULL) { 40859d709d503bab6e2b61931737e662dd293b40578ccornelius *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName); 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return currName; 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 41459d709d503bab6e2b61931737e662dd293b40578ccornelius 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumReset(UEnumeration *en, UErrorCode *) { 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((Context *)en->context)->currIndex = 0; 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UEnumeration gCSDetEnumeration = { 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enumClose, 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enumCount, 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uenum_unextDefault, 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enumNext, 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enumReset 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43059d709d503bab6e2b61931737e662dd293b40578ccorneliusU_CDECL_END 43159d709d503bab6e2b61931737e662dd293b40578ccornelius 43259d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_BEGIN 43359d709d503bab6e2b61931737e662dd293b40578ccornelius 43459d709d503bab6e2b61931737e662dd293b40578ccorneliusUEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status) 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43759d709d503bab6e2b61931737e662dd293b40578ccornelius /* Initialize recognized charsets. */ 43859d709d503bab6e2b61931737e662dd293b40578ccornelius setRecognizers(status); 43959d709d503bab6e2b61931737e662dd293b40578ccornelius 44059d709d503bab6e2b61931737e662dd293b40578ccornelius if(U_FAILURE(status)) { 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44459d709d503bab6e2b61931737e662dd293b40578ccornelius UEnumeration *en = NEW_ARRAY(UEnumeration, 1); 44559d709d503bab6e2b61931737e662dd293b40578ccornelius if (en == NULL) { 44659d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 44759d709d503bab6e2b61931737e662dd293b40578ccornelius return 0; 44859d709d503bab6e2b61931737e662dd293b40578ccornelius } 44959d709d503bab6e2b61931737e662dd293b40578ccornelius memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration)); 45059d709d503bab6e2b61931737e662dd293b40578ccornelius en->context = (void*)NEW_ARRAY(Context, 1); 45159d709d503bab6e2b61931737e662dd293b40578ccornelius if (en->context == NULL) { 45259d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 45359d709d503bab6e2b61931737e662dd293b40578ccornelius DELETE_ARRAY(en); 45459d709d503bab6e2b61931737e662dd293b40578ccornelius return 0; 45559d709d503bab6e2b61931737e662dd293b40578ccornelius } 45659d709d503bab6e2b61931737e662dd293b40578ccornelius uprv_memset(en->context, 0, sizeof(Context)); 45759d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context*)en->context)->all = TRUE; 45859d709d503bab6e2b61931737e662dd293b40578ccornelius return en; 45959d709d503bab6e2b61931737e662dd293b40578ccornelius} 46059d709d503bab6e2b61931737e662dd293b40578ccornelius 46159d709d503bab6e2b61931737e662dd293b40578ccorneliusUEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const 46259d709d503bab6e2b61931737e662dd293b40578ccornelius{ 46359d709d503bab6e2b61931737e662dd293b40578ccornelius if(U_FAILURE(status)) { 46459d709d503bab6e2b61931737e662dd293b40578ccornelius return 0; 46559d709d503bab6e2b61931737e662dd293b40578ccornelius } 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UEnumeration *en = NEW_ARRAY(UEnumeration, 1); 46859d709d503bab6e2b61931737e662dd293b40578ccornelius if (en == NULL) { 46959d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 47059d709d503bab6e2b61931737e662dd293b40578ccornelius return 0; 47159d709d503bab6e2b61931737e662dd293b40578ccornelius } 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration)); 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru en->context = (void*)NEW_ARRAY(Context, 1); 47459d709d503bab6e2b61931737e662dd293b40578ccornelius if (en->context == NULL) { 47559d709d503bab6e2b61931737e662dd293b40578ccornelius status = U_MEMORY_ALLOCATION_ERROR; 47659d709d503bab6e2b61931737e662dd293b40578ccornelius DELETE_ARRAY(en); 47759d709d503bab6e2b61931737e662dd293b40578ccornelius return 0; 47859d709d503bab6e2b61931737e662dd293b40578ccornelius } 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(en->context, 0, sizeof(Context)); 48059d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context*)en->context)->all = FALSE; 48159d709d503bab6e2b61931737e662dd293b40578ccornelius ((Context*)en->context)->enabledRecognizers = fEnabledRecognizers; 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return en; 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48559d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_END 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48759d709d503bab6e2b61931737e662dd293b40578ccornelius#endif 488