1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru **********************************************************************
359d709d503bab6e2b61931737e662dd293b40578ccornelius *   Copyright (C) 2005-2013, International Business Machines
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   Corporation and others.  All Rights Reserved.
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru **********************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucsdet.h"
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csdetect.h"
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csmatch.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uenumimp.h"
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "umutex.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucln_in.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uarrsort.h"
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "inputext.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrsbcs.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrmbcs.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrutf8.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrucode.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csr2022.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/**
 * Number of elements in a true array (not a pointer). The argument is fully
 * parenthesized so that any array-typed expression can be passed safely.
 */
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

/**
 * Allocates raw, uninitialized storage for `count` objects of `type` through
 * uprv_malloc(). No constructors are run; callers in this file check the
 * result against NULL. The whole expansion is parenthesized so the cast
 * cannot bind to a following postfix operator.
 */
#define NEW_ARRAY(type,count) ((type *) uprv_malloc((count) * sizeof(type)))
/** Releases storage through uprv_free(); no destructors are run. */
#define DELETE_ARRAY(array) uprv_free((void *) (array))
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
3559d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_BEGIN
3659d709d503bab6e2b61931737e662dd293b40578ccornelius
3759d709d503bab6e2b61931737e662dd293b40578ccorneliusstruct CSRecognizerInfo : public UMemory {
3859d709d503bab6e2b61931737e662dd293b40578ccornelius    CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled)
3959d709d503bab6e2b61931737e662dd293b40578ccornelius        : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {};
4059d709d503bab6e2b61931737e662dd293b40578ccornelius
4159d709d503bab6e2b61931737e662dd293b40578ccornelius    ~CSRecognizerInfo() {delete recognizer;};
4259d709d503bab6e2b61931737e662dd293b40578ccornelius
4359d709d503bab6e2b61931737e662dd293b40578ccornelius    CharsetRecognizer *recognizer;
4459d709d503bab6e2b61931737e662dd293b40578ccornelius    UBool isDefaultEnabled;
4559d709d503bab6e2b61931737e662dd293b40578ccornelius};
4659d709d503bab6e2b61931737e662dd293b40578ccornelius
4759d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_END
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
4959d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic icu::CSRecognizerInfo **fCSRecognizers = NULL;
5059d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic icu::UInitOnce gCSRecognizersInitOnce;
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t fCSRecognizers_size = 0;
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
5359d709d503bab6e2b61931737e662dd293b40578ccorneliusU_CDECL_BEGIN
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV csdet_cleanup(void)
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
5659d709d503bab6e2b61931737e662dd293b40578ccornelius    U_NAMESPACE_USE
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fCSRecognizers != NULL) {
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(int32_t r = 0; r < fCSRecognizers_size; r += 1) {
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            delete fCSRecognizers[r];
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fCSRecognizers[r] = NULL;
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        DELETE_ARRAY(fCSRecognizers);
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fCSRecognizers = NULL;
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fCSRecognizers_size = 0;
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
6759d709d503bab6e2b61931737e662dd293b40578ccornelius    gCSRecognizersInitOnce.reset();
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return TRUE;
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharsetMatchComparator(const void * /*context*/, const void *left, const void *right)
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_NAMESPACE_USE
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const CharsetMatch **csm_l = (const CharsetMatch **) left;
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const CharsetMatch **csm_r = (const CharsetMatch **) right;
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // NOTE: compare is backwards to sort from highest to lowest.
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (*csm_r)->getConfidence() - (*csm_l)->getConfidence();
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
8459d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic void U_CALLCONV initRecognizers(UErrorCode &status) {
8559d709d503bab6e2b61931737e662dd293b40578ccornelius    U_NAMESPACE_USE
8659d709d503bab6e2b61931737e662dd293b40578ccornelius    ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup);
8759d709d503bab6e2b61931737e662dd293b40578ccornelius    CSRecognizerInfo *tempArray[] = {
8859d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE),
8959d709d503bab6e2b61931737e662dd293b40578ccornelius
9059d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE),
9159d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE),
9259d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE),
9359d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE),
9459d709d503bab6e2b61931737e662dd293b40578ccornelius
9559d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE),
9659d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE),
9759d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), TRUE),
9859d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), TRUE),
9959d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_7_el(), TRUE),
10059d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), TRUE),
10159d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_8_he(), TRUE),
10259d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_windows_1251(), TRUE),
10359d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_windows_1256(), TRUE),
10459d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_KOI8_R(), TRUE),
10559d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), TRUE),
10659d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_sjis(), TRUE),
10759d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_gb_18030(), TRUE),
10859d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_euc_jp(), TRUE),
10959d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_euc_kr(), TRUE),
11059d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
11159d709d503bab6e2b61931737e662dd293b40578ccornelius
11259d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
11359d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
11459d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
11559d709d503bab6e2b61931737e662dd293b40578ccornelius
11659d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), FALSE),
11759d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
11859d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
11959d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
12059d709d503bab6e2b61931737e662dd293b40578ccornelius    };
12159d709d503bab6e2b61931737e662dd293b40578ccornelius    int32_t rCount = ARRAY_SIZE(tempArray);
12259d709d503bab6e2b61931737e662dd293b40578ccornelius
12359d709d503bab6e2b61931737e662dd293b40578ccornelius    fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount);
12459d709d503bab6e2b61931737e662dd293b40578ccornelius
12559d709d503bab6e2b61931737e662dd293b40578ccornelius    if (fCSRecognizers == NULL) {
12659d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
12759d709d503bab6e2b61931737e662dd293b40578ccornelius    }
12859d709d503bab6e2b61931737e662dd293b40578ccornelius    else {
12959d709d503bab6e2b61931737e662dd293b40578ccornelius        fCSRecognizers_size = rCount;
13059d709d503bab6e2b61931737e662dd293b40578ccornelius        for (int32_t r = 0; r < rCount; r += 1) {
13159d709d503bab6e2b61931737e662dd293b40578ccornelius            fCSRecognizers[r] = tempArray[r];
13259d709d503bab6e2b61931737e662dd293b40578ccornelius            if (fCSRecognizers[r] == NULL) {
13359d709d503bab6e2b61931737e662dd293b40578ccornelius                status = U_MEMORY_ALLOCATION_ERROR;
13459d709d503bab6e2b61931737e662dd293b40578ccornelius            }
13559d709d503bab6e2b61931737e662dd293b40578ccornelius        }
13659d709d503bab6e2b61931737e662dd293b40578ccornelius    }
13759d709d503bab6e2b61931737e662dd293b40578ccornelius}
13859d709d503bab6e2b61931737e662dd293b40578ccornelius
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setRecognizers(UErrorCode &status)
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
14559d709d503bab6e2b61931737e662dd293b40578ccornelius    umtx_initOnce(gCSRecognizersInitOnce, &initRecognizers, status);
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCharsetDetector::CharsetDetector(UErrorCode &status)
14985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho  : textIn(new InputText(status)), resultArray(NULL),
15059d709d503bab6e2b61931737e662dd293b40578ccornelius    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE),
15159d709d503bab6e2b61931737e662dd293b40578ccornelius    fEnabledRecognizers(NULL)
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(status)) {
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    setRecognizers(status);
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(status)) {
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    resultArray = (CharsetMatch **)uprv_malloc(sizeof(CharsetMatch *)*fCSRecognizers_size);
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (resultArray == NULL) {
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        status = U_MEMORY_ALLOCATION_ERROR;
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        resultArray[i] = new CharsetMatch();
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (resultArray[i] == NULL) {
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            status = U_MEMORY_ALLOCATION_ERROR;
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCharsetDetector::~CharsetDetector()
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete textIn;
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete resultArray[i];
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_free(resultArray);
18959d709d503bab6e2b61931737e662dd293b40578ccornelius
19059d709d503bab6e2b61931737e662dd293b40578ccornelius    if (fEnabledRecognizers) {
19159d709d503bab6e2b61931737e662dd293b40578ccornelius        uprv_free(fEnabledRecognizers);
19259d709d503bab6e2b61931737e662dd293b40578ccornelius    }
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setText(const char *in, int32_t len)
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    textIn->setText(in, len);
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fFreshTextSet = TRUE;
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool CharsetDetector::setStripTagsFlag(UBool flag)
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool temp = fStripTags;
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fStripTags = flag;
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fFreshTextSet = TRUE;
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return temp;
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool CharsetDetector::getStripTagsFlag() const
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return fStripTags;
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setDeclaredEncoding(const char *encoding, int32_t len) const
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    textIn->setDeclaredEncoding(encoding,len);
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t CharsetDetector::getDetectableCount()
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    setRecognizers(status);
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return fCSRecognizers_size;
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CharsetMatch *CharsetDetector::detect(UErrorCode &status)
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t maxMatchesFound = 0;
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    detectAll(maxMatchesFound, status);
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(maxMatchesFound > 0) {
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return resultArray[0];
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, UErrorCode &status)
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(!textIn->isSet()) {
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        status = U_MISSING_RESOURCE_ERROR;// TODO:  Need to set proper status code for input text not set
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
24754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    } else if (fFreshTextSet) {
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CharsetRecognizer *csr;
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t            i;
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        textIn->MungeInput(fStripTags);
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Iterate over all possible charsets, remember all that
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // give a match quality > 0.
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        resultCount = 0;
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for (i = 0; i < fCSRecognizers_size; i += 1) {
25759d709d503bab6e2b61931737e662dd293b40578ccornelius            csr = fCSRecognizers[i]->recognizer;
25854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            if (csr->match(textIn, resultArray[resultCount])) {
25954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                resultCount++;
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        if (resultCount > 1) {
26454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            uprv_sortArray(resultArray, resultCount, sizeof resultArray[0], charsetMatchComparator, NULL, TRUE, &status);
26585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fFreshTextSet = FALSE;
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    maxMatchesFound = resultCount;
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return resultArray;
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
27459d709d503bab6e2b61931737e662dd293b40578ccorneliusvoid CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status)
27559d709d503bab6e2b61931737e662dd293b40578ccornelius{
27659d709d503bab6e2b61931737e662dd293b40578ccornelius    if (U_FAILURE(status)) {
27759d709d503bab6e2b61931737e662dd293b40578ccornelius        return;
27859d709d503bab6e2b61931737e662dd293b40578ccornelius    }
27959d709d503bab6e2b61931737e662dd293b40578ccornelius
28059d709d503bab6e2b61931737e662dd293b40578ccornelius    int32_t modIdx = -1;
28159d709d503bab6e2b61931737e662dd293b40578ccornelius    UBool isDefaultVal = FALSE;
28259d709d503bab6e2b61931737e662dd293b40578ccornelius    for (int32_t i = 0; i < fCSRecognizers_size; i++) {
28359d709d503bab6e2b61931737e662dd293b40578ccornelius        CSRecognizerInfo *csrinfo = fCSRecognizers[i];
28459d709d503bab6e2b61931737e662dd293b40578ccornelius        if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) {
28559d709d503bab6e2b61931737e662dd293b40578ccornelius            modIdx = i;
28659d709d503bab6e2b61931737e662dd293b40578ccornelius            isDefaultVal = (csrinfo->isDefaultEnabled == enabled);
28759d709d503bab6e2b61931737e662dd293b40578ccornelius            break;
28859d709d503bab6e2b61931737e662dd293b40578ccornelius        }
28959d709d503bab6e2b61931737e662dd293b40578ccornelius    }
29059d709d503bab6e2b61931737e662dd293b40578ccornelius    if (modIdx < 0) {
29159d709d503bab6e2b61931737e662dd293b40578ccornelius        // No matching encoding found
29259d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_ILLEGAL_ARGUMENT_ERROR;
29359d709d503bab6e2b61931737e662dd293b40578ccornelius        return;
29459d709d503bab6e2b61931737e662dd293b40578ccornelius    }
29559d709d503bab6e2b61931737e662dd293b40578ccornelius
29659d709d503bab6e2b61931737e662dd293b40578ccornelius    if (fEnabledRecognizers == NULL && !isDefaultVal) {
29759d709d503bab6e2b61931737e662dd293b40578ccornelius        // Create an array storing the non default setting
29859d709d503bab6e2b61931737e662dd293b40578ccornelius        fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size);
29959d709d503bab6e2b61931737e662dd293b40578ccornelius        if (fEnabledRecognizers == NULL) {
30059d709d503bab6e2b61931737e662dd293b40578ccornelius            status = U_MEMORY_ALLOCATION_ERROR;
30159d709d503bab6e2b61931737e662dd293b40578ccornelius            return;
30259d709d503bab6e2b61931737e662dd293b40578ccornelius        }
30359d709d503bab6e2b61931737e662dd293b40578ccornelius        // Initialize the array with default info
30459d709d503bab6e2b61931737e662dd293b40578ccornelius        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
30559d709d503bab6e2b61931737e662dd293b40578ccornelius            fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled;
30659d709d503bab6e2b61931737e662dd293b40578ccornelius        }
30759d709d503bab6e2b61931737e662dd293b40578ccornelius    }
30859d709d503bab6e2b61931737e662dd293b40578ccornelius
30959d709d503bab6e2b61931737e662dd293b40578ccornelius    if (fEnabledRecognizers != NULL) {
31059d709d503bab6e2b61931737e662dd293b40578ccornelius        fEnabledRecognizers[modIdx] = enabled;
31159d709d503bab6e2b61931737e662dd293b40578ccornelius    }
31259d709d503bab6e2b61931737e662dd293b40578ccornelius}
31359d709d503bab6e2b61931737e662dd293b40578ccornelius
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if( index > fCSRecognizers_size-1 || index < 0) {
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        status = U_INDEX_OUTOFBOUNDS_ERROR;
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return fCSRecognizers[index]->getName();
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct {
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t currIndex;
33059d709d503bab6e2b61931737e662dd293b40578ccornelius    UBool all;
33159d709d503bab6e2b61931737e662dd293b40578ccornelius    UBool *enabledRecognizers;
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} Context;
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumClose(UEnumeration *en) {
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(en->context != NULL) {
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        DELETE_ARRAY(en->context);
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    DELETE_ARRAY(en);
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
34659d709d503bab6e2b61931737e662dd293b40578ccorneliusenumCount(UEnumeration *en, UErrorCode *) {
34759d709d503bab6e2b61931737e662dd293b40578ccornelius    if (((Context *)en->context)->all) {
34859d709d503bab6e2b61931737e662dd293b40578ccornelius        // ucsdet_getAllDetectableCharsets, all charset detector names
34959d709d503bab6e2b61931737e662dd293b40578ccornelius        return fCSRecognizers_size;
35059d709d503bab6e2b61931737e662dd293b40578ccornelius    }
35159d709d503bab6e2b61931737e662dd293b40578ccornelius
35259d709d503bab6e2b61931737e662dd293b40578ccornelius    // Otherwise, ucsdet_getDetectableCharsets - only enabled ones
35359d709d503bab6e2b61931737e662dd293b40578ccornelius    int32_t count = 0;
35459d709d503bab6e2b61931737e662dd293b40578ccornelius    UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
35559d709d503bab6e2b61931737e662dd293b40578ccornelius    if (enabledArray != NULL) {
35659d709d503bab6e2b61931737e662dd293b40578ccornelius        // custom set
35759d709d503bab6e2b61931737e662dd293b40578ccornelius        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
35859d709d503bab6e2b61931737e662dd293b40578ccornelius            if (enabledArray[i]) {
35959d709d503bab6e2b61931737e662dd293b40578ccornelius                count++;
36059d709d503bab6e2b61931737e662dd293b40578ccornelius            }
36159d709d503bab6e2b61931737e662dd293b40578ccornelius        }
36259d709d503bab6e2b61931737e662dd293b40578ccornelius    } else {
36359d709d503bab6e2b61931737e662dd293b40578ccornelius        // default set
36459d709d503bab6e2b61931737e662dd293b40578ccornelius        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
36559d709d503bab6e2b61931737e662dd293b40578ccornelius            if (fCSRecognizers[i]->isDefaultEnabled) {
36659d709d503bab6e2b61931737e662dd293b40578ccornelius                count++;
36759d709d503bab6e2b61931737e662dd293b40578ccornelius            }
36859d709d503bab6e2b61931737e662dd293b40578ccornelius        }
36959d709d503bab6e2b61931737e662dd293b40578ccornelius    }
37059d709d503bab6e2b61931737e662dd293b40578ccornelius    return count;
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* U_CALLCONV
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) {
37559d709d503bab6e2b61931737e662dd293b40578ccornelius    const char *currName = NULL;
37659d709d503bab6e2b61931737e662dd293b40578ccornelius
37759d709d503bab6e2b61931737e662dd293b40578ccornelius    if (((Context *)en->context)->currIndex < fCSRecognizers_size) {
37859d709d503bab6e2b61931737e662dd293b40578ccornelius        if (((Context *)en->context)->all) {
37959d709d503bab6e2b61931737e662dd293b40578ccornelius            // ucsdet_getAllDetectableCharsets, all charset detector names
38059d709d503bab6e2b61931737e662dd293b40578ccornelius            currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
38159d709d503bab6e2b61931737e662dd293b40578ccornelius            ((Context *)en->context)->currIndex++;
38259d709d503bab6e2b61931737e662dd293b40578ccornelius        } else {
38359d709d503bab6e2b61931737e662dd293b40578ccornelius            // ucsdet_getDetectableCharsets
38459d709d503bab6e2b61931737e662dd293b40578ccornelius            UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
38559d709d503bab6e2b61931737e662dd293b40578ccornelius            if (enabledArray != NULL) {
38659d709d503bab6e2b61931737e662dd293b40578ccornelius                // custome set
38759d709d503bab6e2b61931737e662dd293b40578ccornelius                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
38859d709d503bab6e2b61931737e662dd293b40578ccornelius                    if (enabledArray[((Context *)en->context)->currIndex]) {
38959d709d503bab6e2b61931737e662dd293b40578ccornelius                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
39059d709d503bab6e2b61931737e662dd293b40578ccornelius                    }
39159d709d503bab6e2b61931737e662dd293b40578ccornelius                    ((Context *)en->context)->currIndex++;
39259d709d503bab6e2b61931737e662dd293b40578ccornelius                }
39359d709d503bab6e2b61931737e662dd293b40578ccornelius            } else {
39459d709d503bab6e2b61931737e662dd293b40578ccornelius                // default set
39559d709d503bab6e2b61931737e662dd293b40578ccornelius                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
39659d709d503bab6e2b61931737e662dd293b40578ccornelius                    if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) {
39759d709d503bab6e2b61931737e662dd293b40578ccornelius                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
39859d709d503bab6e2b61931737e662dd293b40578ccornelius                    }
39959d709d503bab6e2b61931737e662dd293b40578ccornelius                    ((Context *)en->context)->currIndex++;
40059d709d503bab6e2b61931737e662dd293b40578ccornelius                }
40159d709d503bab6e2b61931737e662dd293b40578ccornelius            }
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
40459d709d503bab6e2b61931737e662dd293b40578ccornelius
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(resultLength != NULL) {
40659d709d503bab6e2b61931737e662dd293b40578ccornelius        *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName);
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return currName;
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
41259d709d503bab6e2b61931737e662dd293b40578ccornelius
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumReset(UEnumeration *en, UErrorCode *) {
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ((Context *)en->context)->currIndex = 0;
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UEnumeration gCSDetEnumeration = {
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enumClose,
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enumCount,
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uenum_unextDefault,
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enumNext,
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enumReset
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
42859d709d503bab6e2b61931737e662dd293b40578ccorneliusU_CDECL_END
42959d709d503bab6e2b61931737e662dd293b40578ccornelius
43059d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_BEGIN
43159d709d503bab6e2b61931737e662dd293b40578ccornelius
43259d709d503bab6e2b61931737e662dd293b40578ccorneliusUEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status)
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43559d709d503bab6e2b61931737e662dd293b40578ccornelius    /* Initialize recognized charsets. */
43659d709d503bab6e2b61931737e662dd293b40578ccornelius    setRecognizers(status);
43759d709d503bab6e2b61931737e662dd293b40578ccornelius
43859d709d503bab6e2b61931737e662dd293b40578ccornelius    if(U_FAILURE(status)) {
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
44259d709d503bab6e2b61931737e662dd293b40578ccornelius    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
44359d709d503bab6e2b61931737e662dd293b40578ccornelius    if (en == NULL) {
44459d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
44559d709d503bab6e2b61931737e662dd293b40578ccornelius        return 0;
44659d709d503bab6e2b61931737e662dd293b40578ccornelius    }
44759d709d503bab6e2b61931737e662dd293b40578ccornelius    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
44859d709d503bab6e2b61931737e662dd293b40578ccornelius    en->context = (void*)NEW_ARRAY(Context, 1);
44959d709d503bab6e2b61931737e662dd293b40578ccornelius    if (en->context == NULL) {
45059d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
45159d709d503bab6e2b61931737e662dd293b40578ccornelius        DELETE_ARRAY(en);
45259d709d503bab6e2b61931737e662dd293b40578ccornelius        return 0;
45359d709d503bab6e2b61931737e662dd293b40578ccornelius    }
45459d709d503bab6e2b61931737e662dd293b40578ccornelius    uprv_memset(en->context, 0, sizeof(Context));
45559d709d503bab6e2b61931737e662dd293b40578ccornelius    ((Context*)en->context)->all = TRUE;
45659d709d503bab6e2b61931737e662dd293b40578ccornelius    return en;
45759d709d503bab6e2b61931737e662dd293b40578ccornelius}
45859d709d503bab6e2b61931737e662dd293b40578ccornelius
45959d709d503bab6e2b61931737e662dd293b40578ccorneliusUEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const
46059d709d503bab6e2b61931737e662dd293b40578ccornelius{
46159d709d503bab6e2b61931737e662dd293b40578ccornelius    if(U_FAILURE(status)) {
46259d709d503bab6e2b61931737e662dd293b40578ccornelius        return 0;
46359d709d503bab6e2b61931737e662dd293b40578ccornelius    }
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
46659d709d503bab6e2b61931737e662dd293b40578ccornelius    if (en == NULL) {
46759d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
46859d709d503bab6e2b61931737e662dd293b40578ccornelius        return 0;
46959d709d503bab6e2b61931737e662dd293b40578ccornelius    }
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    en->context = (void*)NEW_ARRAY(Context, 1);
47259d709d503bab6e2b61931737e662dd293b40578ccornelius    if (en->context == NULL) {
47359d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
47459d709d503bab6e2b61931737e662dd293b40578ccornelius        DELETE_ARRAY(en);
47559d709d503bab6e2b61931737e662dd293b40578ccornelius        return 0;
47659d709d503bab6e2b61931737e662dd293b40578ccornelius    }
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_memset(en->context, 0, sizeof(Context));
47859d709d503bab6e2b61931737e662dd293b40578ccornelius    ((Context*)en->context)->all = FALSE;
47959d709d503bab6e2b61931737e662dd293b40578ccornelius    ((Context*)en->context)->enabledRecognizers = fEnabledRecognizers;
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return en;
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48359d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_END
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48559d709d503bab6e2b61931737e662dd293b40578ccornelius#endif
486