10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru **********************************************************************
58de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert *   Copyright (C) 2005-2016, International Business Machines
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   Corporation and others.  All Rights Reserved.
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru **********************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucsdet.h"
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csdetect.h"
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csmatch.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uenumimp.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "umutex.h"
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucln_in.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uarrsort.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "inputext.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrsbcs.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrmbcs.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrutf8.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csrucode.h"
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csr2022.h"
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// Allocate raw storage for `count` elements of `type` through uprv_malloc and
// cast the result to `type *`. No constructors are run on the storage.
// The whole expansion is parenthesized so the cast applies before any operator
// written after the macro at the use site (for example a subscript).
#define NEW_ARRAY(type,count) ((type *) uprv_malloc((count) * sizeof(type)))
// Release storage obtained from NEW_ARRAY through uprv_free.
#define DELETE_ARRAY(array) uprv_free((void *) (array))
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
3559d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_BEGIN
3659d709d503bab6e2b61931737e662dd293b40578ccornelius
3759d709d503bab6e2b61931737e662dd293b40578ccorneliusstruct CSRecognizerInfo : public UMemory {
3859d709d503bab6e2b61931737e662dd293b40578ccornelius    CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled)
3959d709d503bab6e2b61931737e662dd293b40578ccornelius        : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {};
4059d709d503bab6e2b61931737e662dd293b40578ccornelius
4159d709d503bab6e2b61931737e662dd293b40578ccornelius    ~CSRecognizerInfo() {delete recognizer;};
4259d709d503bab6e2b61931737e662dd293b40578ccornelius
4359d709d503bab6e2b61931737e662dd293b40578ccornelius    CharsetRecognizer *recognizer;
4459d709d503bab6e2b61931737e662dd293b40578ccornelius    UBool isDefaultEnabled;
4559d709d503bab6e2b61931737e662dd293b40578ccornelius};
4659d709d503bab6e2b61931737e662dd293b40578ccornelius
4759d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_END
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
4959d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic icu::CSRecognizerInfo **fCSRecognizers = NULL;
5059d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic icu::UInitOnce gCSRecognizersInitOnce;
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t fCSRecognizers_size = 0;
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
5359d709d503bab6e2b61931737e662dd293b40578ccorneliusU_CDECL_BEGIN
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV csdet_cleanup(void)
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
5659d709d503bab6e2b61931737e662dd293b40578ccornelius    U_NAMESPACE_USE
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fCSRecognizers != NULL) {
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(int32_t r = 0; r < fCSRecognizers_size; r += 1) {
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            delete fCSRecognizers[r];
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fCSRecognizers[r] = NULL;
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        DELETE_ARRAY(fCSRecognizers);
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fCSRecognizers = NULL;
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fCSRecognizers_size = 0;
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
6759d709d503bab6e2b61931737e662dd293b40578ccornelius    gCSRecognizersInitOnce.reset();
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return TRUE;
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharsetMatchComparator(const void * /*context*/, const void *left, const void *right)
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_NAMESPACE_USE
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const CharsetMatch **csm_l = (const CharsetMatch **) left;
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const CharsetMatch **csm_r = (const CharsetMatch **) right;
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // NOTE: compare is backwards to sort from highest to lowest.
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (*csm_r)->getConfidence() - (*csm_l)->getConfidence();
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
8459d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic void U_CALLCONV initRecognizers(UErrorCode &status) {
8559d709d503bab6e2b61931737e662dd293b40578ccornelius    U_NAMESPACE_USE
8659d709d503bab6e2b61931737e662dd293b40578ccornelius    ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup);
8759d709d503bab6e2b61931737e662dd293b40578ccornelius    CSRecognizerInfo *tempArray[] = {
8859d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE),
8959d709d503bab6e2b61931737e662dd293b40578ccornelius
9059d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE),
9159d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE),
9259d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE),
9359d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE),
9459d709d503bab6e2b61931737e662dd293b40578ccornelius
9559d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE),
9659d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE),
9759d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), TRUE),
9859d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), TRUE),
9959d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_7_el(), TRUE),
10059d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), TRUE),
10159d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_8_he(), TRUE),
10259d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_windows_1251(), TRUE),
10359d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_windows_1256(), TRUE),
10459d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_KOI8_R(), TRUE),
10559d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), TRUE),
10659d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_sjis(), TRUE),
10759d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_gb_18030(), TRUE),
10859d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_euc_jp(), TRUE),
10959d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_euc_kr(), TRUE),
11059d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
11159d709d503bab6e2b61931737e662dd293b40578ccornelius
11259d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
1131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert#if !UCONFIG_ONLY_HTML_CONVERSION
11459d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
11559d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
11659d709d503bab6e2b61931737e662dd293b40578ccornelius
11759d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), FALSE),
11859d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
11959d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
12059d709d503bab6e2b61931737e662dd293b40578ccornelius        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
1211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert#endif
12259d709d503bab6e2b61931737e662dd293b40578ccornelius    };
1238de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    int32_t rCount = UPRV_LENGTHOF(tempArray);
12459d709d503bab6e2b61931737e662dd293b40578ccornelius
12559d709d503bab6e2b61931737e662dd293b40578ccornelius    fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount);
12659d709d503bab6e2b61931737e662dd293b40578ccornelius
12759d709d503bab6e2b61931737e662dd293b40578ccornelius    if (fCSRecognizers == NULL) {
12859d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
12959d709d503bab6e2b61931737e662dd293b40578ccornelius    }
13059d709d503bab6e2b61931737e662dd293b40578ccornelius    else {
13159d709d503bab6e2b61931737e662dd293b40578ccornelius        fCSRecognizers_size = rCount;
13259d709d503bab6e2b61931737e662dd293b40578ccornelius        for (int32_t r = 0; r < rCount; r += 1) {
13359d709d503bab6e2b61931737e662dd293b40578ccornelius            fCSRecognizers[r] = tempArray[r];
13459d709d503bab6e2b61931737e662dd293b40578ccornelius            if (fCSRecognizers[r] == NULL) {
13559d709d503bab6e2b61931737e662dd293b40578ccornelius                status = U_MEMORY_ALLOCATION_ERROR;
13659d709d503bab6e2b61931737e662dd293b40578ccornelius            }
13759d709d503bab6e2b61931737e662dd293b40578ccornelius        }
13859d709d503bab6e2b61931737e662dd293b40578ccornelius    }
13959d709d503bab6e2b61931737e662dd293b40578ccornelius}
14059d709d503bab6e2b61931737e662dd293b40578ccornelius
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setRecognizers(UErrorCode &status)
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
14759d709d503bab6e2b61931737e662dd293b40578ccornelius    umtx_initOnce(gCSRecognizersInitOnce, &initRecognizers, status);
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCharsetDetector::CharsetDetector(UErrorCode &status)
15185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho  : textIn(new InputText(status)), resultArray(NULL),
15259d709d503bab6e2b61931737e662dd293b40578ccornelius    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE),
15359d709d503bab6e2b61931737e662dd293b40578ccornelius    fEnabledRecognizers(NULL)
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(status)) {
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    setRecognizers(status);
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(status)) {
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    resultArray = (CharsetMatch **)uprv_malloc(sizeof(CharsetMatch *)*fCSRecognizers_size);
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (resultArray == NULL) {
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        status = U_MEMORY_ALLOCATION_ERROR;
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        resultArray[i] = new CharsetMatch();
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (resultArray[i] == NULL) {
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            status = U_MEMORY_ALLOCATION_ERROR;
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCharsetDetector::~CharsetDetector()
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete textIn;
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete resultArray[i];
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_free(resultArray);
19159d709d503bab6e2b61931737e662dd293b40578ccornelius
19259d709d503bab6e2b61931737e662dd293b40578ccornelius    if (fEnabledRecognizers) {
19359d709d503bab6e2b61931737e662dd293b40578ccornelius        uprv_free(fEnabledRecognizers);
19459d709d503bab6e2b61931737e662dd293b40578ccornelius    }
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setText(const char *in, int32_t len)
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    textIn->setText(in, len);
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fFreshTextSet = TRUE;
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool CharsetDetector::setStripTagsFlag(UBool flag)
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool temp = fStripTags;
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fStripTags = flag;
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fFreshTextSet = TRUE;
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return temp;
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool CharsetDetector::getStripTagsFlag() const
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return fStripTags;
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CharsetDetector::setDeclaredEncoding(const char *encoding, int32_t len) const
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    textIn->setDeclaredEncoding(encoding,len);
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t CharsetDetector::getDetectableCount()
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    setRecognizers(status);
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return fCSRecognizers_size;
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CharsetMatch *CharsetDetector::detect(UErrorCode &status)
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t maxMatchesFound = 0;
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    detectAll(maxMatchesFound, status);
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(maxMatchesFound > 0) {
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return resultArray[0];
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, UErrorCode &status)
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(!textIn->isSet()) {
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        status = U_MISSING_RESOURCE_ERROR;// TODO:  Need to set proper status code for input text not set
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
24954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    } else if (fFreshTextSet) {
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CharsetRecognizer *csr;
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t            i;
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        textIn->MungeInput(fStripTags);
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Iterate over all possible charsets, remember all that
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // give a match quality > 0.
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        resultCount = 0;
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for (i = 0; i < fCSRecognizers_size; i += 1) {
25959d709d503bab6e2b61931737e662dd293b40578ccornelius            csr = fCSRecognizers[i]->recognizer;
26054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            if (csr->match(textIn, resultArray[resultCount])) {
26154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                resultCount++;
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        if (resultCount > 1) {
26654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            uprv_sortArray(resultArray, resultCount, sizeof resultArray[0], charsetMatchComparator, NULL, TRUE, &status);
26785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fFreshTextSet = FALSE;
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    maxMatchesFound = resultCount;
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return resultArray;
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
27659d709d503bab6e2b61931737e662dd293b40578ccorneliusvoid CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status)
27759d709d503bab6e2b61931737e662dd293b40578ccornelius{
27859d709d503bab6e2b61931737e662dd293b40578ccornelius    if (U_FAILURE(status)) {
27959d709d503bab6e2b61931737e662dd293b40578ccornelius        return;
28059d709d503bab6e2b61931737e662dd293b40578ccornelius    }
28159d709d503bab6e2b61931737e662dd293b40578ccornelius
28259d709d503bab6e2b61931737e662dd293b40578ccornelius    int32_t modIdx = -1;
28359d709d503bab6e2b61931737e662dd293b40578ccornelius    UBool isDefaultVal = FALSE;
28459d709d503bab6e2b61931737e662dd293b40578ccornelius    for (int32_t i = 0; i < fCSRecognizers_size; i++) {
28559d709d503bab6e2b61931737e662dd293b40578ccornelius        CSRecognizerInfo *csrinfo = fCSRecognizers[i];
28659d709d503bab6e2b61931737e662dd293b40578ccornelius        if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) {
28759d709d503bab6e2b61931737e662dd293b40578ccornelius            modIdx = i;
28859d709d503bab6e2b61931737e662dd293b40578ccornelius            isDefaultVal = (csrinfo->isDefaultEnabled == enabled);
28959d709d503bab6e2b61931737e662dd293b40578ccornelius            break;
29059d709d503bab6e2b61931737e662dd293b40578ccornelius        }
29159d709d503bab6e2b61931737e662dd293b40578ccornelius    }
29259d709d503bab6e2b61931737e662dd293b40578ccornelius    if (modIdx < 0) {
29359d709d503bab6e2b61931737e662dd293b40578ccornelius        // No matching encoding found
29459d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_ILLEGAL_ARGUMENT_ERROR;
29559d709d503bab6e2b61931737e662dd293b40578ccornelius        return;
29659d709d503bab6e2b61931737e662dd293b40578ccornelius    }
29759d709d503bab6e2b61931737e662dd293b40578ccornelius
29859d709d503bab6e2b61931737e662dd293b40578ccornelius    if (fEnabledRecognizers == NULL && !isDefaultVal) {
29959d709d503bab6e2b61931737e662dd293b40578ccornelius        // Create an array storing the non default setting
30059d709d503bab6e2b61931737e662dd293b40578ccornelius        fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size);
30159d709d503bab6e2b61931737e662dd293b40578ccornelius        if (fEnabledRecognizers == NULL) {
30259d709d503bab6e2b61931737e662dd293b40578ccornelius            status = U_MEMORY_ALLOCATION_ERROR;
30359d709d503bab6e2b61931737e662dd293b40578ccornelius            return;
30459d709d503bab6e2b61931737e662dd293b40578ccornelius        }
30559d709d503bab6e2b61931737e662dd293b40578ccornelius        // Initialize the array with default info
30659d709d503bab6e2b61931737e662dd293b40578ccornelius        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
30759d709d503bab6e2b61931737e662dd293b40578ccornelius            fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled;
30859d709d503bab6e2b61931737e662dd293b40578ccornelius        }
30959d709d503bab6e2b61931737e662dd293b40578ccornelius    }
31059d709d503bab6e2b61931737e662dd293b40578ccornelius
31159d709d503bab6e2b61931737e662dd293b40578ccornelius    if (fEnabledRecognizers != NULL) {
31259d709d503bab6e2b61931737e662dd293b40578ccornelius        fEnabledRecognizers[modIdx] = enabled;
31359d709d503bab6e2b61931737e662dd293b40578ccornelius    }
31459d709d503bab6e2b61931737e662dd293b40578ccornelius}
31559d709d503bab6e2b61931737e662dd293b40578ccornelius
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if( index > fCSRecognizers_size-1 || index < 0) {
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        status = U_INDEX_OUTOFBOUNDS_ERROR;
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return fCSRecognizers[index]->getName();
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct {
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t currIndex;
33259d709d503bab6e2b61931737e662dd293b40578ccornelius    UBool all;
33359d709d503bab6e2b61931737e662dd293b40578ccornelius    UBool *enabledRecognizers;
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} Context;
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumClose(UEnumeration *en) {
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(en->context != NULL) {
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        DELETE_ARRAY(en->context);
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    DELETE_ARRAY(en);
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
34859d709d503bab6e2b61931737e662dd293b40578ccorneliusenumCount(UEnumeration *en, UErrorCode *) {
34959d709d503bab6e2b61931737e662dd293b40578ccornelius    if (((Context *)en->context)->all) {
35059d709d503bab6e2b61931737e662dd293b40578ccornelius        // ucsdet_getAllDetectableCharsets, all charset detector names
35159d709d503bab6e2b61931737e662dd293b40578ccornelius        return fCSRecognizers_size;
35259d709d503bab6e2b61931737e662dd293b40578ccornelius    }
35359d709d503bab6e2b61931737e662dd293b40578ccornelius
35459d709d503bab6e2b61931737e662dd293b40578ccornelius    // Otherwise, ucsdet_getDetectableCharsets - only enabled ones
35559d709d503bab6e2b61931737e662dd293b40578ccornelius    int32_t count = 0;
35659d709d503bab6e2b61931737e662dd293b40578ccornelius    UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
35759d709d503bab6e2b61931737e662dd293b40578ccornelius    if (enabledArray != NULL) {
35859d709d503bab6e2b61931737e662dd293b40578ccornelius        // custom set
35959d709d503bab6e2b61931737e662dd293b40578ccornelius        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
36059d709d503bab6e2b61931737e662dd293b40578ccornelius            if (enabledArray[i]) {
36159d709d503bab6e2b61931737e662dd293b40578ccornelius                count++;
36259d709d503bab6e2b61931737e662dd293b40578ccornelius            }
36359d709d503bab6e2b61931737e662dd293b40578ccornelius        }
36459d709d503bab6e2b61931737e662dd293b40578ccornelius    } else {
36559d709d503bab6e2b61931737e662dd293b40578ccornelius        // default set
36659d709d503bab6e2b61931737e662dd293b40578ccornelius        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
36759d709d503bab6e2b61931737e662dd293b40578ccornelius            if (fCSRecognizers[i]->isDefaultEnabled) {
36859d709d503bab6e2b61931737e662dd293b40578ccornelius                count++;
36959d709d503bab6e2b61931737e662dd293b40578ccornelius            }
37059d709d503bab6e2b61931737e662dd293b40578ccornelius        }
37159d709d503bab6e2b61931737e662dd293b40578ccornelius    }
37259d709d503bab6e2b61931737e662dd293b40578ccornelius    return count;
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* U_CALLCONV
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) {
37759d709d503bab6e2b61931737e662dd293b40578ccornelius    const char *currName = NULL;
37859d709d503bab6e2b61931737e662dd293b40578ccornelius
37959d709d503bab6e2b61931737e662dd293b40578ccornelius    if (((Context *)en->context)->currIndex < fCSRecognizers_size) {
38059d709d503bab6e2b61931737e662dd293b40578ccornelius        if (((Context *)en->context)->all) {
38159d709d503bab6e2b61931737e662dd293b40578ccornelius            // ucsdet_getAllDetectableCharsets, all charset detector names
38259d709d503bab6e2b61931737e662dd293b40578ccornelius            currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
38359d709d503bab6e2b61931737e662dd293b40578ccornelius            ((Context *)en->context)->currIndex++;
38459d709d503bab6e2b61931737e662dd293b40578ccornelius        } else {
38559d709d503bab6e2b61931737e662dd293b40578ccornelius            // ucsdet_getDetectableCharsets
38659d709d503bab6e2b61931737e662dd293b40578ccornelius            UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
38759d709d503bab6e2b61931737e662dd293b40578ccornelius            if (enabledArray != NULL) {
38859d709d503bab6e2b61931737e662dd293b40578ccornelius                // custome set
38959d709d503bab6e2b61931737e662dd293b40578ccornelius                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
39059d709d503bab6e2b61931737e662dd293b40578ccornelius                    if (enabledArray[((Context *)en->context)->currIndex]) {
39159d709d503bab6e2b61931737e662dd293b40578ccornelius                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
39259d709d503bab6e2b61931737e662dd293b40578ccornelius                    }
39359d709d503bab6e2b61931737e662dd293b40578ccornelius                    ((Context *)en->context)->currIndex++;
39459d709d503bab6e2b61931737e662dd293b40578ccornelius                }
39559d709d503bab6e2b61931737e662dd293b40578ccornelius            } else {
39659d709d503bab6e2b61931737e662dd293b40578ccornelius                // default set
39759d709d503bab6e2b61931737e662dd293b40578ccornelius                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
39859d709d503bab6e2b61931737e662dd293b40578ccornelius                    if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) {
39959d709d503bab6e2b61931737e662dd293b40578ccornelius                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
40059d709d503bab6e2b61931737e662dd293b40578ccornelius                    }
40159d709d503bab6e2b61931737e662dd293b40578ccornelius                    ((Context *)en->context)->currIndex++;
40259d709d503bab6e2b61931737e662dd293b40578ccornelius                }
40359d709d503bab6e2b61931737e662dd293b40578ccornelius            }
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
40659d709d503bab6e2b61931737e662dd293b40578ccornelius
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(resultLength != NULL) {
40859d709d503bab6e2b61931737e662dd293b40578ccornelius        *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName);
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return currName;
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
41459d709d503bab6e2b61931737e662dd293b40578ccornelius
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruenumReset(UEnumeration *en, UErrorCode *) {
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ((Context *)en->context)->currIndex = 0;
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UEnumeration gCSDetEnumeration = {
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enumClose,
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enumCount,
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uenum_unextDefault,
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enumNext,
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enumReset
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43059d709d503bab6e2b61931737e662dd293b40578ccorneliusU_CDECL_END
43159d709d503bab6e2b61931737e662dd293b40578ccornelius
43259d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_BEGIN
43359d709d503bab6e2b61931737e662dd293b40578ccornelius
43459d709d503bab6e2b61931737e662dd293b40578ccorneliusUEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status)
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43759d709d503bab6e2b61931737e662dd293b40578ccornelius    /* Initialize recognized charsets. */
43859d709d503bab6e2b61931737e662dd293b40578ccornelius    setRecognizers(status);
43959d709d503bab6e2b61931737e662dd293b40578ccornelius
44059d709d503bab6e2b61931737e662dd293b40578ccornelius    if(U_FAILURE(status)) {
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
44459d709d503bab6e2b61931737e662dd293b40578ccornelius    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
44559d709d503bab6e2b61931737e662dd293b40578ccornelius    if (en == NULL) {
44659d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
44759d709d503bab6e2b61931737e662dd293b40578ccornelius        return 0;
44859d709d503bab6e2b61931737e662dd293b40578ccornelius    }
44959d709d503bab6e2b61931737e662dd293b40578ccornelius    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
45059d709d503bab6e2b61931737e662dd293b40578ccornelius    en->context = (void*)NEW_ARRAY(Context, 1);
45159d709d503bab6e2b61931737e662dd293b40578ccornelius    if (en->context == NULL) {
45259d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
45359d709d503bab6e2b61931737e662dd293b40578ccornelius        DELETE_ARRAY(en);
45459d709d503bab6e2b61931737e662dd293b40578ccornelius        return 0;
45559d709d503bab6e2b61931737e662dd293b40578ccornelius    }
45659d709d503bab6e2b61931737e662dd293b40578ccornelius    uprv_memset(en->context, 0, sizeof(Context));
45759d709d503bab6e2b61931737e662dd293b40578ccornelius    ((Context*)en->context)->all = TRUE;
45859d709d503bab6e2b61931737e662dd293b40578ccornelius    return en;
45959d709d503bab6e2b61931737e662dd293b40578ccornelius}
46059d709d503bab6e2b61931737e662dd293b40578ccornelius
46159d709d503bab6e2b61931737e662dd293b40578ccorneliusUEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const
46259d709d503bab6e2b61931737e662dd293b40578ccornelius{
46359d709d503bab6e2b61931737e662dd293b40578ccornelius    if(U_FAILURE(status)) {
46459d709d503bab6e2b61931737e662dd293b40578ccornelius        return 0;
46559d709d503bab6e2b61931737e662dd293b40578ccornelius    }
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
46859d709d503bab6e2b61931737e662dd293b40578ccornelius    if (en == NULL) {
46959d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
47059d709d503bab6e2b61931737e662dd293b40578ccornelius        return 0;
47159d709d503bab6e2b61931737e662dd293b40578ccornelius    }
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    en->context = (void*)NEW_ARRAY(Context, 1);
47459d709d503bab6e2b61931737e662dd293b40578ccornelius    if (en->context == NULL) {
47559d709d503bab6e2b61931737e662dd293b40578ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
47659d709d503bab6e2b61931737e662dd293b40578ccornelius        DELETE_ARRAY(en);
47759d709d503bab6e2b61931737e662dd293b40578ccornelius        return 0;
47859d709d503bab6e2b61931737e662dd293b40578ccornelius    }
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_memset(en->context, 0, sizeof(Context));
48059d709d503bab6e2b61931737e662dd293b40578ccornelius    ((Context*)en->context)->all = FALSE;
48159d709d503bab6e2b61931737e662dd293b40578ccornelius    ((Context*)en->context)->enabledRecognizers = fEnabledRecognizers;
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return en;
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48559d709d503bab6e2b61931737e662dd293b40578ccorneliusU_NAMESPACE_END
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48759d709d503bab6e2b61931737e662dd293b40578ccornelius#endif
488