/*
 ********************************************************************************
 *   Copyright (C) 2005-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ********************************************************************************
 */
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucsdet.h"
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csdetect.h"
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "csmatch.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_USE
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Element count of a true array. The argument must be an array object, not a pointer. */
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

/*
 * Raw storage for `count` objects of `type`, obtained from uprv_malloc().
 * This is a cast of the allocator's result only: no constructors run.
 * The whole expansion is parenthesized so it can be used inside larger expressions.
 */
#define NEW_ARRAY(type,count) ((type *) uprv_malloc((count) * sizeof(type)))

/* Hand storage obtained through NEW_ARRAY back to uprv_free(). */
#define DELETE_ARRAY(array) uprv_free((void *) (array))
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UCharsetDetector * U_EXPORT2
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_open(UErrorCode   *status)
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*status)) {
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    CharsetDetector* csd = new CharsetDetector(*status);
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(*status)) {
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete csd;
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        csd = NULL;
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (UCharsetDetector *) csd;
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_close(UCharsetDetector *ucsd)
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    CharsetDetector *csd = (CharsetDetector *) ucsd;
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete csd;
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*status)) {
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ((CharsetDetector *) ucsd)->setText(textIn, len);
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI const char * U_EXPORT2
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*status)) {
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ((CharsetMatch *) ucsm)->getName();
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*status)) {
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ((CharsetMatch *) ucsm)->getConfidence();
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI const char * U_EXPORT2
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*status)) {
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ((CharsetMatch *) ucsm)->getLanguage();
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI const UCharsetMatch * U_EXPORT2
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*status)) {
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (const UCharsetMatch *) ((CharsetDetector *) ucsd)->detect(*status);
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*status)) {
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ((CharsetDetector *) ucsd)->setDeclaredEncoding(encoding,length);
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI const UCharsetMatch**
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_detectAll(UCharsetDetector *ucsd,
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 int32_t *maxMatchesFound, UErrorCode *status)
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*status)) {
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    CharsetDetector *csd = (CharsetDetector *) ucsd;
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (const UCharsetMatch**)csd->detectAll(*maxMatchesFound,*status);
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// U_CAPI  const char * U_EXPORT2
// ucsdet_getDetectableCharsetName(const UCharsetDetector *csd, int32_t index, UErrorCode *status)
// {
//     if(U_FAILURE(*status)) {
//         return 0;
//     }
//     return csd->getCharsetName(index,*status);
// }
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// U_CAPI  int32_t U_EXPORT2
// ucsdet_getDetectableCharsetsCount(const UCharsetDetector *csd, UErrorCode *status)
// {
//     if(U_FAILURE(*status)) {
//         return -1;
//     }
//     return UCharsetDetector::getDetectableCount();
// }
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI  UBool U_EXPORT2
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // todo: could use an error return...
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (ucsd == NULL) {
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ((CharsetDetector *) ucsd)->getStripTagsFlag();
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI  UBool U_EXPORT2
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // todo: could use an error return...
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (ucsd == NULL) {
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    CharsetDetector *csd = (CharsetDetector *) ucsd;
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool prev = csd->getStripTagsFlag();
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    csd->setStripTagsFlag(filter);
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return prev;
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI  int32_t U_EXPORT2
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucsdet_getUChars(const UCharsetMatch *ucsm,
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 UChar *buf, int32_t cap, UErrorCode *status)
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*status)) {
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status);
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
181