1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ********************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   Copyright (C) 2005-2007, International Business Machines
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   Corporation and others.  All Rights Reserved.
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ********************************************************************************
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_CONVERSION
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucsdet.h"
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "csdetect.h"
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "csmatch.h"
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h"
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_USE
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define DELETE_ARRAY(array) uprv_free((void *) (array))
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UCharsetDetector * U_EXPORT2
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_open(UErrorCode   *status)
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*status)) {
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CharsetDetector* csd = new CharsetDetector(*status);
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(*status)) {
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete csd;
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        csd = NULL;
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (UCharsetDetector *) csd;
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_close(UCharsetDetector *ucsd)
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CharsetDetector *csd = (CharsetDetector *) ucsd;
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete csd;
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*status)) {
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ((CharsetDetector *) ucsd)->setText(textIn, len);
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const char * U_EXPORT2
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*status)) {
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return ((CharsetMatch *) ucsm)->getName();
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*status)) {
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return ((CharsetMatch *) ucsm)->getConfidence();
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const char * U_EXPORT2
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*status)) {
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return ((CharsetMatch *) ucsm)->getLanguage();
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const UCharsetMatch * U_EXPORT2
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*status)) {
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (const UCharsetMatch *) ((CharsetDetector *) ucsd)->detect(*status);
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*status)) {
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ((CharsetDetector *) ucsd)->setDeclaredEncoding(encoding,length);
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const UCharsetMatch**
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_detectAll(UCharsetDetector *ucsd,
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 int32_t *maxMatchesFound, UErrorCode *status)
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*status)) {
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CharsetDetector *csd = (CharsetDetector *) ucsd;
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (const UCharsetMatch**)csd->detectAll(*maxMatchesFound,*status);
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// U_CAPI  const char * U_EXPORT2
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// ucsdet_getDetectableCharsetName(const UCharsetDetector *csd, int32_t index, UErrorCode *status)
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// {
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     if(U_FAILURE(*status)) {
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//         return 0;
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     }
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     return csd->getCharsetName(index,*status);
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// }
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// U_CAPI  int32_t U_EXPORT2
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// ucsdet_getDetectableCharsetsCount(const UCharsetDetector *csd, UErrorCode *status)
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// {
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     if(U_FAILURE(*status)) {
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//         return -1;
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     }
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     return UCharsetDetector::getDetectableCount();
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// }
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI  UBool U_EXPORT2
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // todo: could use an error return...
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (ucsd == NULL) {
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return ((CharsetDetector *) ucsd)->getStripTagsFlag();
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI  UBool U_EXPORT2
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // todo: could use an error return...
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (ucsd == NULL) {
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CharsetDetector *csd = (CharsetDetector *) ucsd;
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool prev = csd->getStripTagsFlag();
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    csd->setStripTagsFlag(filter);
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return prev;
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI  int32_t U_EXPORT2
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_getUChars(const UCharsetMatch *ucsm,
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 UChar *buf, int32_t cap, UErrorCode *status)
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*status)) {
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status);
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
181