1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ****************************************************************************
3b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Copyright (c) 2005-2009, International Business Machines Corporation and *
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved.                                             *
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ****************************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucsdet.h"
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cintltst.h"
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h>
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h>
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Number of elements in a true array (not a pointer).
 * The argument is parenthesized so that any array-valued expression is safe.
 */
#define ARRAY_SIZE(array) (sizeof(array)/sizeof((array)[0]))

/*
 * Allocates an uninitialized array of `count` elements of `type` with malloc();
 * the result may be NULL. The whole expansion is parenthesized so the macro can
 * be used inside larger expressions. Release the array with DELETE_ARRAY().
 */
#define NEW_ARRAY(type,count) ((type *) malloc((count) * sizeof(type)))
#define DELETE_ARRAY(array) free(array)
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* Test cases of this file; all of them are registered by addUCsdetTest(). */

/* Detector construction and enumeration. */
static void TestConstruction(void);

/* Detection of individual encodings. */
static void TestUTF8(void);
static void TestUTF16(void);
static void TestC1Bytes(void);
static void TestIBM424(void);
static void TestIBM420(void);

/* Detector options, error handling and robustness. */
static void TestInputFilter(void);
static void TestChaining(void);
static void TestBufferOverflow(void);
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid addUCsdetTest(TestNode** root);
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid addUCsdetTest(TestNode** root)
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    addTest(root, &TestConstruction, "ucsdetst/TestConstruction");
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    addTest(root, &TestUTF8, "ucsdetst/TestUTF8");
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    addTest(root, &TestUTF16, "ucsdetst/TestUTF16");
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    addTest(root, &TestC1Bytes, "ucsdetst/TestC1Bytes");
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    addTest(root, &TestInputFilter, "ucsdetst/TestInputFilter");
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    addTest(root, &TestChaining, "ucsdetst/TestErrorChaining");
44c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    addTest(root, &TestBufferOverflow, "ucsdetst/TestBufferOverflow");
4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_LEGACY_CONVERSION
46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    addTest(root, &TestIBM424, "ucsdetst/TestIBM424");
47b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    addTest(root, &TestIBM420, "ucsdetst/TestIBM420");
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t preflight(const UChar *src, int32_t length, UConverter *cnv)
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status;
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char buffer[1024];
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *dest, *destLimit = buffer + sizeof(buffer);
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *srcLimit = src + length;
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result = 0;
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        dest = buffer;
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status);
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result += (int32_t) (dest - buffer);
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while (status == U_BUFFER_OVERFLOW_ERROR);
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic char *extractBytes(const UChar *src, int32_t length, const char *codepage, int32_t *byteLength)
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverter *cnv = ucnv_open(codepage, &status);
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t byteCount = preflight(src, length, cnv);
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *srcLimit = src + length;
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *bytes = NEW_ARRAY(char, byteCount + 1);
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *dest = bytes, *destLimit = bytes + byteCount + 1;
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status);
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(cnv);
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *byteLength = byteCount;
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return bytes;
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Releases a byte buffer obtained from extractBytes().
 * Uses DELETE_ARRAY so that it stays paired with the NEW_ARRAY allocation.
 */
static void freeBytes(char *bytes)
{
    DELETE_ARRAY(bytes);
}
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestConstruction(void)
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCharsetDetector *csd = ucsdet_open(&status);
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UEnumeration *e = ucsdet_getAllDetectableCharsets(csd, &status);
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *name;
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t count = uenum_count(e, &status);
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, length;
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i = 0; i < count; i += 1) {
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        name = uenum_next(e, &length, &status);
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(name == NULL || length <= 0) {
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            log_err("ucsdet_getAllDetectableCharsets() returned a null or empty name!\n");
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* one past the list of all names must return NULL */
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    name = uenum_next(e, &length, &status);
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(name != NULL || length != 0 || U_FAILURE(status)) {
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("ucsdet_getAllDetectableCharsets(past the list) returned a non-null name!\n");
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uenum_close(e);
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_close(csd);
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestUTF8(void)
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char ss[] = "This is a string with some non-ascii characters that will "
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               "be converted to UTF-8, then shoved through the detection process.  "
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               "\\u0391\\u0392\\u0393\\u0394\\u0395"
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               "Sure would be nice if our source could contain Unicode directly!";
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t byteLength = 0, sLength = 0, dLength = 0;
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar s[sizeof(ss)];
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *bytes;
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCharsetDetector *csd = ucsdet_open(&status);
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UCharsetMatch *match;
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar detected[sizeof(ss)];
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sLength = u_unescape(ss, s, sizeof(ss));
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bytes = extractBytes(s, sLength, "UTF-8", &byteLength);
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_setText(csd, bytes, byteLength, &status);
134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        log_err("status is %s\n", u_errorName(status));
136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        goto bail;
137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    match = ucsdet_detect(csd, &status);
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (match == NULL) {
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Detection failure for UTF-8: got no matches.\n");
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto bail;
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dLength = ucsdet_getUChars(match, detected, sLength, &status);
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (u_strCompare(detected, dLength, s, sLength, FALSE) != 0) {
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Round-trip test failed!\n");
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_setDeclaredEncoding(csd, "UTF-8", 5, &status); /* for coverage */
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail:
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    freeBytes(bytes);
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_close(csd);
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestUTF16(void)
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Notice the BOM on the start of this string */
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const UChar chars[] = {
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xFEFF, 0x0623, 0x0648, 0x0631, 0x0648, 0x0628, 0x0627, 0x002C,
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x0020, 0x0628, 0x0631, 0x0645, 0x062c, 0x064a, 0x0627, 0x062a,
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x0020, 0x0627, 0x0644, 0x062d, 0x0627, 0x0633, 0x0648, 0x0628,
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x0020, 0x002b, 0x0020, 0x0627, 0x0646, 0x062a, 0x0631, 0x0646,
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x064a, 0x062a, 0x0000};
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t beLength = 0, leLength = 0, cLength = ARRAY_SIZE(chars);
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *beBytes = extractBytes(chars, cLength, "UTF-16BE", &beLength);
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *leBytes = extractBytes(chars, cLength, "UTF-16LE", &leLength);
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCharsetDetector *csd = ucsdet_open(&status);
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UCharsetMatch *match;
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *name;
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t conf;
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_setText(csd, beBytes, beLength, &status);
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    match = ucsdet_detect(csd, &status);
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (match == NULL) {
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Encoding detection failure for UTF-16BE: got no matches.\n");
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto try_le;
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    name  = ucsdet_getName(match, &status);
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    conf  = ucsdet_getConfidence(match, &status);
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (strcmp(name, "UTF-16BE") != 0) {
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Encoding detection failure for UTF-16BE: got %s\n", name);
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (conf != 100) {
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Did not get 100%% confidence for UTF-16BE: got %d\n", conf);
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutry_le:
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_setText(csd, leBytes, leLength, &status);
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    match = ucsdet_detect(csd, &status);
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (match == NULL) {
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Encoding detection failure for UTF-16LE: got no matches.\n");
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto bail;
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    name  = ucsdet_getName(match, &status);
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    conf = ucsdet_getConfidence(match, &status);
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (strcmp(name, "UTF-16LE") != 0) {
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Enconding detection failure for UTF-16LE: got %s\n", name);
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (conf != 100) {
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Did not get 100%% confidence for UTF-16LE: got %d\n", conf);
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail:
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    freeBytes(leBytes);
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    freeBytes(beBytes);
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_close(csd);
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestC1Bytes(void)
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_LEGACY_CONVERSION
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char ssISO[] = "This is a small sample of some English text. Just enough to be sure that it detects correctly.";
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char ssWindows[] = "This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes.";
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t sISOLength = 0, sWindowsLength = 0;
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar sISO[sizeof(ssISO)];
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar sWindows[sizeof(ssWindows)];
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t lISO = 0, lWindows = 0;
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *bISO;
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *bWindows;
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCharsetDetector *csd = ucsdet_open(&status);
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UCharsetMatch *match;
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *name;
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sISOLength = u_unescape(ssISO, sISO, sizeof(ssISO));
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sWindowsLength = u_unescape(ssWindows, sWindows, sizeof(ssWindows));
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bISO = extractBytes(sISO, sISOLength, "ISO-8859-1", &lISO);
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bWindows = extractBytes(sWindows, sWindowsLength, "windows-1252", &lWindows);
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_setText(csd, bWindows, lWindows, &status);
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    match = ucsdet_detect(csd, &status);
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (match == NULL) {
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("English test with C1 bytes got no matches.\n");
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto bail;
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    name  = ucsdet_getName(match, &status);
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (strcmp(name, "windows-1252") != 0) {
2556d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        log_data_err("English text with C1 bytes does not detect as windows-1252, but as %s. (Are you missing data?)\n", name);
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_setText(csd, bISO, lISO, &status);
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    match = ucsdet_detect(csd, &status);
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (match == NULL) {
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("English text without C1 bytes got no matches.\n");
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto bail;
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    name  = ucsdet_getName(match, &status);
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (strcmp(name, "ISO-8859-1") != 0) {
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("English text without C1 bytes does not detect as ISO-8859-1, but as %s\n", name);
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail:
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    freeBytes(bWindows);
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    freeBytes(bISO);
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_close(csd);
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestInputFilter(void)
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char ss[] = "<a> <lot> <of> <English> <inside> <the> <markup> Un tr\\u00E8s petit peu de Fran\\u00E7ais. <to> <confuse> <the> <detector>";
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t sLength = 0;
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar s[sizeof(ss)];
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t byteLength = 0;
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *bytes;
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UCharsetDetector *csd = ucsdet_open(&status);
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UCharsetMatch *match;
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *lang, *name;
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sLength = u_unescape(ss, s, sizeof(ss));
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bytes = extractBytes(s, sLength, "ISO-8859-1", &byteLength);
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_enableInputFilter(csd, TRUE);
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!ucsdet_isInputFilterEnabled(csd)) {
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("ucsdet_enableInputFilter(csd, TRUE) did not enable input filter!\n");
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_setText(csd, bytes, byteLength, &status);
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    match = ucsdet_detect(csd, &status);
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (match == NULL) {
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Turning on the input filter resulted in no matches.\n");
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto turn_off;
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    name = ucsdet_getName(match, &status);
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (name == NULL || strcmp(name, "ISO-8859-1") != 0) {
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Turning on the input filter resulted in %s rather than ISO-8859-1\n", name);
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        lang = ucsdet_getLanguage(match, &status);
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (lang == NULL || strcmp(lang, "fr") != 0) {
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            log_err("Input filter did not strip markup!\n");
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruturn_off:
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_enableInputFilter(csd, FALSE);
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_setText(csd, bytes, byteLength, &status);
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    match = ucsdet_detect(csd, &status);
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (match == NULL) {
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Turning off the input filter resulted in no matches.\n");
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto bail;
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    name = ucsdet_getName(match, &status);
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (name == NULL || strcmp(name, "ISO-8859-1") != 0) {
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Turning off the input filter resulted in %s rather than ISO-8859-1\n", name);
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        lang = ucsdet_getLanguage(match, &status);
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (lang == NULL || strcmp(lang, "en") != 0) {
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            log_err("Unfiltered input did not detect as English!\n");
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail:
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    freeBytes(bytes);
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_close(csd);
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestChaining(void) {
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_USELESS_COLLATOR_ERROR;
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_open(&status);
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_setText(NULL, NULL, 0, &status);
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_getName(NULL, &status);
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_getConfidence(NULL, &status);
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_getLanguage(NULL, &status);
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_detect(NULL, &status);
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_setDeclaredEncoding(NULL, NULL, 0, &status);
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_detectAll(NULL, NULL, &status);
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_getUChars(NULL, NULL, 0, &status);
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_getUChars(NULL, NULL, 0, &status);
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucsdet_close(NULL);
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* All of this code should have done nothing. */
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (status != U_USELESS_COLLATOR_ERROR) {
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Status got changed to %s\n", u_errorName(status));
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic void TestBufferOverflow(void) {
371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    static const char *testStrings[] = {
373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b", /* A partial ISO-2022 shift state at the end */
374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24", /* A partial ISO-2022 shift state at the end */
375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24\x28", /* A partial ISO-2022 shift state at the end */
376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24\x28\x44", /* A complete ISO-2022 shift state at the end with a bad one at the start */
377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "\x1b\x24\x28\x44", /* A complete ISO-2022 shift state at the end */
378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "\xa1", /* Could be a single byte shift-jis at the end */
379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "\x74\x68\xa1", /* Could be a single byte shift-jis at the end */
380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "\x74\x68\x65\xa1" /* Could be a single byte shift-jis at the end, but now we have English creeping in. */
381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    };
382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    static const char *testResults[] = {
383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "windows-1252",
384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "windows-1252",
385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "windows-1252",
386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "windows-1252",
387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "ISO-2022-JP",
388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        NULL,
389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        NULL,
390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "ISO-8859-1"
391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    };
392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t idx = 0;
393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UCharsetDetector *csd = ucsdet_open(&status);
394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const UCharsetMatch *match;
395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ucsdet_setDeclaredEncoding(csd, "ISO-2022-JP", -1, &status);
397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        log_err("Couldn't open detector. %s\n", u_errorName(status));
400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        goto bail;
401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    for (idx = 0; idx < ARRAY_SIZE(testStrings); idx++) {
404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ucsdet_setText(csd, testStrings[idx], -1, &status);
405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        match = ucsdet_detect(csd, &status);
406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (match == NULL) {
408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (testResults[idx] != NULL) {
409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                log_err("Unexpectedly got no results at index %d.\n", idx);
410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            else {
412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                log_verbose("Got no result as expected at index %d.\n", idx);
413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            continue;
415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (testResults[idx] == NULL || strcmp(ucsdet_getName(match, &status), testResults[idx]) != 0) {
418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            log_err("Unexpectedly got %s instead of %s at index %d with confidence %d.\n",
419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                ucsdet_getName(match, &status), testResults[idx], idx, ucsdet_getConfidence(match, &status));
420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            goto bail;
421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querubail:
425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ucsdet_close(csd);
426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
428b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void TestIBM424(void)
429b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{
430b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
431b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
432b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    static const UChar chars[] = {
433b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D4, 0x05E4, 0x05E8, 0x05E7, 0x05DC, 0x05D9, 0x05D8, 0x0020, 0x05D4, 0x05E6, 0x05D1, 0x05D0, 0x05D9, 0x0020, 0x05D4, 0x05E8,
434b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D0, 0x05E9, 0x05D9, 0x002C, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x05D0, 0x05DC, 0x05D5, 0x05E3, 0x0020, 0x05D0, 0x05D1, 0x05D9,
435b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D7, 0x05D9, 0x0020, 0x05DE, 0x05E0, 0x05D3, 0x05DC, 0x05D1, 0x05DC, 0x05D9, 0x05D8, 0x002C, 0x0020, 0x05D4, 0x05D5, 0x05E8,
436b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D4, 0x0020, 0x05E2, 0x05DC, 0x0020, 0x05E4, 0x05EA, 0x05D9, 0x05D7, 0x05EA, 0x0020, 0x05D7, 0x05E7, 0x05D9, 0x05E8, 0x05EA,
437b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x0020, 0x05DE, 0x05E6, 0x0022, 0x05D7, 0x0020, 0x05D1, 0x05E2, 0x05E7, 0x05D1, 0x05D5, 0x05EA, 0x0020, 0x05E2, 0x05D3, 0x05D5,
438b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D9, 0x05D5, 0x05EA, 0x0020, 0x05D7, 0x05D9, 0x05D9, 0x05DC, 0x05D9, 0x0020, 0x05E6, 0x05D4, 0x0022, 0x05DC, 0x0020, 0x05DE,
439b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05DE, 0x05D1, 0x05E6, 0x05E2, 0x0020, 0x05E2, 0x05D5, 0x05E4, 0x05E8, 0x05EA, 0x0020, 0x05D9, 0x05E6, 0x05D5, 0x05E7, 0x05D4,
440b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x0020, 0x05D1, 0x002B, 0x0020, 0x05E8, 0x05E6, 0x05D5, 0x05E2, 0x05EA, 0x0020, 0x05E2, 0x05D6, 0x05D4, 0x002E, 0x0020, 0x05DC,
441b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x0020, 0x05D4, 0x05E4, 0x05E6, 0x0022, 0x05E8, 0x002C, 0x0020, 0x05DE, 0x05D4, 0x05E2, 0x05D3,
442b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0020, 0x05E2, 0x05D5, 0x05DC, 0x05D4, 0x0020, 0x05EA, 0x05DE, 0x05D5, 0x05E0, 0x05D4, 0x0020,
443b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05E9, 0x05DC, 0x0020, 0x0022, 0x05D4, 0x05EA, 0x05E0, 0x05D4, 0x05D2, 0x05D5, 0x05EA, 0x0020, 0x05E4, 0x05E1, 0x05D5, 0x05DC,
444b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D4, 0x0020, 0x05DC, 0x05DB, 0x05D0, 0x05D5, 0x05E8, 0x05D4, 0x0020, 0x05E9, 0x05DC, 0x0020, 0x05D7, 0x05D9, 0x05D9, 0x05DC,
445b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D9, 0x05DD, 0x0020, 0x05D1, 0x05DE, 0x05D4, 0x05DC, 0x05DA, 0x0020, 0x05DE, 0x05D1, 0x05E6, 0x05E2, 0x0020, 0x05E2, 0x05D5,
446b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05E4, 0x05E8, 0x05EA, 0x0020, 0x05D9, 0x05E6, 0x05D5, 0x05E7, 0x05D4, 0x0022, 0x002E, 0x0020, 0x05DE, 0x05E0, 0x05D3, 0x05DC,
447b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D1, 0x05DC, 0x05D9, 0x05D8, 0x0020, 0x05E7, 0x05D9, 0x05D1, 0x05DC, 0x0020, 0x05D0, 0x05EA, 0x0020, 0x05D4, 0x05D7, 0x05DC,
448b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D8, 0x05EA, 0x05D5, 0x0020, 0x05DC, 0x05D0, 0x05D7, 0x05E8, 0x0020, 0x05E9, 0x05E2, 0x05D9, 0x05D9, 0x05DF, 0x0020, 0x05D1,
449b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05EA, 0x05DE, 0x05DC, 0x05D9, 0x05DC, 0x0020, 0x05D4, 0x05E2, 0x05D3, 0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0000
450b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    };
451b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
452b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    static const UChar chars_reverse[] = {
453b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05EA, 0x05D5, 0x05D9, 0x05D5, 0x05D3, 0x05E2, 0x05D4, 0x0020, 0x05DC, 0x05D9, 0x05DC, 0x05DE, 0x05EA,
454b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D1, 0x0020, 0x05DF, 0x05D9, 0x05D9, 0x05E2, 0x05E9, 0x0020, 0x05E8, 0x05D7, 0x05D0, 0x05DC, 0x0020, 0x05D5, 0x05EA, 0x05D8,
455b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05DC, 0x05D7, 0x05D4, 0x0020, 0x05EA, 0x05D0, 0x0020, 0x05DC, 0x05D1, 0x05D9, 0x05E7, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05D1,
456b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x002E, 0x0022, 0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4,
457b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE, 0x0020, 0x05DA, 0x05DC, 0x05D4, 0x05DE, 0x05D1, 0x0020, 0x05DD, 0x05D9,
458b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05DC, 0x05E9, 0x0020, 0x05D4, 0x05E8, 0x05D5, 0x05D0, 0x05DB, 0x05DC, 0x0020, 0x05D4,
459b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05DC, 0x05D5, 0x05E1, 0x05E4, 0x0020, 0x05EA, 0x05D5, 0x05D2, 0x05D4, 0x05E0, 0x05EA, 0x05D4, 0x0022, 0x0020, 0x05DC, 0x05E9,
460b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x0020, 0x05D4, 0x05E0, 0x05D5, 0x05DE, 0x05EA, 0x0020, 0x05D4, 0x05DC, 0x05D5, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D9, 0x05D5,
461b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D3, 0x05E2, 0x05D4, 0x05DE, 0x0020, 0x002C, 0x05E8, 0x0022, 0x05E6, 0x05E4, 0x05D4, 0x0020, 0x05D9, 0x05E8, 0x05D1, 0x05D3,
462b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05DC, 0x0020, 0x002E, 0x05D4, 0x05D6, 0x05E2, 0x0020, 0x05EA, 0x05E2, 0x05D5, 0x05E6, 0x05E8, 0x0020, 0x002B, 0x05D1, 0x0020,
463b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4, 0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE,
464b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05DE, 0x0020, 0x05DC, 0x0022, 0x05D4, 0x05E6, 0x0020, 0x05D9, 0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05EA, 0x05D5, 0x05D9,
465b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D5, 0x05D3, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D1, 0x05E7, 0x05E2, 0x05D1, 0x0020, 0x05D7, 0x0022, 0x05E6, 0x05DE, 0x0020,
466b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05EA, 0x05E8, 0x05D9, 0x05E7, 0x05D7, 0x0020, 0x05EA, 0x05D7, 0x05D9, 0x05EA, 0x05E4, 0x0020, 0x05DC, 0x05E2, 0x0020, 0x05D4,
467b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05E8, 0x05D5, 0x05D4, 0x0020, 0x002C, 0x05D8, 0x05D9, 0x05DC, 0x05D1, 0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x05D9, 0x05D7,
468b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05D9, 0x05D1, 0x05D0, 0x0020, 0x05E3, 0x05D5, 0x05DC, 0x05D0, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x002C, 0x05D9, 0x05E9, 0x05D0,
469b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x05E8, 0x05D4, 0x0020, 0x05D9, 0x05D0, 0x05D1, 0x05E6, 0x05D4, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05E7, 0x05E8, 0x05E4, 0x05D4,
470b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            0x0000
471b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    };
472b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
473b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    int32_t bLength = 0, brLength = 0, cLength = ARRAY_SIZE(chars), crLength = ARRAY_SIZE(chars_reverse);
474b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
475b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    char *bytes = extractBytes(chars, cLength, "IBM424", &bLength);
476b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    char *bytes_r = extractBytes(chars_reverse, crLength, "IBM424", &brLength);
477b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
478b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UCharsetDetector *csd = ucsdet_open(&status);
479b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const UCharsetMatch *match;
480b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const char *name;
481b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
482b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucsdet_setText(csd, bytes, bLength, &status);
483b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    match = ucsdet_detect(csd, &status);
484b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
485b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (match == NULL) {
486b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        log_err("Encoding detection failure for IBM424_rtl: got no matches.\n");
487b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        goto bail;
488b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
489b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
490b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    name  = ucsdet_getName(match, &status);
491b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (strcmp(name, "IBM424_rtl") != 0) {
4926d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        log_data_err("Encoding detection failure for IBM424_rtl: got %s. (Are you missing data?)\n", name);
493b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
494b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
495b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucsdet_setText(csd, bytes_r, brLength, &status);
496b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    match = ucsdet_detect(csd, &status);
497b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
498b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (match == NULL) {
499b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        log_err("Encoding detection failure for IBM424_ltr: got no matches.\n");
500b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        goto bail;
501b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
502b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
503b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    name  = ucsdet_getName(match, &status);
504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (strcmp(name, "IBM424_ltr") != 0) {
5056d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        log_data_err("Encoding detection failure for IBM424_ltr: got %s. (Are you missing data?)\n", name);
506b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
507b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
508b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querubail:
509b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    freeBytes(bytes);
510b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    freeBytes(bytes_r);
511b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucsdet_close(csd);
512b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
513b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
514b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void TestIBM420(void)
515b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{
516b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
517b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
518b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    static const UChar chars[] = {
519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0648, 0x064F, 0x0636, 0x0639, 0x062A, 0x0020, 0x0648, 0x0646, 0x064F, 0x0641, 0x0630, 0x062A, 0x0020, 0x0628, 0x0631, 0x0627,
520b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0645, 0x062C, 0x0020, 0x062A, 0x0623, 0x0645, 0x064A, 0x0646, 0x0020, 0x0639, 0x062F, 0x064A, 0x062F, 0x0629, 0x0020, 0x0641,
521b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x064A, 0x0020, 0x0645, 0x0624, 0x0633, 0x0633, 0x0629, 0x0020, 0x0627, 0x0644, 0x062A, 0x0623, 0x0645, 0x064A, 0x0646, 0x0020,
522b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0627, 0x0644, 0x0648, 0x0637, 0x0646, 0x064A, 0x002C, 0x0020, 0x0645, 0x0639, 0x0020, 0x0645, 0x0644, 0x0627, 0x0626, 0x0645,
523b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x062A, 0x0647, 0x0627, 0x0020, 0x062F, 0x0627, 0x0626, 0x0645, 0x0627, 0x064B, 0x0020, 0x0644, 0x0644, 0x0627, 0x062D, 0x062A,
524b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x064A, 0x0627, 0x062C, 0x0627, 0x062A, 0x0020, 0x0627, 0x0644, 0x0645, 0x062A, 0x063A, 0x064A, 0x0631, 0x0629, 0x0020, 0x0644,
525b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0644, 0x0645, 0x062C, 0x062A, 0x0645, 0x0639, 0x0020, 0x0648, 0x0644, 0x0644, 0x062F, 0x0648, 0x0644, 0x0629, 0x002E, 0x0020,
526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x062A, 0x0648, 0x0633, 0x0639, 0x062A, 0x0020, 0x0648, 0x062A, 0x0637, 0x0648, 0x0631, 0x062A, 0x0020, 0x0627, 0x0644, 0x0645,
527b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0624, 0x0633, 0x0633, 0x0629, 0x0020, 0x0628, 0x0647, 0x062F, 0x0641, 0x0020, 0x0636, 0x0645, 0x0627, 0x0646, 0x0020, 0x0634,
528b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0628, 0x0643, 0x0629, 0x0020, 0x0623, 0x0645, 0x0627, 0x0646, 0x0020, 0x0644, 0x0633, 0x0643, 0x0627, 0x0646, 0x0020, 0x062F,
529b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0648, 0x0644, 0x0629, 0x0020, 0x0627, 0x0633, 0x0631, 0x0627, 0x0626, 0x064A, 0x0644, 0x0020, 0x0628, 0x0648, 0x062C, 0x0647,
530b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0020, 0x0627, 0x0644, 0x0645, 0x062E, 0x0627, 0x0637, 0x0631, 0x0020, 0x0627, 0x0644, 0x0627, 0x0642, 0x062A, 0x0635, 0x0627,
531b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x062F, 0x064A, 0x0629, 0x0020, 0x0648, 0x0627, 0x0644, 0x0627, 0x062C, 0x062A, 0x0645, 0x0627, 0x0639, 0x064A, 0x0629, 0x002E,
532b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0000
533b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    };
534b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    static const UChar chars_reverse[] = {
535b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x002E, 0x0629, 0x064A, 0x0639, 0x0627, 0x0645, 0x062A, 0x062C, 0x0627, 0x0644, 0x0627, 0x0648, 0x0020, 0x0629, 0x064A, 0x062F,
536b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0627, 0x0635, 0x062A, 0x0642, 0x0627, 0x0644, 0x0627, 0x0020, 0x0631, 0x0637, 0x0627, 0x062E, 0x0645, 0x0644, 0x0627, 0x0020,
537b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0647, 0x062C, 0x0648, 0x0628, 0x0020, 0x0644, 0x064A, 0x0626, 0x0627, 0x0631, 0x0633, 0x0627, 0x0020, 0x0629, 0x0644, 0x0648,
538b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x062F, 0x0020, 0x0646, 0x0627, 0x0643, 0x0633, 0x0644, 0x0020, 0x0646, 0x0627, 0x0645, 0x0623, 0x0020, 0x0629, 0x0643, 0x0628,
539b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0634, 0x0020, 0x0646, 0x0627, 0x0645, 0x0636, 0x0020, 0x0641, 0x062F, 0x0647, 0x0628, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624,
540b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0631, 0x0648, 0x0637, 0x062A, 0x0648, 0x0020, 0x062A, 0x0639, 0x0633, 0x0648, 0x062A,
541b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0020, 0x002E, 0x0629, 0x0644, 0x0648, 0x062F, 0x0644, 0x0644, 0x0648, 0x0020, 0x0639, 0x0645, 0x062A, 0x062C, 0x0645, 0x0644,
542b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0644, 0x0020, 0x0629, 0x0631, 0x064A, 0x063A, 0x062A, 0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0627, 0x062C, 0x0627, 0x064A,
543b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x062A, 0x062D, 0x0627, 0x0644, 0x0644, 0x0020, 0x064B, 0x0627, 0x0645, 0x0626, 0x0627, 0x062F, 0x0020, 0x0627, 0x0647, 0x062A,
544b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0645, 0x0626, 0x0627, 0x0644, 0x0645, 0x0020, 0x0639, 0x0645, 0x0020, 0x002C, 0x064A, 0x0646, 0x0637, 0x0648, 0x0644, 0x0627,
545b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0644, 0x0627, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624, 0x0645, 0x0020, 0x064A,
546b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0641, 0x0020, 0x0629, 0x062F, 0x064A, 0x062F, 0x0639, 0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0020, 0x062C, 0x0645,
547b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0627, 0x0631, 0x0628, 0x0020, 0x062A, 0x0630, 0x0641, 0x064F, 0x0646, 0x0648, 0x0020, 0x062A, 0x0639, 0x0636, 0x064F, 0x0648,
548b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        0x0000,
549b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    };
550b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
551b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    int32_t bLength = 0, brLength = 0, cLength = ARRAY_SIZE(chars), crLength = ARRAY_SIZE(chars_reverse);
552b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    char *bytes = extractBytes(chars, cLength, "IBM420", &bLength);
554b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    char *bytes_r = extractBytes(chars_reverse, crLength, "IBM420", &brLength);
555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
556b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UCharsetDetector *csd = ucsdet_open(&status);
557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const UCharsetMatch *match;
558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const char *name;
559b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
560b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucsdet_setText(csd, bytes, bLength, &status);
561b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    match = ucsdet_detect(csd, &status);
562b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
563b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (match == NULL) {
564b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        log_err("Encoding detection failure for IBM420_rtl: got no matches.\n");
565b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        goto bail;
566b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
567b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
568b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    name  = ucsdet_getName(match, &status);
569b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (strcmp(name, "IBM420_rtl") != 0) {
5706d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        log_data_err("Encoding detection failure for IBM420_rtl: got %s. (Are you missing data?)\n", name);
571b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
572b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
573b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucsdet_setText(csd, bytes_r, brLength, &status);
574b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    match = ucsdet_detect(csd, &status);
575b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
576b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (match == NULL) {
577b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        log_err("Encoding detection failure for IBM420_ltr: got no matches.\n");
578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        goto bail;
579b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
580b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    name  = ucsdet_getName(match, &status);
582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (strcmp(name, "IBM420_ltr") != 0) {
5836d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        log_data_err("Encoding detection failure for IBM420_ltr: got %s. (Are you missing data?)\n", name);
584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querubail:
587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    freeBytes(bytes);
588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    freeBytes(bytes_r);
589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucsdet_close(csd);
590b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
591