1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ****************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (c) 2005-2009, International Business Machines Corporation and *
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * others. All Rights Reserved.                                             *
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ****************************************************************************
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucsdet.h"
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucnv.h"
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ustring.h"
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cintltst.h"
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <stdlib.h>
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <string.h>
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define NEW_ARRAY(type,count) (type *) malloc((count) * sizeof(type))
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define DELETE_ARRAY(array) free(array)
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestConstruction(void);
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestUTF8(void);
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestUTF16(void);
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestC1Bytes(void);
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestInputFilter(void);
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestChaining(void);
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestBufferOverflow(void);
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestIBM424(void);
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestIBM420(void);
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void addUCsdetTest(TestNode** root);
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void addUCsdetTest(TestNode** root)
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    addTest(root, &TestConstruction, "ucsdetst/TestConstruction");
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    addTest(root, &TestUTF8, "ucsdetst/TestUTF8");
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    addTest(root, &TestUTF16, "ucsdetst/TestUTF16");
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    addTest(root, &TestC1Bytes, "ucsdetst/TestC1Bytes");
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    addTest(root, &TestInputFilter, "ucsdetst/TestInputFilter");
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    addTest(root, &TestChaining, "ucsdetst/TestErrorChaining");
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    addTest(root, &TestBufferOverflow, "ucsdetst/TestBufferOverflow");
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_LEGACY_CONVERSION
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    addTest(root, &TestIBM424, "ucsdetst/TestIBM424");
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    addTest(root, &TestIBM420, "ucsdetst/TestIBM420");
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t preflight(const UChar *src, int32_t length, UConverter *cnv)
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status;
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char buffer[1024];
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *dest, *destLimit = buffer + sizeof(buffer);
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UChar *srcLimit = src + length;
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t result = 0;
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    do {
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dest = buffer;
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_ZERO_ERROR;
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status);
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result += (int32_t) (dest - buffer);
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } while (status == U_BUFFER_OVERFLOW_ERROR);
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static char *extractBytes(const UChar *src, int32_t length, const char *codepage, int32_t *byteLength)
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UConverter *cnv = ucnv_open(codepage, &status);
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t byteCount = preflight(src, length, cnv);
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UChar *srcLimit = src + length;
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *bytes = NEW_ARRAY(char, byteCount + 1);
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *dest = bytes, *destLimit = bytes + byteCount + 1;
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status);
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucnv_close(cnv);
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *byteLength = byteCount;
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return bytes;
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void freeBytes(char *bytes)
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    DELETE_ARRAY(bytes);
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestConstruction(void)
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharsetDetector *csd = ucsdet_open(&status);
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UEnumeration *e = ucsdet_getAllDetectableCharsets(csd, &status);
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *name;
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t count = uenum_count(e, &status);
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, length;
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i = 0; i < count; i += 1) {
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        name = uenum_next(e, &length, &status);
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(name == NULL || length <= 0) {
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            log_err("ucsdet_getAllDetectableCharsets() returned a null or empty name!\n");
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* one past the list of all names must return NULL */
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name = uenum_next(e, &length, &status);
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(name != NULL || length != 0 || U_FAILURE(status)) {
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("ucsdet_getAllDetectableCharsets(past the list) returned a non-null name!\n");
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uenum_close(e);
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_close(csd);
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestUTF8(void)
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char ss[] = "This is a string with some non-ascii characters that will "
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               "be converted to UTF-8, then shoved through the detection process.  "
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               "\\u0391\\u0392\\u0393\\u0394\\u0395"
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               "Sure would be nice if our source could contain Unicode directly!";
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t byteLength = 0, sLength = 0, dLength = 0;
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar s[sizeof(ss)];
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *bytes;
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharsetDetector *csd = ucsdet_open(&status);
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UCharsetMatch *match;
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar detected[sizeof(ss)];
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    sLength = u_unescape(ss, s, sizeof(ss));
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bytes = extractBytes(s, sLength, "UTF-8", &byteLength);
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, bytes, byteLength, &status);
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("status is %s\n", u_errorName(status));
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Detection failure for UTF-8: got no matches.\n");
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    dLength = ucsdet_getUChars(match, detected, sLength, &status);
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (u_strCompare(detected, dLength, s, sLength, FALSE) != 0) {
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Round-trip test failed!\n");
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setDeclaredEncoding(csd, "UTF-8", 5, &status); /* for coverage */
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)bail:
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    freeBytes(bytes);
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_close(csd);
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestUTF16(void)
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* Notice the BOM on the start of this string */
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar chars[] = {
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0xFEFF, 0x0623, 0x0648, 0x0631, 0x0648, 0x0628, 0x0627, 0x002C,
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0020, 0x0628, 0x0631, 0x0645, 0x062c, 0x064a, 0x0627, 0x062a,
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0020, 0x0627, 0x0644, 0x062d, 0x0627, 0x0633, 0x0648, 0x0628,
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0020, 0x002b, 0x0020, 0x0627, 0x0646, 0x062a, 0x0631, 0x0646,
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x064a, 0x062a, 0x0000};
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t beLength = 0, leLength = 0, cLength = ARRAY_SIZE(chars);
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *beBytes = extractBytes(chars, cLength, "UTF-16BE", &beLength);
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *leBytes = extractBytes(chars, cLength, "UTF-16LE", &leLength);
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharsetDetector *csd = ucsdet_open(&status);
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UCharsetMatch *match;
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *name;
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t conf;
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, beBytes, beLength, &status);
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Encoding detection failure for UTF-16BE: got no matches.\n");
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto try_le;
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name  = ucsdet_getName(match, &status);
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    conf  = ucsdet_getConfidence(match, &status);
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (strcmp(name, "UTF-16BE") != 0) {
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Encoding detection failure for UTF-16BE: got %s\n", name);
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (conf != 100) {
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Did not get 100%% confidence for UTF-16BE: got %d\n", conf);
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)try_le:
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, leBytes, leLength, &status);
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Encoding detection failure for UTF-16LE: got no matches.\n");
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name  = ucsdet_getName(match, &status);
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    conf = ucsdet_getConfidence(match, &status);
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (strcmp(name, "UTF-16LE") != 0) {
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Enconding detection failure for UTF-16LE: got %s\n", name);
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (conf != 100) {
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Did not get 100%% confidence for UTF-16LE: got %d\n", conf);
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)bail:
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    freeBytes(leBytes);
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    freeBytes(beBytes);
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_close(csd);
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestC1Bytes(void)
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_LEGACY_CONVERSION
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char ssISO[] = "This is a small sample of some English text. Just enough to be sure that it detects correctly.";
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char ssWindows[] = "This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes.";
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t sISOLength = 0, sWindowsLength = 0;
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar sISO[sizeof(ssISO)];
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar sWindows[sizeof(ssWindows)];
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t lISO = 0, lWindows = 0;
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *bISO;
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *bWindows;
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharsetDetector *csd = ucsdet_open(&status);
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UCharsetMatch *match;
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *name;
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    sISOLength = u_unescape(ssISO, sISO, sizeof(ssISO));
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    sWindowsLength = u_unescape(ssWindows, sWindows, sizeof(ssWindows));
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bISO = extractBytes(sISO, sISOLength, "ISO-8859-1", &lISO);
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bWindows = extractBytes(sWindows, sWindowsLength, "windows-1252", &lWindows);
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, bWindows, lWindows, &status);
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("English test with C1 bytes got no matches.\n");
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name  = ucsdet_getName(match, &status);
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (strcmp(name, "windows-1252") != 0) {
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_data_err("English text with C1 bytes does not detect as windows-1252, but as %s. (Are you missing data?)\n", name);
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, bISO, lISO, &status);
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("English text without C1 bytes got no matches.\n");
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name  = ucsdet_getName(match, &status);
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (strcmp(name, "ISO-8859-1") != 0) {
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("English text without C1 bytes does not detect as ISO-8859-1, but as %s\n", name);
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)bail:
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    freeBytes(bWindows);
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    freeBytes(bISO);
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_close(csd);
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestInputFilter(void)
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char ss[] = "<a> <lot> <of> <English> <inside> <the> <markup> Un tr\\u00E8s petit peu de Fran\\u00E7ais. <to> <confuse> <the> <detector>";
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t sLength = 0;
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar s[sizeof(ss)];
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t byteLength = 0;
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *bytes;
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharsetDetector *csd = ucsdet_open(&status);
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UCharsetMatch *match;
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *lang, *name;
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    sLength = u_unescape(ss, s, sizeof(ss));
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bytes = extractBytes(s, sLength, "ISO-8859-1", &byteLength);
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_enableInputFilter(csd, TRUE);
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!ucsdet_isInputFilterEnabled(csd)) {
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("ucsdet_enableInputFilter(csd, TRUE) did not enable input filter!\n");
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, bytes, byteLength, &status);
303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Turning on the input filter resulted in no matches.\n");
307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto turn_off;
308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name = ucsdet_getName(match, &status);
311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (name == NULL || strcmp(name, "ISO-8859-1") != 0) {
313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Turning on the input filter resulted in %s rather than ISO-8859-1\n", name);
314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        lang = ucsdet_getLanguage(match, &status);
316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (lang == NULL || strcmp(lang, "fr") != 0) {
318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            log_err("Input filter did not strip markup!\n");
319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)turn_off:
323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_enableInputFilter(csd, FALSE);
324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, bytes, byteLength, &status);
325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Turning off the input filter resulted in no matches.\n");
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name = ucsdet_getName(match, &status);
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (name == NULL || strcmp(name, "ISO-8859-1") != 0) {
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Turning off the input filter resulted in %s rather than ISO-8859-1\n", name);
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        lang = ucsdet_getLanguage(match, &status);
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (lang == NULL || strcmp(lang, "en") != 0) {
340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            log_err("Unfiltered input did not detect as English!\n");
341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)bail:
345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    freeBytes(bytes);
346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_close(csd);
347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestChaining(void) {
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_USELESS_COLLATOR_ERROR;
351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_open(&status);
353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(NULL, NULL, 0, &status);
354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_getName(NULL, &status);
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_getConfidence(NULL, &status);
356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_getLanguage(NULL, &status);
357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_detect(NULL, &status);
358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setDeclaredEncoding(NULL, NULL, 0, &status);
359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_detectAll(NULL, NULL, &status);
360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_getUChars(NULL, NULL, 0, &status);
361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_getUChars(NULL, NULL, 0, &status);
362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_close(NULL);
363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* All of this code should have done nothing. */
365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (status != U_USELESS_COLLATOR_ERROR) {
366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Status got changed to %s\n", u_errorName(status));
367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestBufferOverflow(void) {
371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *testStrings[] = {
373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b", /* A partial ISO-2022 shift state at the end */
374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24", /* A partial ISO-2022 shift state at the end */
375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24\x28", /* A partial ISO-2022 shift state at the end */
376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24\x28\x44", /* A complete ISO-2022 shift state at the end with a bad one at the start */
377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x1b\x24\x28\x44", /* A complete ISO-2022 shift state at the end */
378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xa1", /* Could be a single byte shift-jis at the end */
379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x74\x68\xa1", /* Could be a single byte shift-jis at the end */
380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x74\x68\x65\xa1" /* Could be a single byte shift-jis at the end, but now we have English creeping in. */
381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *testResults[] = {
383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "windows-1252",
384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "windows-1252",
385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "windows-1252",
386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "windows-1252",
387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ISO-2022-JP",
388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        NULL,
389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        NULL,
390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ISO-8859-1"
391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t idx = 0;
393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharsetDetector *csd = ucsdet_open(&status);
394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UCharsetMatch *match;
395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setDeclaredEncoding(csd, "ISO-2022-JP", -1, &status);
397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Couldn't open detector. %s\n", u_errorName(status));
400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (idx = 0; idx < ARRAY_SIZE(testStrings); idx++) {
404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ucsdet_setText(csd, testStrings[idx], -1, &status);
405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        match = ucsdet_detect(csd, &status);
406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (match == NULL) {
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testResults[idx] != NULL) {
409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                log_err("Unexpectedly got no results at index %d.\n", idx);
410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            else {
412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                log_verbose("Got no result as expected at index %d.\n", idx);
413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (testResults[idx] == NULL || strcmp(ucsdet_getName(match, &status), testResults[idx]) != 0) {
418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            log_err("Unexpectedly got %s instead of %s at index %d with confidence %d.\n",
419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                ucsdet_getName(match, &status), testResults[idx], idx, ucsdet_getConfidence(match, &status));
420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto bail;
421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)bail:
425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_close(csd);
426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestIBM424(void)
429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar chars[] = {
433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D4, 0x05E4, 0x05E8, 0x05E7, 0x05DC, 0x05D9, 0x05D8, 0x0020, 0x05D4, 0x05E6, 0x05D1, 0x05D0, 0x05D9, 0x0020, 0x05D4, 0x05E8,
434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D0, 0x05E9, 0x05D9, 0x002C, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x05D0, 0x05DC, 0x05D5, 0x05E3, 0x0020, 0x05D0, 0x05D1, 0x05D9,
435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D7, 0x05D9, 0x0020, 0x05DE, 0x05E0, 0x05D3, 0x05DC, 0x05D1, 0x05DC, 0x05D9, 0x05D8, 0x002C, 0x0020, 0x05D4, 0x05D5, 0x05E8,
436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D4, 0x0020, 0x05E2, 0x05DC, 0x0020, 0x05E4, 0x05EA, 0x05D9, 0x05D7, 0x05EA, 0x0020, 0x05D7, 0x05E7, 0x05D9, 0x05E8, 0x05EA,
437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x0020, 0x05DE, 0x05E6, 0x0022, 0x05D7, 0x0020, 0x05D1, 0x05E2, 0x05E7, 0x05D1, 0x05D5, 0x05EA, 0x0020, 0x05E2, 0x05D3, 0x05D5,
438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D9, 0x05D5, 0x05EA, 0x0020, 0x05D7, 0x05D9, 0x05D9, 0x05DC, 0x05D9, 0x0020, 0x05E6, 0x05D4, 0x0022, 0x05DC, 0x0020, 0x05DE,
439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05DE, 0x05D1, 0x05E6, 0x05E2, 0x0020, 0x05E2, 0x05D5, 0x05E4, 0x05E8, 0x05EA, 0x0020, 0x05D9, 0x05E6, 0x05D5, 0x05E7, 0x05D4,
440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x0020, 0x05D1, 0x002B, 0x0020, 0x05E8, 0x05E6, 0x05D5, 0x05E2, 0x05EA, 0x0020, 0x05E2, 0x05D6, 0x05D4, 0x002E, 0x0020, 0x05DC,
441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x0020, 0x05D4, 0x05E4, 0x05E6, 0x0022, 0x05E8, 0x002C, 0x0020, 0x05DE, 0x05D4, 0x05E2, 0x05D3,
442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0020, 0x05E2, 0x05D5, 0x05DC, 0x05D4, 0x0020, 0x05EA, 0x05DE, 0x05D5, 0x05E0, 0x05D4, 0x0020,
443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05E9, 0x05DC, 0x0020, 0x0022, 0x05D4, 0x05EA, 0x05E0, 0x05D4, 0x05D2, 0x05D5, 0x05EA, 0x0020, 0x05E4, 0x05E1, 0x05D5, 0x05DC,
444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D4, 0x0020, 0x05DC, 0x05DB, 0x05D0, 0x05D5, 0x05E8, 0x05D4, 0x0020, 0x05E9, 0x05DC, 0x0020, 0x05D7, 0x05D9, 0x05D9, 0x05DC,
445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D9, 0x05DD, 0x0020, 0x05D1, 0x05DE, 0x05D4, 0x05DC, 0x05DA, 0x0020, 0x05DE, 0x05D1, 0x05E6, 0x05E2, 0x0020, 0x05E2, 0x05D5,
446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05E4, 0x05E8, 0x05EA, 0x0020, 0x05D9, 0x05E6, 0x05D5, 0x05E7, 0x05D4, 0x0022, 0x002E, 0x0020, 0x05DE, 0x05E0, 0x05D3, 0x05DC,
447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D1, 0x05DC, 0x05D9, 0x05D8, 0x0020, 0x05E7, 0x05D9, 0x05D1, 0x05DC, 0x0020, 0x05D0, 0x05EA, 0x0020, 0x05D4, 0x05D7, 0x05DC,
448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D8, 0x05EA, 0x05D5, 0x0020, 0x05DC, 0x05D0, 0x05D7, 0x05E8, 0x0020, 0x05E9, 0x05E2, 0x05D9, 0x05D9, 0x05DF, 0x0020, 0x05D1,
449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05EA, 0x05DE, 0x05DC, 0x05D9, 0x05DC, 0x0020, 0x05D4, 0x05E2, 0x05D3, 0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0000
450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar chars_reverse[] = {
453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05EA, 0x05D5, 0x05D9, 0x05D5, 0x05D3, 0x05E2, 0x05D4, 0x0020, 0x05DC, 0x05D9, 0x05DC, 0x05DE, 0x05EA,
454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D1, 0x0020, 0x05DF, 0x05D9, 0x05D9, 0x05E2, 0x05E9, 0x0020, 0x05E8, 0x05D7, 0x05D0, 0x05DC, 0x0020, 0x05D5, 0x05EA, 0x05D8,
455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05DC, 0x05D7, 0x05D4, 0x0020, 0x05EA, 0x05D0, 0x0020, 0x05DC, 0x05D1, 0x05D9, 0x05E7, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05D1,
456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x002E, 0x0022, 0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4,
457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE, 0x0020, 0x05DA, 0x05DC, 0x05D4, 0x05DE, 0x05D1, 0x0020, 0x05DD, 0x05D9,
458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05DC, 0x05E9, 0x0020, 0x05D4, 0x05E8, 0x05D5, 0x05D0, 0x05DB, 0x05DC, 0x0020, 0x05D4,
459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05DC, 0x05D5, 0x05E1, 0x05E4, 0x0020, 0x05EA, 0x05D5, 0x05D2, 0x05D4, 0x05E0, 0x05EA, 0x05D4, 0x0022, 0x0020, 0x05DC, 0x05E9,
460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x0020, 0x05D4, 0x05E0, 0x05D5, 0x05DE, 0x05EA, 0x0020, 0x05D4, 0x05DC, 0x05D5, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D9, 0x05D5,
461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D3, 0x05E2, 0x05D4, 0x05DE, 0x0020, 0x002C, 0x05E8, 0x0022, 0x05E6, 0x05E4, 0x05D4, 0x0020, 0x05D9, 0x05E8, 0x05D1, 0x05D3,
462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05DC, 0x0020, 0x002E, 0x05D4, 0x05D6, 0x05E2, 0x0020, 0x05EA, 0x05E2, 0x05D5, 0x05E6, 0x05E8, 0x0020, 0x002B, 0x05D1, 0x0020,
463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4, 0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE,
464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05DE, 0x0020, 0x05DC, 0x0022, 0x05D4, 0x05E6, 0x0020, 0x05D9, 0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05EA, 0x05D5, 0x05D9,
465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D5, 0x05D3, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D1, 0x05E7, 0x05E2, 0x05D1, 0x0020, 0x05D7, 0x0022, 0x05E6, 0x05DE, 0x0020,
466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05EA, 0x05E8, 0x05D9, 0x05E7, 0x05D7, 0x0020, 0x05EA, 0x05D7, 0x05D9, 0x05EA, 0x05E4, 0x0020, 0x05DC, 0x05E2, 0x0020, 0x05D4,
467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05E8, 0x05D5, 0x05D4, 0x0020, 0x002C, 0x05D8, 0x05D9, 0x05DC, 0x05D1, 0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x05D9, 0x05D7,
468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05D9, 0x05D1, 0x05D0, 0x0020, 0x05E3, 0x05D5, 0x05DC, 0x05D0, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x002C, 0x05D9, 0x05E9, 0x05D0,
469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x05E8, 0x05D4, 0x0020, 0x05D9, 0x05D0, 0x05D1, 0x05E6, 0x05D4, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05E7, 0x05E8, 0x05E4, 0x05D4,
470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x0000
471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t bLength = 0, brLength = 0, cLength = ARRAY_SIZE(chars), crLength = ARRAY_SIZE(chars_reverse);
474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *bytes = extractBytes(chars, cLength, "IBM424", &bLength);
476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *bytes_r = extractBytes(chars_reverse, crLength, "IBM424", &brLength);
477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharsetDetector *csd = ucsdet_open(&status);
479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UCharsetMatch *match;
480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *name;
481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, bytes, bLength, &status);
483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Encoding detection failure for IBM424_rtl: got no matches.\n");
487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name  = ucsdet_getName(match, &status);
491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (strcmp(name, "IBM424_rtl") != 0) {
492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_data_err("Encoding detection failure for IBM424_rtl: got %s. (Are you missing data?)\n", name);
493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, bytes_r, brLength, &status);
496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Encoding detection failure for IBM424_ltr: got no matches.\n");
500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name  = ucsdet_getName(match, &status);
504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (strcmp(name, "IBM424_ltr") != 0) {
505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_data_err("Encoding detection failure for IBM424_ltr: got %s. (Are you missing data?)\n", name);
506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)bail:
509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    freeBytes(bytes);
510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    freeBytes(bytes_r);
511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_close(csd);
512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void TestIBM420(void)
515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar chars[] = {
519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0648, 0x064F, 0x0636, 0x0639, 0x062A, 0x0020, 0x0648, 0x0646, 0x064F, 0x0641, 0x0630, 0x062A, 0x0020, 0x0628, 0x0631, 0x0627,
520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0645, 0x062C, 0x0020, 0x062A, 0x0623, 0x0645, 0x064A, 0x0646, 0x0020, 0x0639, 0x062F, 0x064A, 0x062F, 0x0629, 0x0020, 0x0641,
521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x064A, 0x0020, 0x0645, 0x0624, 0x0633, 0x0633, 0x0629, 0x0020, 0x0627, 0x0644, 0x062A, 0x0623, 0x0645, 0x064A, 0x0646, 0x0020,
522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0627, 0x0644, 0x0648, 0x0637, 0x0646, 0x064A, 0x002C, 0x0020, 0x0645, 0x0639, 0x0020, 0x0645, 0x0644, 0x0627, 0x0626, 0x0645,
523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x062A, 0x0647, 0x0627, 0x0020, 0x062F, 0x0627, 0x0626, 0x0645, 0x0627, 0x064B, 0x0020, 0x0644, 0x0644, 0x0627, 0x062D, 0x062A,
524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x064A, 0x0627, 0x062C, 0x0627, 0x062A, 0x0020, 0x0627, 0x0644, 0x0645, 0x062A, 0x063A, 0x064A, 0x0631, 0x0629, 0x0020, 0x0644,
525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0644, 0x0645, 0x062C, 0x062A, 0x0645, 0x0639, 0x0020, 0x0648, 0x0644, 0x0644, 0x062F, 0x0648, 0x0644, 0x0629, 0x002E, 0x0020,
526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x062A, 0x0648, 0x0633, 0x0639, 0x062A, 0x0020, 0x0648, 0x062A, 0x0637, 0x0648, 0x0631, 0x062A, 0x0020, 0x0627, 0x0644, 0x0645,
527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0624, 0x0633, 0x0633, 0x0629, 0x0020, 0x0628, 0x0647, 0x062F, 0x0641, 0x0020, 0x0636, 0x0645, 0x0627, 0x0646, 0x0020, 0x0634,
528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0628, 0x0643, 0x0629, 0x0020, 0x0623, 0x0645, 0x0627, 0x0646, 0x0020, 0x0644, 0x0633, 0x0643, 0x0627, 0x0646, 0x0020, 0x062F,
529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0648, 0x0644, 0x0629, 0x0020, 0x0627, 0x0633, 0x0631, 0x0627, 0x0626, 0x064A, 0x0644, 0x0020, 0x0628, 0x0648, 0x062C, 0x0647,
530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0020, 0x0627, 0x0644, 0x0645, 0x062E, 0x0627, 0x0637, 0x0631, 0x0020, 0x0627, 0x0644, 0x0627, 0x0642, 0x062A, 0x0635, 0x0627,
531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x062F, 0x064A, 0x0629, 0x0020, 0x0648, 0x0627, 0x0644, 0x0627, 0x062C, 0x062A, 0x0645, 0x0627, 0x0639, 0x064A, 0x0629, 0x002E,
532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0000
533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar chars_reverse[] = {
535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x002E, 0x0629, 0x064A, 0x0639, 0x0627, 0x0645, 0x062A, 0x062C, 0x0627, 0x0644, 0x0627, 0x0648, 0x0020, 0x0629, 0x064A, 0x062F,
536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0627, 0x0635, 0x062A, 0x0642, 0x0627, 0x0644, 0x0627, 0x0020, 0x0631, 0x0637, 0x0627, 0x062E, 0x0645, 0x0644, 0x0627, 0x0020,
537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0647, 0x062C, 0x0648, 0x0628, 0x0020, 0x0644, 0x064A, 0x0626, 0x0627, 0x0631, 0x0633, 0x0627, 0x0020, 0x0629, 0x0644, 0x0648,
538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x062F, 0x0020, 0x0646, 0x0627, 0x0643, 0x0633, 0x0644, 0x0020, 0x0646, 0x0627, 0x0645, 0x0623, 0x0020, 0x0629, 0x0643, 0x0628,
539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0634, 0x0020, 0x0646, 0x0627, 0x0645, 0x0636, 0x0020, 0x0641, 0x062F, 0x0647, 0x0628, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624,
540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0631, 0x0648, 0x0637, 0x062A, 0x0648, 0x0020, 0x062A, 0x0639, 0x0633, 0x0648, 0x062A,
541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0020, 0x002E, 0x0629, 0x0644, 0x0648, 0x062F, 0x0644, 0x0644, 0x0648, 0x0020, 0x0639, 0x0645, 0x062A, 0x062C, 0x0645, 0x0644,
542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0644, 0x0020, 0x0629, 0x0631, 0x064A, 0x063A, 0x062A, 0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0627, 0x062C, 0x0627, 0x064A,
543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x062A, 0x062D, 0x0627, 0x0644, 0x0644, 0x0020, 0x064B, 0x0627, 0x0645, 0x0626, 0x0627, 0x062F, 0x0020, 0x0627, 0x0647, 0x062A,
544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0645, 0x0626, 0x0627, 0x0644, 0x0645, 0x0020, 0x0639, 0x0645, 0x0020, 0x002C, 0x064A, 0x0646, 0x0637, 0x0648, 0x0644, 0x0627,
545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0644, 0x0627, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624, 0x0645, 0x0020, 0x064A,
546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0641, 0x0020, 0x0629, 0x062F, 0x064A, 0x062F, 0x0639, 0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0020, 0x062C, 0x0645,
547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0627, 0x0631, 0x0628, 0x0020, 0x062A, 0x0630, 0x0641, 0x064F, 0x0646, 0x0648, 0x0020, 0x062A, 0x0639, 0x0636, 0x064F, 0x0648,
548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x0000,
549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t bLength = 0, brLength = 0, cLength = ARRAY_SIZE(chars), crLength = ARRAY_SIZE(chars_reverse);
552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *bytes = extractBytes(chars, cLength, "IBM420", &bLength);
554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *bytes_r = extractBytes(chars_reverse, crLength, "IBM420", &brLength);
555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharsetDetector *csd = ucsdet_open(&status);
557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UCharsetMatch *match;
558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *name;
559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, bytes, bLength, &status);
561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Encoding detection failure for IBM420_rtl: got no matches.\n");
565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name  = ucsdet_getName(match, &status);
569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (strcmp(name, "IBM420_rtl") != 0) {
570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_data_err("Encoding detection failure for IBM420_rtl: got %s. (Are you missing data?)\n", name);
571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_setText(csd, bytes_r, brLength, &status);
574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    match = ucsdet_detect(csd, &status);
575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (match == NULL) {
577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_err("Encoding detection failure for IBM420_ltr: got no matches.\n");
578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto bail;
579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name  = ucsdet_getName(match, &status);
582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (strcmp(name, "IBM420_ltr") != 0) {
583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        log_data_err("Encoding detection failure for IBM420_ltr: got %s. (Are you missing data?)\n", name);
584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)bail:
587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    freeBytes(bytes);
588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    freeBytes(bytes_r);
589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucsdet_close(csd);
590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
591