ucsdetst.c revision b0ac937921a2c196d8b9da665135bf6ba01a1ccf
1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru **************************************************************************** 3b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Copyright (c) 2005-2009, International Business Machines Corporation and * 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved. * 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru **************************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucsdet.h" 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h" 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cintltst.h" 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h> 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h> 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define NEW_ARRAY(type,count) (type *) malloc((count) * sizeof(type)) 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define DELETE_ARRAY(array) free(array) 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestConstruction(void); 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestUTF8(void); 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestUTF16(void); 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestC1Bytes(void); 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestInputFilter(void); 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestChaining(void); 30c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic void TestBufferOverflow(void); 31b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void TestIBM424(void); 32b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void TestIBM420(void); 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid addUCsdetTest(TestNode** root); 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid addUCsdetTest(TestNode** root) 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestConstruction, "ucsdetst/TestConstruction"); 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestUTF8, "ucsdetst/TestUTF8"); 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestUTF16, "ucsdetst/TestUTF16"); 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestC1Bytes, "ucsdetst/TestC1Bytes"); 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestInputFilter, "ucsdetst/TestInputFilter"); 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestChaining, "ucsdetst/TestErrorChaining"); 44c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru addTest(root, &TestBufferOverflow, "ucsdetst/TestBufferOverflow"); 45b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru addTest(root, &TestIBM424, "ucsdetst/TestIBM424"); 46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru addTest(root, &TestIBM420, "ucsdetst/TestIBM420"); 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t preflight(const UChar *src, int32_t length, UConverter *cnv) 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status; 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buffer[1024]; 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *dest, *destLimit = buffer + sizeof(buffer); 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *srcLimit = src + length; 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = 0; 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = buffer; 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status); 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result += (int32_t) (dest - buffer); 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (status == U_BUFFER_OVERFLOW_ERROR); 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic char *extractBytes(const UChar *src, int32_t length, const char *codepage, int32_t *byteLength) 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverter *cnv = ucnv_open(codepage, &status); 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t byteCount = preflight(src, length, cnv); 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *srcLimit = src + length; 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bytes = NEW_ARRAY(char, byteCount + 1); 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *dest = bytes, *destLimit = bytes + byteCount + 1; 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status); 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(cnv); 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *byteLength = byteCount; 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return bytes; 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void freeBytes(char *bytes) 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru DELETE_ARRAY(bytes); 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestConstruction(void) 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UEnumeration *e = ucsdet_getAllDetectableCharsets(csd, &status); 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name; 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count = uenum_count(e, &status); 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, length; 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i = 0; i < count; i += 1) { 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = uenum_next(e, &length, &status); 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(name == NULL || length <= 0) { 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("ucsdet_getAllDetectableCharsets() returned a null or empty name!\n"); 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* one past the list of all names must return NULL */ 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = uenum_next(e, &length, &status); 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(name != NULL || length != 0 || U_FAILURE(status)) { 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("ucsdet_getAllDetectableCharsets(past the list) returned a non-null name!\n"); 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uenum_close(e); 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(csd); 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestUTF8(void) 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char ss[] = "This is a string with some non-ascii characters that will " 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "be converted to UTF-8, then shoved through the detection process. " 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0391\\u0392\\u0393\\u0394\\u0395" 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Sure would be nice if our source could contain Unicode directly!"; 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t byteLength = 0, sLength = 0, dLength = 0; 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar s[sizeof(ss)]; 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bytes; 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCharsetMatch *match; 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar detected[sizeof(ss)]; 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sLength = u_unescape(ss, s, sizeof(ss)); 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bytes = extractBytes(s, sLength, "UTF-8", &byteLength); 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, bytes, byteLength, &status); 132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru log_err("status is %s\n", u_errorName(status)); 134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto bail; 135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Detection failure for UTF-8: got no matches.\n"); 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto bail; 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dLength = ucsdet_getUChars(match, detected, sLength, &status); 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u_strCompare(detected, dLength, s, sLength, FALSE) != 0) { 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Round-trip test failed!\n"); 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setDeclaredEncoding(csd, "UTF-8", 5, &status); /* for coverage */ 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail: 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(bytes); 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(csd); 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestUTF16(void) 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Notice the BOM on the start of this string */ 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar chars[] = { 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xFEFF, 0x0623, 0x0648, 0x0631, 0x0648, 0x0628, 0x0627, 0x002C, 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0020, 0x0628, 0x0631, 0x0645, 0x062c, 0x064a, 0x0627, 0x062a, 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0020, 0x0627, 0x0644, 0x062d, 0x0627, 0x0633, 0x0648, 0x0628, 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0020, 0x002b, 0x0020, 0x0627, 0x0646, 0x062a, 0x0631, 0x0646, 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x064a, 0x062a, 0x0000}; 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t beLength = 0, leLength = 0, cLength = ARRAY_SIZE(chars); 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *beBytes = extractBytes(chars, cLength, "UTF-16BE", &beLength); 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *leBytes = extractBytes(chars, cLength, "UTF-16LE", &leLength); 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCharsetMatch *match; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t conf; 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, beBytes, beLength, &status); 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Encoding detection failure for UTF-16BE: got no matches.\n"); 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto try_le; 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru conf = ucsdet_getConfidence(match, &status); 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strcmp(name, "UTF-16BE") != 0) { 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Encoding detection failure for UTF-16BE: got %s\n", name); 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (conf != 100) { 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Did not get 100%% confidence for UTF-16BE: got %d\n", conf); 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutry_le: 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, leBytes, leLength, &status); 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Encoding detection failure for UTF-16LE: got no matches.\n"); 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto bail; 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru conf = ucsdet_getConfidence(match, &status); 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strcmp(name, "UTF-16LE") != 0) { 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Enconding detection failure for UTF-16LE: got %s\n", name); 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (conf != 100) { 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Did not get 100%% confidence for UTF-16LE: got %d\n", conf); 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail: 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(leBytes); 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(beBytes); 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(csd); 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestC1Bytes(void) 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_LEGACY_CONVERSION 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char ssISO[] = "This is a small sample of some English text. Just enough to be sure that it detects correctly."; 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char ssWindows[] = "This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes."; 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sISOLength = 0, sWindowsLength = 0; 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar sISO[sizeof(ssISO)]; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar sWindows[sizeof(ssWindows)]; 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lISO = 0, lWindows = 0; 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bISO; 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bWindows; 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCharsetMatch *match; 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sISOLength = u_unescape(ssISO, sISO, sizeof(ssISO)); 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sWindowsLength = u_unescape(ssWindows, sWindows, sizeof(ssWindows)); 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bISO = extractBytes(sISO, sISOLength, "ISO-8859-1", &lISO); 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bWindows = extractBytes(sWindows, sWindowsLength, "windows-1252", &lWindows); 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, bWindows, lWindows, &status); 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("English test with C1 bytes got no matches.\n"); 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto bail; 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strcmp(name, "windows-1252") != 0) { 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("English text with C1 bytes does not detect as windows-1252, but as %s\n", name); 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, bISO, lISO, &status); 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("English text without C1 bytes got no matches.\n"); 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto bail; 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strcmp(name, "ISO-8859-1") != 0) { 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("English text without C1 bytes does not detect as ISO-8859-1, but as %s\n", name); 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail: 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(bWindows); 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(bISO); 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(csd); 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestInputFilter(void) 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char ss[] = "<a> <lot> <of> <English> <inside> <the> <markup> Un tr\\u00E8s petit peu de Fran\\u00E7ais. <to> <confuse> <the> <detector>"; 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sLength = 0; 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar s[sizeof(ss)]; 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t byteLength = 0; 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bytes; 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCharsetMatch *match; 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *lang, *name; 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sLength = u_unescape(ss, s, sizeof(ss)); 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bytes = extractBytes(s, sLength, "ISO-8859-1", &byteLength); 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_enableInputFilter(csd, TRUE); 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ucsdet_isInputFilterEnabled(csd)) { 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("ucsdet_enableInputFilter(csd, TRUE) did not enable input filter!\n"); 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, bytes, byteLength, &status); 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Turning on the input filter resulted in no matches.\n"); 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto turn_off; 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (name == NULL || strcmp(name, "ISO-8859-1") != 0) { 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Turning on the input filter resulted in %s rather than ISO-8859-1\n", name); 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lang = ucsdet_getLanguage(match, &status); 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lang == NULL || strcmp(lang, "fr") != 0) { 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Input filter did not strip markup!\n"); 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruturn_off: 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_enableInputFilter(csd, FALSE); 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, bytes, byteLength, &status); 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Turning off the input filter resulted in no matches.\n"); 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto bail; 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (name == NULL || strcmp(name, "ISO-8859-1") != 0) { 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Turning off the input filter resulted in %s rather than ISO-8859-1\n", name); 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lang = ucsdet_getLanguage(match, &status); 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lang == NULL || strcmp(lang, "en") != 0) { 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Unfiltered input did not detect as English!\n"); 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail: 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(bytes); 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(csd); 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestChaining(void) { 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_USELESS_COLLATOR_ERROR; 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_open(&status); 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(NULL, NULL, 0, &status); 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_getName(NULL, &status); 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_getConfidence(NULL, &status); 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_getLanguage(NULL, &status); 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_detect(NULL, &status); 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setDeclaredEncoding(NULL, NULL, 0, &status); 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_detectAll(NULL, NULL, &status); 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_getUChars(NULL, NULL, 0, &status); 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_getUChars(NULL, NULL, 0, &status); 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(NULL); 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* All of this code should have done nothing. */ 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status != U_USELESS_COLLATOR_ERROR) { 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Status got changed to %s\n", u_errorName(status)); 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic void TestBufferOverflow(void) { 369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const char *testStrings[] = { 371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b", /* A partial ISO-2022 shift state at the end */ 372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24", /* A partial ISO-2022 shift state at the end */ 373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24\x28", /* A partial ISO-2022 shift state at the end */ 374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24\x28\x44", /* A complete ISO-2022 shift state at the end with a bad one at the start */ 375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x1b\x24\x28\x44", /* A complete ISO-2022 shift state at the end */ 376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\xa1", /* Could be a single byte shift-jis at the end */ 377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x74\x68\xa1", /* Could be a single byte shift-jis at the end */ 378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x74\x68\x65\xa1" /* Could be a single byte shift-jis at the end, but now we have English creeping in. */ 379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const char *testResults[] = { 381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "windows-1252", 382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "windows-1252", 383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "windows-1252", 384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "windows-1252", 385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "ISO-2022-JP", 386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru NULL, 387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru NULL, 388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "ISO-8859-1" 389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t idx = 0; 391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UCharsetMatch *match; 393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucsdet_setDeclaredEncoding(csd, "ISO-2022-JP", -1, &status); 395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru log_err("Couldn't open detector. %s\n", u_errorName(status)); 398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto bail; 399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (idx = 0; idx < ARRAY_SIZE(testStrings); idx++) { 402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucsdet_setText(csd, testStrings[idx], -1, &status); 403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (match == NULL) { 406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (testResults[idx] != NULL) { 407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru log_err("Unexpectedly got no results at index %d.\n", idx); 408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru log_verbose("Got no result as expected at index %d.\n", idx); 411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (testResults[idx] == NULL || strcmp(ucsdet_getName(match, &status), testResults[idx]) != 0) { 416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru log_err("Unexpectedly got %s instead of %s at index %d with confidence %d.\n", 417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucsdet_getName(match, &status), testResults[idx], idx, ucsdet_getConfidence(match, &status)); 418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto bail; 419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querubail: 423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucsdet_close(csd); 424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 426b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void TestIBM424(void) 427b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 428b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 429b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 430b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru static const UChar chars[] = { 431b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D4, 0x05E4, 0x05E8, 0x05E7, 0x05DC, 0x05D9, 0x05D8, 0x0020, 0x05D4, 0x05E6, 0x05D1, 0x05D0, 0x05D9, 0x0020, 0x05D4, 0x05E8, 432b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D0, 0x05E9, 0x05D9, 0x002C, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x05D0, 0x05DC, 0x05D5, 0x05E3, 0x0020, 0x05D0, 0x05D1, 0x05D9, 433b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D7, 0x05D9, 0x0020, 0x05DE, 0x05E0, 0x05D3, 0x05DC, 0x05D1, 0x05DC, 0x05D9, 0x05D8, 0x002C, 0x0020, 0x05D4, 0x05D5, 0x05E8, 434b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D4, 0x0020, 0x05E2, 0x05DC, 0x0020, 0x05E4, 0x05EA, 0x05D9, 0x05D7, 0x05EA, 0x0020, 0x05D7, 0x05E7, 0x05D9, 0x05E8, 0x05EA, 435b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x05DE, 0x05E6, 0x0022, 0x05D7, 0x0020, 0x05D1, 0x05E2, 0x05E7, 0x05D1, 0x05D5, 0x05EA, 0x0020, 0x05E2, 0x05D3, 0x05D5, 436b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D9, 0x05D5, 0x05EA, 0x0020, 0x05D7, 0x05D9, 0x05D9, 0x05DC, 0x05D9, 0x0020, 0x05E6, 0x05D4, 0x0022, 0x05DC, 0x0020, 0x05DE, 437b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DE, 0x05D1, 0x05E6, 0x05E2, 0x0020, 0x05E2, 0x05D5, 0x05E4, 0x05E8, 0x05EA, 0x0020, 0x05D9, 0x05E6, 0x05D5, 0x05E7, 0x05D4, 438b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x05D1, 0x002B, 0x0020, 0x05E8, 0x05E6, 0x05D5, 0x05E2, 0x05EA, 0x0020, 0x05E2, 0x05D6, 0x05D4, 0x002E, 0x0020, 0x05DC, 439b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x0020, 0x05D4, 0x05E4, 0x05E6, 0x0022, 0x05E8, 0x002C, 0x0020, 0x05DE, 0x05D4, 0x05E2, 0x05D3, 440b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0020, 0x05E2, 0x05D5, 0x05DC, 0x05D4, 0x0020, 0x05EA, 0x05DE, 0x05D5, 0x05E0, 0x05D4, 0x0020, 441b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05E9, 0x05DC, 0x0020, 0x0022, 0x05D4, 0x05EA, 0x05E0, 0x05D4, 0x05D2, 0x05D5, 0x05EA, 0x0020, 0x05E4, 0x05E1, 0x05D5, 0x05DC, 442b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D4, 0x0020, 0x05DC, 0x05DB, 0x05D0, 0x05D5, 0x05E8, 0x05D4, 0x0020, 0x05E9, 0x05DC, 0x0020, 0x05D7, 0x05D9, 0x05D9, 0x05DC, 443b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D9, 0x05DD, 0x0020, 0x05D1, 0x05DE, 0x05D4, 0x05DC, 0x05DA, 0x0020, 0x05DE, 0x05D1, 0x05E6, 0x05E2, 0x0020, 0x05E2, 0x05D5, 444b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05E4, 0x05E8, 0x05EA, 0x0020, 0x05D9, 0x05E6, 0x05D5, 0x05E7, 0x05D4, 0x0022, 0x002E, 0x0020, 0x05DE, 0x05E0, 0x05D3, 0x05DC, 445b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D1, 0x05DC, 0x05D9, 0x05D8, 0x0020, 0x05E7, 0x05D9, 0x05D1, 0x05DC, 0x0020, 0x05D0, 0x05EA, 0x0020, 0x05D4, 0x05D7, 0x05DC, 446b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D8, 0x05EA, 0x05D5, 0x0020, 0x05DC, 0x05D0, 0x05D7, 0x05E8, 0x0020, 0x05E9, 0x05E2, 0x05D9, 0x05D9, 0x05DF, 0x0020, 0x05D1, 447b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05EA, 0x05DE, 0x05DC, 0x05D9, 0x05DC, 0x0020, 0x05D4, 0x05E2, 0x05D3, 0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0000 448b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 449b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 450b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru static const UChar chars_reverse[] = { 451b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05EA, 0x05D5, 0x05D9, 0x05D5, 0x05D3, 0x05E2, 0x05D4, 0x0020, 0x05DC, 0x05D9, 0x05DC, 0x05DE, 0x05EA, 452b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D1, 0x0020, 0x05DF, 0x05D9, 0x05D9, 0x05E2, 0x05E9, 0x0020, 0x05E8, 0x05D7, 0x05D0, 0x05DC, 0x0020, 0x05D5, 0x05EA, 0x05D8, 453b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DC, 0x05D7, 0x05D4, 0x0020, 0x05EA, 0x05D0, 0x0020, 0x05DC, 0x05D1, 0x05D9, 0x05E7, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05D1, 454b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x002E, 0x0022, 0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4, 455b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE, 0x0020, 0x05DA, 0x05DC, 0x05D4, 0x05DE, 0x05D1, 0x0020, 0x05DD, 0x05D9, 456b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05DC, 0x05E9, 0x0020, 0x05D4, 0x05E8, 0x05D5, 0x05D0, 0x05DB, 0x05DC, 0x0020, 0x05D4, 457b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DC, 0x05D5, 0x05E1, 0x05E4, 0x0020, 0x05EA, 0x05D5, 0x05D2, 0x05D4, 0x05E0, 0x05EA, 0x05D4, 0x0022, 0x0020, 0x05DC, 0x05E9, 458b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x05D4, 0x05E0, 0x05D5, 0x05DE, 0x05EA, 0x0020, 0x05D4, 0x05DC, 0x05D5, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D9, 0x05D5, 459b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D3, 0x05E2, 0x05D4, 0x05DE, 0x0020, 0x002C, 0x05E8, 0x0022, 0x05E6, 0x05E4, 0x05D4, 0x0020, 0x05D9, 0x05E8, 0x05D1, 0x05D3, 460b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DC, 0x0020, 0x002E, 0x05D4, 0x05D6, 0x05E2, 0x0020, 0x05EA, 0x05E2, 0x05D5, 0x05E6, 0x05E8, 0x0020, 0x002B, 0x05D1, 0x0020, 461b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4, 0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE, 462b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DE, 0x0020, 0x05DC, 0x0022, 0x05D4, 0x05E6, 0x0020, 0x05D9, 0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05EA, 0x05D5, 0x05D9, 463b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D5, 0x05D3, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D1, 0x05E7, 0x05E2, 0x05D1, 0x0020, 0x05D7, 0x0022, 0x05E6, 0x05DE, 0x0020, 464b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05EA, 0x05E8, 0x05D9, 0x05E7, 0x05D7, 0x0020, 0x05EA, 0x05D7, 0x05D9, 0x05EA, 0x05E4, 0x0020, 0x05DC, 0x05E2, 0x0020, 0x05D4, 465b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05E8, 0x05D5, 0x05D4, 0x0020, 0x002C, 0x05D8, 0x05D9, 0x05DC, 0x05D1, 0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x05D9, 0x05D7, 466b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D9, 0x05D1, 0x05D0, 0x0020, 0x05E3, 0x05D5, 0x05DC, 0x05D0, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x002C, 0x05D9, 0x05E9, 0x05D0, 467b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05E8, 0x05D4, 0x0020, 0x05D9, 0x05D0, 0x05D1, 0x05E6, 0x05D4, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05E7, 0x05E8, 0x05E4, 0x05D4, 468b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0000 469b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 470b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 471b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t bLength = 0, brLength = 0, cLength = ARRAY_SIZE(chars), crLength = ARRAY_SIZE(chars_reverse); 472b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 473b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char *bytes = extractBytes(chars, cLength, "IBM424", &bLength); 474b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char *bytes_r = extractBytes(chars_reverse, crLength, "IBM424", &brLength); 475b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 476b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 477b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UCharsetMatch *match; 478b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *name; 479b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 480b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_setText(csd, bytes, bLength, &status); 481b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru match = ucsdet_detect(csd, &status); 482b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 483b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (match == NULL) { 484b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM424_rtl: got no matches.\n"); 485b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto bail; 486b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 487b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 488b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru name = ucsdet_getName(match, &status); 489b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (strcmp(name, "IBM424_rtl") != 0) { 490b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM424_rtl: got %s\n", name); 491b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 492b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 493b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_setText(csd, bytes_r, brLength, &status); 494b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru match = ucsdet_detect(csd, &status); 495b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 496b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (match == NULL) { 497b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM424_ltr: got no matches.\n"); 498b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto bail; 499b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 500b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 501b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru name = ucsdet_getName(match, &status); 502b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (strcmp(name, "IBM424_ltr") != 0) { 503b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM424_ltr: got %s\n", name); 504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 505b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 506b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querubail: 507b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru freeBytes(bytes); 508b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru freeBytes(bytes_r); 509b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_close(csd); 510b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 511b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 512b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void TestIBM420(void) 513b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 514b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 515b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 516b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru static const UChar chars[] = { 517b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0648, 0x064F, 0x0636, 0x0639, 0x062A, 0x0020, 0x0648, 0x0646, 0x064F, 0x0641, 0x0630, 0x062A, 0x0020, 0x0628, 0x0631, 0x0627, 518b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0645, 0x062C, 0x0020, 0x062A, 0x0623, 0x0645, 0x064A, 0x0646, 0x0020, 0x0639, 0x062F, 0x064A, 0x062F, 0x0629, 0x0020, 0x0641, 519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x064A, 0x0020, 0x0645, 0x0624, 0x0633, 0x0633, 0x0629, 0x0020, 0x0627, 0x0644, 0x062A, 0x0623, 0x0645, 0x064A, 0x0646, 0x0020, 520b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0627, 0x0644, 0x0648, 0x0637, 0x0646, 0x064A, 0x002C, 0x0020, 0x0645, 0x0639, 0x0020, 0x0645, 0x0644, 0x0627, 0x0626, 0x0645, 521b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x062A, 0x0647, 0x0627, 0x0020, 0x062F, 0x0627, 0x0626, 0x0645, 0x0627, 0x064B, 0x0020, 0x0644, 0x0644, 0x0627, 0x062D, 0x062A, 522b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x064A, 0x0627, 0x062C, 0x0627, 0x062A, 0x0020, 0x0627, 0x0644, 0x0645, 0x062A, 0x063A, 0x064A, 0x0631, 0x0629, 0x0020, 0x0644, 523b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0644, 0x0645, 0x062C, 0x062A, 0x0645, 0x0639, 0x0020, 0x0648, 0x0644, 0x0644, 0x062F, 0x0648, 0x0644, 0x0629, 0x002E, 0x0020, 524b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x062A, 0x0648, 0x0633, 0x0639, 0x062A, 0x0020, 0x0648, 0x062A, 0x0637, 0x0648, 0x0631, 0x062A, 0x0020, 0x0627, 0x0644, 0x0645, 525b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0624, 0x0633, 0x0633, 0x0629, 0x0020, 0x0628, 0x0647, 0x062F, 0x0641, 0x0020, 0x0636, 0x0645, 0x0627, 0x0646, 0x0020, 0x0634, 526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0628, 0x0643, 0x0629, 0x0020, 0x0623, 0x0645, 0x0627, 0x0646, 0x0020, 0x0644, 0x0633, 0x0643, 0x0627, 0x0646, 0x0020, 0x062F, 527b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0648, 0x0644, 0x0629, 0x0020, 0x0627, 0x0633, 0x0631, 0x0627, 0x0626, 0x064A, 0x0644, 0x0020, 0x0628, 0x0648, 0x062C, 0x0647, 528b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x0627, 0x0644, 0x0645, 0x062E, 0x0627, 0x0637, 0x0631, 0x0020, 0x0627, 0x0644, 0x0627, 0x0642, 0x062A, 0x0635, 0x0627, 529b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x062F, 0x064A, 0x0629, 0x0020, 0x0648, 0x0627, 0x0644, 0x0627, 0x062C, 0x062A, 0x0645, 0x0627, 0x0639, 0x064A, 0x0629, 0x002E, 530b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0000 531b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 532b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru static const UChar chars_reverse[] = { 533b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x002E, 0x0629, 0x064A, 0x0639, 0x0627, 0x0645, 0x062A, 0x062C, 0x0627, 0x0644, 0x0627, 0x0648, 0x0020, 0x0629, 0x064A, 0x062F, 534b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0627, 0x0635, 0x062A, 0x0642, 0x0627, 0x0644, 0x0627, 0x0020, 0x0631, 0x0637, 0x0627, 0x062E, 0x0645, 0x0644, 0x0627, 0x0020, 535b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0647, 0x062C, 0x0648, 0x0628, 0x0020, 0x0644, 0x064A, 0x0626, 0x0627, 0x0631, 0x0633, 0x0627, 0x0020, 0x0629, 0x0644, 0x0648, 536b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x062F, 0x0020, 0x0646, 0x0627, 0x0643, 0x0633, 0x0644, 0x0020, 0x0646, 0x0627, 0x0645, 0x0623, 0x0020, 0x0629, 0x0643, 0x0628, 537b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0634, 0x0020, 0x0646, 0x0627, 0x0645, 0x0636, 0x0020, 0x0641, 0x062F, 0x0647, 0x0628, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624, 538b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0631, 0x0648, 0x0637, 0x062A, 0x0648, 0x0020, 0x062A, 0x0639, 0x0633, 0x0648, 0x062A, 539b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x002E, 0x0629, 0x0644, 0x0648, 0x062F, 0x0644, 0x0644, 0x0648, 0x0020, 0x0639, 0x0645, 0x062A, 0x062C, 0x0645, 0x0644, 540b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0644, 0x0020, 0x0629, 0x0631, 0x064A, 0x063A, 0x062A, 0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0627, 0x062C, 0x0627, 0x064A, 541b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x062A, 0x062D, 0x0627, 0x0644, 0x0644, 0x0020, 0x064B, 0x0627, 0x0645, 0x0626, 0x0627, 0x062F, 0x0020, 0x0627, 0x0647, 0x062A, 542b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0645, 0x0626, 0x0627, 0x0644, 0x0645, 0x0020, 0x0639, 0x0645, 0x0020, 0x002C, 0x064A, 0x0646, 0x0637, 0x0648, 0x0644, 0x0627, 543b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0644, 0x0627, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624, 0x0645, 0x0020, 0x064A, 544b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0641, 0x0020, 0x0629, 0x062F, 0x064A, 0x062F, 0x0639, 0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0020, 0x062C, 0x0645, 545b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0627, 0x0631, 0x0628, 0x0020, 0x062A, 0x0630, 0x0641, 0x064F, 0x0646, 0x0648, 0x0020, 0x062A, 0x0639, 0x0636, 0x064F, 0x0648, 546b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0000, 547b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 548b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 549b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t bLength = 0, brLength = 0, cLength = ARRAY_SIZE(chars), crLength = ARRAY_SIZE(chars_reverse); 550b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 551b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char *bytes = extractBytes(chars, cLength, "IBM420", &bLength); 552b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char *bytes_r = extractBytes(chars_reverse, crLength, "IBM420", &brLength); 553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 554b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UCharsetMatch *match; 556b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *name; 557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_setText(csd, bytes, bLength, &status); 559b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru match = ucsdet_detect(csd, &status); 560b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 561b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (match == NULL) { 562b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM420_rtl: got no matches.\n"); 563b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto bail; 564b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 565b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 566b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru name = ucsdet_getName(match, &status); 567b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (strcmp(name, "IBM420_rtl") != 0) { 568b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM420_rtl: got %s\n", name); 569b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 570b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 571b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_setText(csd, bytes_r, brLength, &status); 572b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru match = ucsdet_detect(csd, &status); 573b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 574b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (match == NULL) { 575b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM420_ltr: got no matches.\n"); 576b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto bail; 577b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 579b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru name = ucsdet_getName(match, &status); 580b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (strcmp(name, "IBM420_ltr") != 0) { 581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM420_ltr: got %s\n", name); 582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 583b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querubail: 585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru freeBytes(bytes); 586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru freeBytes(bytes_r); 587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_close(csd); 588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 589