1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru **************************************************************************** 3b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Copyright (c) 2005-2009, International Business Machines Corporation and * 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved. * 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru **************************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucsdet.h" 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h" 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cintltst.h" 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h> 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h> 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define NEW_ARRAY(type,count) (type *) malloc((count) * sizeof(type)) 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define DELETE_ARRAY(array) free(array) 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestConstruction(void); 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestUTF8(void); 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestUTF16(void); 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestC1Bytes(void); 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestInputFilter(void); 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestChaining(void); 30c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic void TestBufferOverflow(void); 31b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void TestIBM424(void); 32b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void TestIBM420(void); 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid addUCsdetTest(TestNode** root); 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid addUCsdetTest(TestNode** root) 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestConstruction, "ucsdetst/TestConstruction"); 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestUTF8, "ucsdetst/TestUTF8"); 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestUTF16, "ucsdetst/TestUTF16"); 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestC1Bytes, "ucsdetst/TestC1Bytes"); 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestInputFilter, "ucsdetst/TestInputFilter"); 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, &TestChaining, "ucsdetst/TestErrorChaining"); 44c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru addTest(root, &TestBufferOverflow, "ucsdetst/TestBufferOverflow"); 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_LEGACY_CONVERSION 46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru addTest(root, &TestIBM424, "ucsdetst/TestIBM424"); 47b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru addTest(root, &TestIBM420, "ucsdetst/TestIBM420"); 4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t preflight(const UChar *src, int32_t length, UConverter *cnv) 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status; 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buffer[1024]; 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *dest, *destLimit = buffer + sizeof(buffer); 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *srcLimit = src + length; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = 0; 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = buffer; 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status); 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result += (int32_t) (dest - buffer); 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (status == U_BUFFER_OVERFLOW_ERROR); 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic char *extractBytes(const UChar *src, int32_t length, const char *codepage, int32_t *byteLength) 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverter *cnv = ucnv_open(codepage, &status); 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t byteCount = preflight(src, length, cnv); 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *srcLimit = src + length; 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bytes = NEW_ARRAY(char, byteCount + 1); 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *dest = bytes, *destLimit = bytes + byteCount + 1; 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status); 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(cnv); 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *byteLength = byteCount; 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return bytes; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void freeBytes(char *bytes) 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru DELETE_ARRAY(bytes); 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestConstruction(void) 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UEnumeration *e = ucsdet_getAllDetectableCharsets(csd, &status); 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name; 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count = uenum_count(e, &status); 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, length; 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i = 0; i < count; i += 1) { 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = uenum_next(e, &length, &status); 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(name == NULL || length <= 0) { 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("ucsdet_getAllDetectableCharsets() returned a null or empty name!\n"); 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* one past the list of all names must return NULL */ 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = uenum_next(e, &length, &status); 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(name != NULL || length != 0 || U_FAILURE(status)) { 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("ucsdet_getAllDetectableCharsets(past the list) returned a non-null name!\n"); 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uenum_close(e); 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(csd); 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestUTF8(void) 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char ss[] = "This is a string with some non-ascii characters that will " 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "be converted to UTF-8, then shoved through the detection process. " 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0391\\u0392\\u0393\\u0394\\u0395" 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Sure would be nice if our source could contain Unicode directly!"; 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t byteLength = 0, sLength = 0, dLength = 0; 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar s[sizeof(ss)]; 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bytes; 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCharsetMatch *match; 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar detected[sizeof(ss)]; 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sLength = u_unescape(ss, s, sizeof(ss)); 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bytes = extractBytes(s, sLength, "UTF-8", &byteLength); 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, bytes, byteLength, &status); 134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru log_err("status is %s\n", u_errorName(status)); 136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto bail; 137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Detection failure for UTF-8: got no matches.\n"); 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto bail; 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dLength = ucsdet_getUChars(match, detected, sLength, &status); 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u_strCompare(detected, dLength, s, sLength, FALSE) != 0) { 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Round-trip test failed!\n"); 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setDeclaredEncoding(csd, "UTF-8", 5, &status); /* for coverage */ 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail: 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(bytes); 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(csd); 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestUTF16(void) 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Notice the BOM on the start of this string */ 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar chars[] = { 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xFEFF, 0x0623, 0x0648, 0x0631, 0x0648, 0x0628, 0x0627, 0x002C, 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0020, 0x0628, 0x0631, 0x0645, 0x062c, 0x064a, 0x0627, 0x062a, 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0020, 0x0627, 0x0644, 0x062d, 0x0627, 0x0633, 0x0648, 0x0628, 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0020, 0x002b, 0x0020, 0x0627, 0x0646, 0x062a, 0x0631, 0x0646, 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x064a, 0x062a, 0x0000}; 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t beLength = 0, leLength = 0, cLength = ARRAY_SIZE(chars); 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *beBytes = extractBytes(chars, cLength, "UTF-16BE", &beLength); 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *leBytes = extractBytes(chars, cLength, "UTF-16LE", &leLength); 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCharsetMatch *match; 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t conf; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, beBytes, beLength, &status); 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Encoding detection failure for UTF-16BE: got no matches.\n"); 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto try_le; 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru conf = ucsdet_getConfidence(match, &status); 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strcmp(name, "UTF-16BE") != 0) { 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Encoding detection failure for UTF-16BE: got %s\n", name); 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (conf != 100) { 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Did not get 100%% confidence for UTF-16BE: got %d\n", conf); 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutry_le: 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, leBytes, leLength, &status); 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Encoding detection failure for UTF-16LE: got no matches.\n"); 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto bail; 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru conf = ucsdet_getConfidence(match, &status); 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strcmp(name, "UTF-16LE") != 0) { 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Enconding detection failure for UTF-16LE: got %s\n", name); 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (conf != 100) { 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Did not get 100%% confidence for UTF-16LE: got %d\n", conf); 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail: 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(leBytes); 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(beBytes); 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(csd); 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestC1Bytes(void) 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_LEGACY_CONVERSION 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char ssISO[] = "This is a small sample of some English text. Just enough to be sure that it detects correctly."; 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char ssWindows[] = "This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes."; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sISOLength = 0, sWindowsLength = 0; 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar sISO[sizeof(ssISO)]; 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar sWindows[sizeof(ssWindows)]; 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lISO = 0, lWindows = 0; 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bISO; 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bWindows; 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCharsetMatch *match; 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name; 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sISOLength = u_unescape(ssISO, sISO, sizeof(ssISO)); 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sWindowsLength = u_unescape(ssWindows, sWindows, sizeof(ssWindows)); 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bISO = extractBytes(sISO, sISOLength, "ISO-8859-1", &lISO); 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bWindows = extractBytes(sWindows, sWindowsLength, "windows-1252", &lWindows); 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, bWindows, lWindows, &status); 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("English test with C1 bytes got no matches.\n"); 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto bail; 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strcmp(name, "windows-1252") != 0) { 2556d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru log_data_err("English text with C1 bytes does not detect as windows-1252, but as %s. (Are you missing data?)\n", name); 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, bISO, lISO, &status); 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("English text without C1 bytes got no matches.\n"); 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto bail; 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strcmp(name, "ISO-8859-1") != 0) { 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("English text without C1 bytes does not detect as ISO-8859-1, but as %s\n", name); 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail: 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(bWindows); 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(bISO); 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(csd); 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestInputFilter(void) 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char ss[] = "<a> <lot> <of> <English> <inside> <the> <markup> Un tr\\u00E8s petit peu de Fran\\u00E7ais. <to> <confuse> <the> <detector>"; 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sLength = 0; 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar s[sizeof(ss)]; 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t byteLength = 0; 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bytes; 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCharsetMatch *match; 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *lang, *name; 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sLength = u_unescape(ss, s, sizeof(ss)); 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bytes = extractBytes(s, sLength, "ISO-8859-1", &byteLength); 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_enableInputFilter(csd, TRUE); 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ucsdet_isInputFilterEnabled(csd)) { 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("ucsdet_enableInputFilter(csd, TRUE) did not enable input filter!\n"); 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, bytes, byteLength, &status); 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Turning on the input filter resulted in no matches.\n"); 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto turn_off; 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (name == NULL || strcmp(name, "ISO-8859-1") != 0) { 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Turning on the input filter resulted in %s rather than ISO-8859-1\n", name); 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lang = ucsdet_getLanguage(match, &status); 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lang == NULL || strcmp(lang, "fr") != 0) { 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Input filter did not strip markup!\n"); 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruturn_off: 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_enableInputFilter(csd, FALSE); 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(csd, bytes, byteLength, &status); 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (match == NULL) { 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Turning off the input filter resulted in no matches.\n"); 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto bail; 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucsdet_getName(match, &status); 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (name == NULL || strcmp(name, "ISO-8859-1") != 0) { 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Turning off the input filter resulted in %s rather than ISO-8859-1\n", name); 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lang = ucsdet_getLanguage(match, &status); 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lang == NULL || strcmp(lang, "en") != 0) { 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Unfiltered input did not detect as English!\n"); 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubail: 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru freeBytes(bytes); 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(csd); 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void TestChaining(void) { 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_USELESS_COLLATOR_ERROR; 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_open(&status); 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setText(NULL, NULL, 0, &status); 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_getName(NULL, &status); 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_getConfidence(NULL, &status); 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_getLanguage(NULL, &status); 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_detect(NULL, &status); 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_setDeclaredEncoding(NULL, NULL, 0, &status); 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_detectAll(NULL, NULL, &status); 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_getUChars(NULL, NULL, 0, &status); 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_getUChars(NULL, NULL, 0, &status); 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucsdet_close(NULL); 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* All of this code should have done nothing. */ 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status != U_USELESS_COLLATOR_ERROR) { 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Status got changed to %s\n", u_errorName(status)); 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic void TestBufferOverflow(void) { 371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const char *testStrings[] = { 373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b", /* A partial ISO-2022 shift state at the end */ 374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24", /* A partial ISO-2022 shift state at the end */ 375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24\x28", /* A partial ISO-2022 shift state at the end */ 376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x80\x20\x54\x68\x69\x73\x20\x69\x73\x20\x45\x6E\x67\x6C\x69\x73\x68\x20\x1b\x24\x28\x44", /* A complete ISO-2022 shift state at the end with a bad one at the start */ 377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x1b\x24\x28\x44", /* A complete ISO-2022 shift state at the end */ 378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\xa1", /* Could be a single byte shift-jis at the end */ 379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x74\x68\xa1", /* Could be a single byte shift-jis at the end */ 380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\x74\x68\x65\xa1" /* Could be a single byte shift-jis at the end, but now we have English creeping in. */ 381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const char *testResults[] = { 383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "windows-1252", 384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "windows-1252", 385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "windows-1252", 386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "windows-1252", 387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "ISO-2022-JP", 388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru NULL, 389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru NULL, 390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "ISO-8859-1" 391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t idx = 0; 393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UCharsetMatch *match; 395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucsdet_setDeclaredEncoding(csd, "ISO-2022-JP", -1, &status); 397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru log_err("Couldn't open detector. %s\n", u_errorName(status)); 400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto bail; 401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (idx = 0; idx < ARRAY_SIZE(testStrings); idx++) { 404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucsdet_setText(csd, testStrings[idx], -1, &status); 405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru match = ucsdet_detect(csd, &status); 406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (match == NULL) { 408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (testResults[idx] != NULL) { 409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru log_err("Unexpectedly got no results at index %d.\n", idx); 410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru log_verbose("Got no result as expected at index %d.\n", idx); 413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (testResults[idx] == NULL || strcmp(ucsdet_getName(match, &status), testResults[idx]) != 0) { 418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru log_err("Unexpectedly got %s instead of %s at index %d with confidence %d.\n", 419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucsdet_getName(match, &status), testResults[idx], idx, ucsdet_getConfidence(match, &status)); 420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto bail; 421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querubail: 425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucsdet_close(csd); 426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 428b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void TestIBM424(void) 429b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 430b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 431b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 432b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru static const UChar chars[] = { 433b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D4, 0x05E4, 0x05E8, 0x05E7, 0x05DC, 0x05D9, 0x05D8, 0x0020, 0x05D4, 0x05E6, 0x05D1, 0x05D0, 0x05D9, 0x0020, 0x05D4, 0x05E8, 434b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D0, 0x05E9, 0x05D9, 0x002C, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x05D0, 0x05DC, 0x05D5, 0x05E3, 0x0020, 0x05D0, 0x05D1, 0x05D9, 435b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D7, 0x05D9, 0x0020, 0x05DE, 0x05E0, 0x05D3, 0x05DC, 0x05D1, 0x05DC, 0x05D9, 0x05D8, 0x002C, 0x0020, 0x05D4, 0x05D5, 0x05E8, 436b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D4, 0x0020, 0x05E2, 0x05DC, 0x0020, 0x05E4, 0x05EA, 0x05D9, 0x05D7, 0x05EA, 0x0020, 0x05D7, 0x05E7, 0x05D9, 0x05E8, 0x05EA, 437b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x05DE, 0x05E6, 0x0022, 0x05D7, 0x0020, 0x05D1, 0x05E2, 0x05E7, 0x05D1, 0x05D5, 0x05EA, 0x0020, 0x05E2, 0x05D3, 0x05D5, 438b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D9, 0x05D5, 0x05EA, 0x0020, 0x05D7, 0x05D9, 0x05D9, 0x05DC, 0x05D9, 0x0020, 0x05E6, 0x05D4, 0x0022, 0x05DC, 0x0020, 0x05DE, 439b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DE, 0x05D1, 0x05E6, 0x05E2, 0x0020, 0x05E2, 0x05D5, 0x05E4, 0x05E8, 0x05EA, 0x0020, 0x05D9, 0x05E6, 0x05D5, 0x05E7, 0x05D4, 440b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x05D1, 0x002B, 0x0020, 0x05E8, 0x05E6, 0x05D5, 0x05E2, 0x05EA, 0x0020, 0x05E2, 0x05D6, 0x05D4, 0x002E, 0x0020, 0x05DC, 441b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x0020, 0x05D4, 0x05E4, 0x05E6, 0x0022, 0x05E8, 0x002C, 0x0020, 0x05DE, 0x05D4, 0x05E2, 0x05D3, 442b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0020, 0x05E2, 0x05D5, 0x05DC, 0x05D4, 0x0020, 0x05EA, 0x05DE, 0x05D5, 0x05E0, 0x05D4, 0x0020, 443b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05E9, 0x05DC, 0x0020, 0x0022, 0x05D4, 0x05EA, 0x05E0, 0x05D4, 0x05D2, 0x05D5, 0x05EA, 0x0020, 0x05E4, 0x05E1, 0x05D5, 0x05DC, 444b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D4, 0x0020, 0x05DC, 0x05DB, 0x05D0, 0x05D5, 0x05E8, 0x05D4, 0x0020, 0x05E9, 0x05DC, 0x0020, 0x05D7, 0x05D9, 0x05D9, 0x05DC, 445b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D9, 0x05DD, 0x0020, 0x05D1, 0x05DE, 0x05D4, 0x05DC, 0x05DA, 0x0020, 0x05DE, 0x05D1, 0x05E6, 0x05E2, 0x0020, 0x05E2, 0x05D5, 446b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05E4, 0x05E8, 0x05EA, 0x0020, 0x05D9, 0x05E6, 0x05D5, 0x05E7, 0x05D4, 0x0022, 0x002E, 0x0020, 0x05DE, 0x05E0, 0x05D3, 0x05DC, 447b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D1, 0x05DC, 0x05D9, 0x05D8, 0x0020, 0x05E7, 0x05D9, 0x05D1, 0x05DC, 0x0020, 0x05D0, 0x05EA, 0x0020, 0x05D4, 0x05D7, 0x05DC, 448b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D8, 0x05EA, 0x05D5, 0x0020, 0x05DC, 0x05D0, 0x05D7, 0x05E8, 0x0020, 0x05E9, 0x05E2, 0x05D9, 0x05D9, 0x05DF, 0x0020, 0x05D1, 449b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05EA, 0x05DE, 0x05DC, 0x05D9, 0x05DC, 0x0020, 0x05D4, 0x05E2, 0x05D3, 0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0000 450b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 451b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 452b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru static const UChar chars_reverse[] = { 453b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05EA, 0x05D5, 0x05D9, 0x05D5, 0x05D3, 0x05E2, 0x05D4, 0x0020, 0x05DC, 0x05D9, 0x05DC, 0x05DE, 0x05EA, 454b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D1, 0x0020, 0x05DF, 0x05D9, 0x05D9, 0x05E2, 0x05E9, 0x0020, 0x05E8, 0x05D7, 0x05D0, 0x05DC, 0x0020, 0x05D5, 0x05EA, 0x05D8, 455b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DC, 0x05D7, 0x05D4, 0x0020, 0x05EA, 0x05D0, 0x0020, 0x05DC, 0x05D1, 0x05D9, 0x05E7, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05D1, 456b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x002E, 0x0022, 0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4, 457b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE, 0x0020, 0x05DA, 0x05DC, 0x05D4, 0x05DE, 0x05D1, 0x0020, 0x05DD, 0x05D9, 458b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05DC, 0x05E9, 0x0020, 0x05D4, 0x05E8, 0x05D5, 0x05D0, 0x05DB, 0x05DC, 0x0020, 0x05D4, 459b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DC, 0x05D5, 0x05E1, 0x05E4, 0x0020, 0x05EA, 0x05D5, 0x05D2, 0x05D4, 0x05E0, 0x05EA, 0x05D4, 0x0022, 0x0020, 0x05DC, 0x05E9, 460b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x05D4, 0x05E0, 0x05D5, 0x05DE, 0x05EA, 0x0020, 0x05D4, 0x05DC, 0x05D5, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D9, 0x05D5, 461b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D3, 0x05E2, 0x05D4, 0x05DE, 0x0020, 0x002C, 0x05E8, 0x0022, 0x05E6, 0x05E4, 0x05D4, 0x0020, 0x05D9, 0x05E8, 0x05D1, 0x05D3, 462b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DC, 0x0020, 0x002E, 0x05D4, 0x05D6, 0x05E2, 0x0020, 0x05EA, 0x05E2, 0x05D5, 0x05E6, 0x05E8, 0x0020, 0x002B, 0x05D1, 0x0020, 463b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4, 0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE, 464b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05DE, 0x0020, 0x05DC, 0x0022, 0x05D4, 0x05E6, 0x0020, 0x05D9, 0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05EA, 0x05D5, 0x05D9, 465b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D5, 0x05D3, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D1, 0x05E7, 0x05E2, 0x05D1, 0x0020, 0x05D7, 0x0022, 0x05E6, 0x05DE, 0x0020, 466b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05EA, 0x05E8, 0x05D9, 0x05E7, 0x05D7, 0x0020, 0x05EA, 0x05D7, 0x05D9, 0x05EA, 0x05E4, 0x0020, 0x05DC, 0x05E2, 0x0020, 0x05D4, 467b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05E8, 0x05D5, 0x05D4, 0x0020, 0x002C, 0x05D8, 0x05D9, 0x05DC, 0x05D1, 0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x05D9, 0x05D7, 468b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05D9, 0x05D1, 0x05D0, 0x0020, 0x05E3, 0x05D5, 0x05DC, 0x05D0, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x002C, 0x05D9, 0x05E9, 0x05D0, 469b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x05E8, 0x05D4, 0x0020, 0x05D9, 0x05D0, 0x05D1, 0x05E6, 0x05D4, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05E7, 0x05E8, 0x05E4, 0x05D4, 470b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0000 471b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 472b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 473b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t bLength = 0, brLength = 0, cLength = ARRAY_SIZE(chars), crLength = ARRAY_SIZE(chars_reverse); 474b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 475b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char *bytes = extractBytes(chars, cLength, "IBM424", &bLength); 476b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char *bytes_r = extractBytes(chars_reverse, crLength, "IBM424", &brLength); 477b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 478b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 479b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UCharsetMatch *match; 480b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *name; 481b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 482b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_setText(csd, bytes, bLength, &status); 483b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru match = ucsdet_detect(csd, &status); 484b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 485b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (match == NULL) { 486b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM424_rtl: got no matches.\n"); 487b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto bail; 488b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 489b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 490b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru name = ucsdet_getName(match, &status); 491b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (strcmp(name, "IBM424_rtl") != 0) { 4926d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru log_data_err("Encoding detection failure for IBM424_rtl: got %s. (Are you missing data?)\n", name); 493b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 494b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 495b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_setText(csd, bytes_r, brLength, &status); 496b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru match = ucsdet_detect(csd, &status); 497b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 498b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (match == NULL) { 499b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM424_ltr: got no matches.\n"); 500b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto bail; 501b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 502b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 503b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru name = ucsdet_getName(match, &status); 504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (strcmp(name, "IBM424_ltr") != 0) { 5056d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru log_data_err("Encoding detection failure for IBM424_ltr: got %s. (Are you missing data?)\n", name); 506b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 507b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 508b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querubail: 509b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru freeBytes(bytes); 510b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru freeBytes(bytes_r); 511b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_close(csd); 512b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 513b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 514b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void TestIBM420(void) 515b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 516b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 517b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 518b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru static const UChar chars[] = { 519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0648, 0x064F, 0x0636, 0x0639, 0x062A, 0x0020, 0x0648, 0x0646, 0x064F, 0x0641, 0x0630, 0x062A, 0x0020, 0x0628, 0x0631, 0x0627, 520b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0645, 0x062C, 0x0020, 0x062A, 0x0623, 0x0645, 0x064A, 0x0646, 0x0020, 0x0639, 0x062F, 0x064A, 0x062F, 0x0629, 0x0020, 0x0641, 521b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x064A, 0x0020, 0x0645, 0x0624, 0x0633, 0x0633, 0x0629, 0x0020, 0x0627, 0x0644, 0x062A, 0x0623, 0x0645, 0x064A, 0x0646, 0x0020, 522b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0627, 0x0644, 0x0648, 0x0637, 0x0646, 0x064A, 0x002C, 0x0020, 0x0645, 0x0639, 0x0020, 0x0645, 0x0644, 0x0627, 0x0626, 0x0645, 523b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x062A, 0x0647, 0x0627, 0x0020, 0x062F, 0x0627, 0x0626, 0x0645, 0x0627, 0x064B, 0x0020, 0x0644, 0x0644, 0x0627, 0x062D, 0x062A, 524b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x064A, 0x0627, 0x062C, 0x0627, 0x062A, 0x0020, 0x0627, 0x0644, 0x0645, 0x062A, 0x063A, 0x064A, 0x0631, 0x0629, 0x0020, 0x0644, 525b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0644, 0x0645, 0x062C, 0x062A, 0x0645, 0x0639, 0x0020, 0x0648, 0x0644, 0x0644, 0x062F, 0x0648, 0x0644, 0x0629, 0x002E, 0x0020, 526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x062A, 0x0648, 0x0633, 0x0639, 0x062A, 0x0020, 0x0648, 0x062A, 0x0637, 0x0648, 0x0631, 0x062A, 0x0020, 0x0627, 0x0644, 0x0645, 527b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0624, 0x0633, 0x0633, 0x0629, 0x0020, 0x0628, 0x0647, 0x062F, 0x0641, 0x0020, 0x0636, 0x0645, 0x0627, 0x0646, 0x0020, 0x0634, 528b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0628, 0x0643, 0x0629, 0x0020, 0x0623, 0x0645, 0x0627, 0x0646, 0x0020, 0x0644, 0x0633, 0x0643, 0x0627, 0x0646, 0x0020, 0x062F, 529b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0648, 0x0644, 0x0629, 0x0020, 0x0627, 0x0633, 0x0631, 0x0627, 0x0626, 0x064A, 0x0644, 0x0020, 0x0628, 0x0648, 0x062C, 0x0647, 530b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x0627, 0x0644, 0x0645, 0x062E, 0x0627, 0x0637, 0x0631, 0x0020, 0x0627, 0x0644, 0x0627, 0x0642, 0x062A, 0x0635, 0x0627, 531b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x062F, 0x064A, 0x0629, 0x0020, 0x0648, 0x0627, 0x0644, 0x0627, 0x062C, 0x062A, 0x0645, 0x0627, 0x0639, 0x064A, 0x0629, 0x002E, 532b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0000 533b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 534b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru static const UChar chars_reverse[] = { 535b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x002E, 0x0629, 0x064A, 0x0639, 0x0627, 0x0645, 0x062A, 0x062C, 0x0627, 0x0644, 0x0627, 0x0648, 0x0020, 0x0629, 0x064A, 0x062F, 536b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0627, 0x0635, 0x062A, 0x0642, 0x0627, 0x0644, 0x0627, 0x0020, 0x0631, 0x0637, 0x0627, 0x062E, 0x0645, 0x0644, 0x0627, 0x0020, 537b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0647, 0x062C, 0x0648, 0x0628, 0x0020, 0x0644, 0x064A, 0x0626, 0x0627, 0x0631, 0x0633, 0x0627, 0x0020, 0x0629, 0x0644, 0x0648, 538b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x062F, 0x0020, 0x0646, 0x0627, 0x0643, 0x0633, 0x0644, 0x0020, 0x0646, 0x0627, 0x0645, 0x0623, 0x0020, 0x0629, 0x0643, 0x0628, 539b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0634, 0x0020, 0x0646, 0x0627, 0x0645, 0x0636, 0x0020, 0x0641, 0x062F, 0x0647, 0x0628, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624, 540b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0631, 0x0648, 0x0637, 0x062A, 0x0648, 0x0020, 0x062A, 0x0639, 0x0633, 0x0648, 0x062A, 541b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x002E, 0x0629, 0x0644, 0x0648, 0x062F, 0x0644, 0x0644, 0x0648, 0x0020, 0x0639, 0x0645, 0x062A, 0x062C, 0x0645, 0x0644, 542b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0644, 0x0020, 0x0629, 0x0631, 0x064A, 0x063A, 0x062A, 0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0627, 0x062C, 0x0627, 0x064A, 543b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x062A, 0x062D, 0x0627, 0x0644, 0x0644, 0x0020, 0x064B, 0x0627, 0x0645, 0x0626, 0x0627, 0x062F, 0x0020, 0x0627, 0x0647, 0x062A, 544b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0645, 0x0626, 0x0627, 0x0644, 0x0645, 0x0020, 0x0639, 0x0645, 0x0020, 0x002C, 0x064A, 0x0646, 0x0637, 0x0648, 0x0644, 0x0627, 545b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0644, 0x0627, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624, 0x0645, 0x0020, 0x064A, 546b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0641, 0x0020, 0x0629, 0x062F, 0x064A, 0x062F, 0x0639, 0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0020, 0x062C, 0x0645, 547b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0627, 0x0631, 0x0628, 0x0020, 0x062A, 0x0630, 0x0641, 0x064F, 0x0646, 0x0648, 0x0020, 0x062A, 0x0639, 0x0636, 0x064F, 0x0648, 548b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0x0000, 549b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 550b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 551b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t bLength = 0, brLength = 0, cLength = ARRAY_SIZE(chars), crLength = ARRAY_SIZE(chars_reverse); 552b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char *bytes = extractBytes(chars, cLength, "IBM420", &bLength); 554b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char *bytes_r = extractBytes(chars_reverse, crLength, "IBM420", &brLength); 555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 556b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCharsetDetector *csd = ucsdet_open(&status); 557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UCharsetMatch *match; 558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *name; 559b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 560b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_setText(csd, bytes, bLength, &status); 561b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru match = ucsdet_detect(csd, &status); 562b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 563b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (match == NULL) { 564b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM420_rtl: got no matches.\n"); 565b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto bail; 566b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 567b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 568b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru name = ucsdet_getName(match, &status); 569b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (strcmp(name, "IBM420_rtl") != 0) { 5706d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru log_data_err("Encoding detection failure for IBM420_rtl: got %s. (Are you missing data?)\n", name); 571b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 572b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 573b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_setText(csd, bytes_r, brLength, &status); 574b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru match = ucsdet_detect(csd, &status); 575b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 576b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (match == NULL) { 577b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru log_err("Encoding detection failure for IBM420_ltr: got no matches.\n"); 578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto bail; 579b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 580b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru name = ucsdet_getName(match, &status); 582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (strcmp(name, "IBM420_ltr") != 0) { 5836d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru log_data_err("Encoding detection failure for IBM420_ltr: got %s. (Are you missing data?)\n", name); 584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querubail: 587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru freeBytes(bytes); 588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru freeBytes(bytes_r); 589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucsdet_close(csd); 590b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 591