1/******************************************************************** 2 * Copyright (c) 1997-2009, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************** 5 * 6 * File UCNVSELTST.C 7 * 8 * Modification History: 9 * Name Description 10 * MOHAMED ELDAWY Creation 11 ******************************************************************** 12 */ 13 14/* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/ 15 16#include "ucnvseltst.h" 17 18#include <stdio.h> 19 20#include "unicode/utypes.h" 21#include "unicode/ucnvsel.h" 22#include "unicode/ustring.h" 23#include "cmemory.h" 24#include "cstring.h" 25 26#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 27 28#define FILENAME_BUFFER 1024 29 30#define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING 31 32static void TestSelector(void); 33void addCnvSelTest(TestNode** root); /* Declaration required to suppress compiler warnings. */ 34 35void addCnvSelTest(TestNode** root) 36{ 37 addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector"); 38} 39 40static const char **gAvailableNames = NULL; 41static int32_t gCountAvailable = 0; 42 43static UBool 44getAvailableNames() { 45 int32_t i; 46 if (gAvailableNames != NULL) { 47 return TRUE; 48 } 49 gCountAvailable = ucnv_countAvailable(); 50 if (gCountAvailable == 0) { 51 log_data_err("No converters available.\n"); 52 return FALSE; 53 } 54 gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *)); 55 if (gAvailableNames == NULL) { 56 log_err("unable to allocate memory for %ld available converter names\n", 57 (long)gCountAvailable); 58 return FALSE; 59 } 60 for (i = 0; i < gCountAvailable; ++i) { 61 gAvailableNames[i] = ucnv_getAvailableName(i); 62 } 63 return TRUE; 64} 65 66static void 67releaseAvailableNames() { 68 uprv_free((void *)gAvailableNames); 69 gAvailableNames = NULL; 70 gCountAvailable = 0; 71} 72 73static const char ** 74getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) { 75 const char **names; 76 int32_t i; 77 78 *pCount = 0; 79 if (count <= 0) { 80 return NULL; 81 } 82 names = (const char **)uprv_malloc(count * sizeof(char *)); 83 if (names == NULL) { 84 log_err("memory allocation error for %ld pointers\n", (long)count); 85 return NULL; 86 } 87 if (step == 0 && count > 0) { 88 step = 1; 89 } 90 for (i = 0; i < count; ++i) { 91 if (0 <= start && start < gCountAvailable) { 92 names[i] = gAvailableNames[start]; 93 start += step; 94 ++*pCount; 95 } 96 } 97 return names; 98} 99 100#if 0 101/* 102 * ucnvsel_open() does not support "no encodings": 103 * Given 0 encodings it will open a selector for all available ones. 104 */ 105static const char ** 106getNoEncodings(int32_t *pCount) { 107 *pCount = 0; 108 return NULL; 109} 110#endif 111 112static const char ** 113getOneEncoding(int32_t *pCount) { 114 return getEncodings(1, 0, 1, pCount); 115} 116 117static const char ** 118getFirstEvenEncodings(int32_t *pCount) { 119 return getEncodings(0, 2, 25, pCount); 120} 121 122static const char ** 123getMiddleEncodings(int32_t *pCount) { 124 return getEncodings(gCountAvailable - 12, 1, 22, pCount); 125} 126 127static const char ** 128getLastEncodings(int32_t *pCount) { 129 return getEncodings(gCountAvailable - 1, -1, 25, pCount); 130} 131 132static const char ** 133getSomeEncodings(int32_t *pCount) { 134 /* 20 evenly distributed */ 135 return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount); 136} 137 138static const char ** 139getEveryThirdEncoding(int32_t *pCount) { 140 return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount); 141} 142 143static const char ** 144getAllEncodings(int32_t *pCount) { 145 return getEncodings(0, 1, gCountAvailable, pCount); 146} 147 148typedef const char **GetEncodingsFn(int32_t *); 149 150static GetEncodingsFn *const getEncodingsFns[] = { 151 getOneEncoding, 152 getFirstEvenEncodings, 153 getMiddleEncodings, 154 getLastEncodings, 155 getSomeEncodings, 156 getEveryThirdEncoding, 157 getAllEncodings 158}; 159 160static FILE *fopenOrError(const char *filename) { 161 int32_t needLen; 162 FILE *f; 163 char fnbuf[FILENAME_BUFFER]; 164 const char* directory= ctest_dataSrcDir(); 165 needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1; 166 if(needLen > FILENAME_BUFFER) { 167 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n", 168 filename, needLen, FILENAME_BUFFER); 169 return NULL; 170 } 171 172 strcpy(fnbuf, directory); 173 strcat(fnbuf, TDSRCPATH); 174 strcat(fnbuf, filename); 175 176 f = fopen(fnbuf, "rb"); 177 178 if(f == NULL) { 179 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename); 180 } 181 return f; 182} 183 184typedef struct TestText { 185 char *text, *textLimit; 186 char *limit; 187 int32_t number; 188} TestText; 189 190static void 191text_reset(TestText *tt) { 192 tt->limit = tt->text; 193 tt->number = 0; 194} 195 196static char * 197text_nextString(TestText *tt, int32_t *pLength) { 198 char *s = tt->limit; 199 if (s == tt->textLimit) { 200 /* we already delivered the last string */ 201 return NULL; 202 } else if (s == tt->text) { 203 /* first string */ 204 if ((tt->textLimit - tt->text) >= 3 && 205 s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf 206 ) { 207 s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */ 208 } 209 } else { 210 /* skip the string terminator */ 211 ++s; 212 ++tt->number; 213 } 214 215 /* find the end of this string */ 216 tt->limit = uprv_strchr(s, 0); 217 *pLength = (int32_t)(tt->limit - s); 218 return s; 219} 220 221static UBool 222text_open(TestText *tt) { 223 FILE *f; 224 char *s; 225 int32_t length; 226 uprv_memset(tt, 0, sizeof(TestText)); 227 f = fopenOrError("ConverterSelectorTestUTF8.txt"); 228 if(!f) { 229 return FALSE; 230 } 231 fseek(f, 0, SEEK_END); 232 length = (int32_t)ftell(f); 233 fseek(f, 0, SEEK_SET); 234 tt->text = (char *)uprv_malloc(length + 1); 235 if (tt->text == NULL) { 236 fclose(f); 237 return FALSE; 238 } 239 if (length != fread(tt->text, 1, length, f)) { 240 log_err("error reading %ld bytes from test text file\n", (long)length); 241 length = 0; 242 uprv_free(tt->text); 243 } 244 fclose(f); 245 tt->textLimit = tt->text + length; 246 *tt->textLimit = 0; 247 /* replace all Unicode '#' (U+0023) with NUL */ 248 for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {} 249 text_reset(tt); 250 return TRUE; 251} 252 253static void 254text_close(TestText *tt) { 255 uprv_free(tt->text); 256} 257 258static int32_t findIndex(const char* converterName) { 259 int32_t i; 260 for (i = 0 ; i < gCountAvailable; i++) { 261 if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) { 262 return i; 263 } 264 } 265 return -1; 266} 267 268static UBool * 269getResultsManually(const char** encodings, int32_t num_encodings, 270 const char *utf8, int32_t length, 271 const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) { 272 UBool* resultsManually; 273 int32_t i; 274 275 resultsManually = (UBool*) uprv_malloc(gCountAvailable); 276 uprv_memset(resultsManually, 0, gCountAvailable); 277 278 for(i = 0 ; i < num_encodings ; i++) { 279 UErrorCode status = U_ZERO_ERROR; 280 /* get unicode set for that converter */ 281 USet* set; 282 UConverter* test_converter; 283 UChar32 cp; 284 int32_t encIndex, offset; 285 286 set = uset_openEmpty(); 287 test_converter = ucnv_open(encodings[i], &status); 288 ucnv_getUnicodeSet(test_converter, set, 289 whichSet, &status); 290 if (excludedCodePoints != NULL) { 291 uset_addAll(set, excludedCodePoints); 292 } 293 uset_freeze(set); 294 offset = 0; 295 cp = 0; 296 297 encIndex = findIndex(encodings[i]); 298 /* 299 * The following is almost, but not entirely, the same as 300 * resultsManually[encIndex] = 301 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); 302 * They might be different if the set contains strings, 303 * or if the utf8 string contains an illegal sequence. 304 * 305 * The UConverterSelector does not currently handle strings that can be 306 * converted, and it treats an illegal sequence as convertible 307 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. 308 */ 309 resultsManually[encIndex] = TRUE; 310 while(offset<length) { 311 U8_NEXT(utf8, offset, length, cp); 312 if (cp >= 0 && !uset_contains(set, cp)) { 313 resultsManually[encIndex] = FALSE; 314 break; 315 } 316 } 317 uset_close(set); 318 ucnv_close(test_converter); 319 } 320 return resultsManually; 321} 322 323/* closes res but does not free resultsManually */ 324static void verifyResult(UEnumeration* res, const UBool *resultsManually) { 325 UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool)); 326 const char* name; 327 UErrorCode status = U_ZERO_ERROR; 328 int32_t i; 329 330 /* fill the bool for the selector results! */ 331 uprv_memset(resultsFromSystem, 0, gCountAvailable); 332 while ((name = uenum_next(res,NULL, &status)) != NULL) { 333 resultsFromSystem[findIndex(name)] = TRUE; 334 } 335 for(i = 0 ; i < gCountAvailable; i++) { 336 if(resultsManually[i] != resultsFromSystem[i]) { 337 log_err("failure in converter selector\n" 338 "converter %s had conflicting results -- manual: %d, system %d\n", 339 gAvailableNames[i], resultsManually[i], resultsFromSystem[i]); 340 } 341 } 342 uprv_free(resultsFromSystem); 343 uenum_close(res); 344} 345 346static UConverterSelector * 347serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) { 348 char *new_buffer; 349 int32_t ser_len, ser_len2; 350 /* preflight */ 351 ser_len = ucnvsel_serialize(sel, NULL, 0, status); 352 if (*status != U_BUFFER_OVERFLOW_ERROR) { 353 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status)); 354 return sel; 355 } 356 new_buffer = (char *)uprv_malloc(ser_len); 357 *status = U_ZERO_ERROR; 358 ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status); 359 if (U_FAILURE(*status) || ser_len != ser_len2) { 360 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status)); 361 uprv_free(new_buffer); 362 return sel; 363 } 364 ucnvsel_close(sel); 365 uprv_free(*buffer); 366 *buffer = new_buffer; 367 sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status); 368 if (U_FAILURE(*status)) { 369 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status)); 370 return NULL; 371 } 372 return sel; 373} 374 375static void TestSelector() 376{ 377 TestText text; 378 USet* excluded_sets[3] = { NULL }; 379 int32_t i, testCaseIdx; 380 381 if (!getAvailableNames()) { 382 return; 383 } 384 if (!text_open(&text)) { 385 releaseAvailableNames();; 386 } 387 388 excluded_sets[0] = uset_openEmpty(); 389 for(i = 1 ; i < 3 ; i++) { 390 excluded_sets[i] = uset_open(i*30, i*30+500); 391 } 392 393 for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++) 394 { 395 int32_t excluded_set_id; 396 int32_t num_encodings; 397 const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings); 398 if (QUICK && num_encodings > 25) { 399 uprv_free((void *)encodings); 400 continue; 401 } 402 403 /* 404 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++) 405 * 406 * This loop was replaced by the following statement because 407 * the loop made the test run longer without adding to the code coverage. 408 * The handling of the exclusion set is independent of the 409 * set of encodings, so there is no need to test every combination. 410 */ 411 excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets); 412 { 413 UConverterSelector *sel_rt, *sel_fb; 414 char *buffer_fb = NULL; 415 UErrorCode status = U_ZERO_ERROR; 416 sel_rt = ucnvsel_open(encodings, num_encodings, 417 excluded_sets[excluded_set_id], 418 UCNV_ROUNDTRIP_SET, &status); 419 if (num_encodings == gCountAvailable) { 420 /* test the special "all converters" parameter values */ 421 sel_fb = ucnvsel_open(NULL, 0, 422 excluded_sets[excluded_set_id], 423 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 424 } else if (uset_isEmpty(excluded_sets[excluded_set_id])) { 425 /* test that a NULL set gives the same results as an empty set */ 426 sel_fb = ucnvsel_open(encodings, num_encodings, 427 NULL, 428 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 429 } else { 430 sel_fb = ucnvsel_open(encodings, num_encodings, 431 excluded_sets[excluded_set_id], 432 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 433 } 434 if (U_FAILURE(status)) { 435 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status)); 436 ucnvsel_close(sel_rt); 437 uprv_free((void *)encodings); 438 continue; 439 } 440 441 text_reset(&text); 442 for (;;) { 443 UBool *manual_rt, *manual_fb; 444 static UChar utf16[10000]; 445 char *s; 446 int32_t length8, length16; 447 448 s = text_nextString(&text, &length8); 449 if (s == NULL || (QUICK && text.number > 3)) { 450 break; 451 } 452 453 manual_rt = getResultsManually(encodings, num_encodings, 454 s, length8, 455 excluded_sets[excluded_set_id], 456 UCNV_ROUNDTRIP_SET); 457 manual_fb = getResultsManually(encodings, num_encodings, 458 s, length8, 459 excluded_sets[excluded_set_id], 460 UCNV_ROUNDTRIP_AND_FALLBACK_SET); 461 /* UTF-8 with length */ 462 status = U_ZERO_ERROR; 463 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt); 464 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb); 465 /* UTF-8 NUL-terminated */ 466 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt); 467 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb); 468 469 u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status); 470 if (U_FAILURE(status)) { 471 log_err("error converting the test text (string %ld) to UTF-16 - %s\n", 472 (long)text.number, u_errorName(status)); 473 } else { 474 if (text.number == 0) { 475 sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status); 476 } 477 if (U_SUCCESS(status)) { 478 /* UTF-16 with length */ 479 verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt); 480 verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb); 481 /* UTF-16 NUL-terminated */ 482 verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt); 483 verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb); 484 } 485 } 486 487 uprv_free(manual_rt); 488 uprv_free(manual_fb); 489 } 490 ucnvsel_close(sel_rt); 491 ucnvsel_close(sel_fb); 492 uprv_free(buffer_fb); 493 } 494 uprv_free((void *)encodings); 495 } 496 497 releaseAvailableNames(); 498 text_close(&text); 499 for(i = 0 ; i < 3 ; i++) { 500 uset_close(excluded_sets[i]); 501 } 502} 503