1/******************************************************************** 2 * Copyright (c) 1997-2014, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************** 5 * 6 * File UCNVSELTST.C 7 * 8 * Modification History: 9 * Name Description 10 * MOHAMED ELDAWY Creation 11 ******************************************************************** 12 */ 13 14/* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/ 15 16#include "ucnvseltst.h" 17 18#include <stdio.h> 19 20#include "unicode/utypes.h" 21#include "unicode/ucnvsel.h" 22#include "unicode/ustring.h" 23#include "cmemory.h" 24#include "cstring.h" 25#include "propsvec.h" 26 27#define FILENAME_BUFFER 1024 28 29#define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING 30 31static void TestSelector(void); 32static void TestUPropsVector(void); 33void addCnvSelTest(TestNode** root); /* Declaration required to suppress compiler warnings. */ 34 35void addCnvSelTest(TestNode** root) 36{ 37 addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector"); 38 addTest(root, &TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector"); 39} 40 41static const char **gAvailableNames = NULL; 42static int32_t gCountAvailable = 0; 43 44static UBool 45getAvailableNames() { 46 int32_t i; 47 if (gAvailableNames != NULL) { 48 return TRUE; 49 } 50 gCountAvailable = ucnv_countAvailable(); 51 if (gCountAvailable == 0) { 52 log_data_err("No converters available.\n"); 53 return FALSE; 54 } 55 gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *)); 56 if (gAvailableNames == NULL) { 57 log_err("unable to allocate memory for %ld available converter names\n", 58 (long)gCountAvailable); 59 return FALSE; 60 } 61 for (i = 0; i < gCountAvailable; ++i) { 62 gAvailableNames[i] = ucnv_getAvailableName(i); 63 } 64 return TRUE; 65} 66 67static void 68releaseAvailableNames() { 69 uprv_free((void *)gAvailableNames); 70 gAvailableNames = NULL; 71 gCountAvailable = 0; 72} 73 74static const char ** 75getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) { 76 const char **names; 77 int32_t i; 78 79 *pCount = 0; 80 if (count <= 0) { 81 return NULL; 82 } 83 names = (const char **)uprv_malloc(count * sizeof(char *)); 84 if (names == NULL) { 85 log_err("memory allocation error for %ld pointers\n", (long)count); 86 return NULL; 87 } 88 if (step == 0 && count > 0) { 89 step = 1; 90 } 91 for (i = 0; i < count; ++i) { 92 if (0 <= start && start < gCountAvailable) { 93 names[i] = gAvailableNames[start]; 94 start += step; 95 ++*pCount; 96 } 97 } 98 return names; 99} 100 101#if 0 102/* 103 * ucnvsel_open() does not support "no encodings": 104 * Given 0 encodings it will open a selector for all available ones. 105 */ 106static const char ** 107getNoEncodings(int32_t *pCount) { 108 *pCount = 0; 109 return NULL; 110} 111#endif 112 113static const char ** 114getOneEncoding(int32_t *pCount) { 115 return getEncodings(1, 0, 1, pCount); 116} 117 118static const char ** 119getFirstEvenEncodings(int32_t *pCount) { 120 return getEncodings(0, 2, 25, pCount); 121} 122 123static const char ** 124getMiddleEncodings(int32_t *pCount) { 125 return getEncodings(gCountAvailable - 12, 1, 22, pCount); 126} 127 128static const char ** 129getLastEncodings(int32_t *pCount) { 130 return getEncodings(gCountAvailable - 1, -1, 25, pCount); 131} 132 133static const char ** 134getSomeEncodings(int32_t *pCount) { 135 /* 20 evenly distributed */ 136 return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount); 137} 138 139static const char ** 140getEveryThirdEncoding(int32_t *pCount) { 141 return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount); 142} 143 144static const char ** 145getAllEncodings(int32_t *pCount) { 146 return getEncodings(0, 1, gCountAvailable, pCount); 147} 148 149typedef const char **GetEncodingsFn(int32_t *); 150 151static GetEncodingsFn *const getEncodingsFns[] = { 152 getOneEncoding, 153 getFirstEvenEncodings, 154 getMiddleEncodings, 155 getLastEncodings, 156 getSomeEncodings, 157 getEveryThirdEncoding, 158 getAllEncodings 159}; 160 161static FILE *fopenOrError(const char *filename) { 162 int32_t needLen; 163 FILE *f; 164 char fnbuf[FILENAME_BUFFER]; 165 const char* directory= ctest_dataSrcDir(); 166 needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1; 167 if(needLen > FILENAME_BUFFER) { 168 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n", 169 filename, needLen, FILENAME_BUFFER); 170 return NULL; 171 } 172 173 strcpy(fnbuf, directory); 174 strcat(fnbuf, TDSRCPATH); 175 strcat(fnbuf, filename); 176 177 f = fopen(fnbuf, "rb"); 178 179 if(f == NULL) { 180 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename); 181 } 182 return f; 183} 184 185typedef struct TestText { 186 char *text, *textLimit; 187 char *limit; 188 int32_t number; 189} TestText; 190 191static void 192text_reset(TestText *tt) { 193 tt->limit = tt->text; 194 tt->number = 0; 195} 196 197static char * 198text_nextString(TestText *tt, int32_t *pLength) { 199 char *s = tt->limit; 200 if (s == tt->textLimit) { 201 /* we already delivered the last string */ 202 return NULL; 203 } else if (s == tt->text) { 204 /* first string */ 205 if ((tt->textLimit - tt->text) >= 3 && 206 s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf 207 ) { 208 s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */ 209 } 210 } else { 211 /* skip the string terminator */ 212 ++s; 213 ++tt->number; 214 } 215 216 /* find the end of this string */ 217 tt->limit = uprv_strchr(s, 0); 218 *pLength = (int32_t)(tt->limit - s); 219 return s; 220} 221 222static UBool 223text_open(TestText *tt) { 224 FILE *f; 225 char *s; 226 int32_t length; 227 uprv_memset(tt, 0, sizeof(TestText)); 228 f = fopenOrError("ConverterSelectorTestUTF8.txt"); 229 if(!f) { 230 return FALSE; 231 } 232 fseek(f, 0, SEEK_END); 233 length = (int32_t)ftell(f); 234 fseek(f, 0, SEEK_SET); 235 tt->text = (char *)uprv_malloc(length + 1); 236 if (tt->text == NULL) { 237 fclose(f); 238 return FALSE; 239 } 240 if (length != fread(tt->text, 1, length, f)) { 241 log_err("error reading %ld bytes from test text file\n", (long)length); 242 length = 0; 243 uprv_free(tt->text); 244 } 245 fclose(f); 246 tt->textLimit = tt->text + length; 247 *tt->textLimit = 0; 248 /* replace all Unicode '#' (U+0023) with NUL */ 249 for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {} 250 text_reset(tt); 251 return TRUE; 252} 253 254static void 255text_close(TestText *tt) { 256 uprv_free(tt->text); 257} 258 259static int32_t findIndex(const char* converterName) { 260 int32_t i; 261 for (i = 0 ; i < gCountAvailable; i++) { 262 if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) { 263 return i; 264 } 265 } 266 return -1; 267} 268 269static UBool * 270getResultsManually(const char** encodings, int32_t num_encodings, 271 const char *utf8, int32_t length, 272 const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) { 273 UBool* resultsManually; 274 int32_t i; 275 276 resultsManually = (UBool*) uprv_malloc(gCountAvailable); 277 uprv_memset(resultsManually, 0, gCountAvailable); 278 279 for(i = 0 ; i < num_encodings ; i++) { 280 UErrorCode status = U_ZERO_ERROR; 281 /* get unicode set for that converter */ 282 USet* set; 283 UConverter* test_converter; 284 UChar32 cp; 285 int32_t encIndex, offset; 286 287 set = uset_openEmpty(); 288 test_converter = ucnv_open(encodings[i], &status); 289 ucnv_getUnicodeSet(test_converter, set, 290 whichSet, &status); 291 if (excludedCodePoints != NULL) { 292 uset_addAll(set, excludedCodePoints); 293 } 294 uset_freeze(set); 295 offset = 0; 296 cp = 0; 297 298 encIndex = findIndex(encodings[i]); 299 /* 300 * The following is almost, but not entirely, the same as 301 * resultsManually[encIndex] = 302 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); 303 * They might be different if the set contains strings, 304 * or if the utf8 string contains an illegal sequence. 305 * 306 * The UConverterSelector does not currently handle strings that can be 307 * converted, and it treats an illegal sequence as convertible 308 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. 309 */ 310 resultsManually[encIndex] = TRUE; 311 while(offset<length) { 312 U8_NEXT(utf8, offset, length, cp); 313 if (cp >= 0 && !uset_contains(set, cp)) { 314 resultsManually[encIndex] = FALSE; 315 break; 316 } 317 } 318 uset_close(set); 319 ucnv_close(test_converter); 320 } 321 return resultsManually; 322} 323 324/* closes res but does not free resultsManually */ 325static void verifyResult(UEnumeration* res, const UBool *resultsManually) { 326 UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool)); 327 const char* name; 328 UErrorCode status = U_ZERO_ERROR; 329 int32_t i; 330 331 /* fill the bool for the selector results! */ 332 uprv_memset(resultsFromSystem, 0, gCountAvailable); 333 while ((name = uenum_next(res,NULL, &status)) != NULL) { 334 resultsFromSystem[findIndex(name)] = TRUE; 335 } 336 for(i = 0 ; i < gCountAvailable; i++) { 337 if(resultsManually[i] != resultsFromSystem[i]) { 338 log_err("failure in converter selector\n" 339 "converter %s had conflicting results -- manual: %d, system %d\n", 340 gAvailableNames[i], resultsManually[i], resultsFromSystem[i]); 341 } 342 } 343 uprv_free(resultsFromSystem); 344 uenum_close(res); 345} 346 347static UConverterSelector * 348serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) { 349 char *new_buffer; 350 int32_t ser_len, ser_len2; 351 /* preflight */ 352 ser_len = ucnvsel_serialize(sel, NULL, 0, status); 353 if (*status != U_BUFFER_OVERFLOW_ERROR) { 354 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status)); 355 return sel; 356 } 357 new_buffer = (char *)uprv_malloc(ser_len); 358 *status = U_ZERO_ERROR; 359 ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status); 360 if (U_FAILURE(*status) || ser_len != ser_len2) { 361 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status)); 362 uprv_free(new_buffer); 363 return sel; 364 } 365 ucnvsel_close(sel); 366 uprv_free(*buffer); 367 *buffer = new_buffer; 368 sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status); 369 if (U_FAILURE(*status)) { 370 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status)); 371 return NULL; 372 } 373 return sel; 374} 375 376static void TestSelector() 377{ 378 TestText text; 379 USet* excluded_sets[3] = { NULL }; 380 int32_t i, testCaseIdx; 381 382 if (!getAvailableNames()) { 383 return; 384 } 385 if (!text_open(&text)) { 386 releaseAvailableNames();; 387 } 388 389 excluded_sets[0] = uset_openEmpty(); 390 for(i = 1 ; i < 3 ; i++) { 391 excluded_sets[i] = uset_open(i*30, i*30+500); 392 } 393 394 for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx++) 395 { 396 int32_t excluded_set_id; 397 int32_t num_encodings; 398 const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings); 399 if (getTestOption(QUICK_OPTION) && num_encodings > 25) { 400 uprv_free((void *)encodings); 401 continue; 402 } 403 404 /* 405 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++) 406 * 407 * This loop was replaced by the following statement because 408 * the loop made the test run longer without adding to the code coverage. 409 * The handling of the exclusion set is independent of the 410 * set of encodings, so there is no need to test every combination. 411 */ 412 excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets); 413 { 414 UConverterSelector *sel_rt, *sel_fb; 415 char *buffer_fb = NULL; 416 UErrorCode status = U_ZERO_ERROR; 417 sel_rt = ucnvsel_open(encodings, num_encodings, 418 excluded_sets[excluded_set_id], 419 UCNV_ROUNDTRIP_SET, &status); 420 if (num_encodings == gCountAvailable) { 421 /* test the special "all converters" parameter values */ 422 sel_fb = ucnvsel_open(NULL, 0, 423 excluded_sets[excluded_set_id], 424 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 425 } else if (uset_isEmpty(excluded_sets[excluded_set_id])) { 426 /* test that a NULL set gives the same results as an empty set */ 427 sel_fb = ucnvsel_open(encodings, num_encodings, 428 NULL, 429 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 430 } else { 431 sel_fb = ucnvsel_open(encodings, num_encodings, 432 excluded_sets[excluded_set_id], 433 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 434 } 435 if (U_FAILURE(status)) { 436 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status)); 437 ucnvsel_close(sel_rt); 438 uprv_free((void *)encodings); 439 continue; 440 } 441 442 text_reset(&text); 443 for (;;) { 444 UBool *manual_rt, *manual_fb; 445 static UChar utf16[10000]; 446 char *s; 447 int32_t length8, length16; 448 449 s = text_nextString(&text, &length8); 450 if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) { 451 break; 452 } 453 454 manual_rt = getResultsManually(encodings, num_encodings, 455 s, length8, 456 excluded_sets[excluded_set_id], 457 UCNV_ROUNDTRIP_SET); 458 manual_fb = getResultsManually(encodings, num_encodings, 459 s, length8, 460 excluded_sets[excluded_set_id], 461 UCNV_ROUNDTRIP_AND_FALLBACK_SET); 462 /* UTF-8 with length */ 463 status = U_ZERO_ERROR; 464 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt); 465 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb); 466 /* UTF-8 NUL-terminated */ 467 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt); 468 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb); 469 470 u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &status); 471 if (U_FAILURE(status)) { 472 log_err("error converting the test text (string %ld) to UTF-16 - %s\n", 473 (long)text.number, u_errorName(status)); 474 } else { 475 if (text.number == 0) { 476 sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status); 477 } 478 if (U_SUCCESS(status)) { 479 /* UTF-16 with length */ 480 verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt); 481 verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb); 482 /* UTF-16 NUL-terminated */ 483 verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt); 484 verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb); 485 } 486 } 487 488 uprv_free(manual_rt); 489 uprv_free(manual_fb); 490 } 491 ucnvsel_close(sel_rt); 492 ucnvsel_close(sel_fb); 493 uprv_free(buffer_fb); 494 } 495 uprv_free((void *)encodings); 496 } 497 498 releaseAvailableNames(); 499 text_close(&text); 500 for(i = 0 ; i < 3 ; i++) { 501 uset_close(excluded_sets[i]); 502 } 503} 504 505/* Improve code coverage of UPropsVectors */ 506static void TestUPropsVector() { 507 UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR; 508 UPropsVectors *pv = upvec_open(100, &errorCode); 509 if (pv != NULL) { 510 log_err("Should have returned NULL if UErrorCode is an error."); 511 return; 512 } 513 errorCode = U_ZERO_ERROR; 514 pv = upvec_open(-1, &errorCode); 515 if (pv != NULL || U_SUCCESS(errorCode)) { 516 log_err("Should have returned NULL if column is less than 0.\n"); 517 return; 518 } 519 errorCode = U_ZERO_ERROR; 520 pv = upvec_open(100, &errorCode); 521 if (pv == NULL || U_FAILURE(errorCode)) { 522 log_err("Unable to open UPropsVectors.\n"); 523 return; 524 } 525 526 if (upvec_getValue(pv, 0, 1) != 0) { 527 log_err("upvec_getValue should return 0.\n"); 528 } 529 if (upvec_getRow(pv, 0, NULL, NULL) == NULL) { 530 log_err("upvec_getRow should not return NULL.\n"); 531 } 532 if (upvec_getArray(pv, NULL, NULL) != NULL) { 533 log_err("upvec_getArray should return NULL.\n"); 534 } 535 536 upvec_close(pv); 537} 538