/* ucnvseltst.c revision b0ac937921a2c196d8b9da665135bf6ba01a1ccf */
1/******************************************************************** 2 * Copyright (c) 1997-2009, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************** 5 * 6 * File UCNVSELTST.C 7 * 8 * Modification History: 9 * Name Description 10 * MOHAMED ELDAWY Creation 11 ******************************************************************** 12 */ 13 14/* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/ 15 16#include "ucnvseltst.h" 17 18#include <stdio.h> 19 20#include "unicode/utypes.h" 21#include "unicode/ucnvsel.h" 22#include "unicode/ustring.h" 23#include "cmemory.h" 24#include "cstring.h" 25 26#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 27 28#define FILENAME_BUFFER 1024 29 30#define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING 31 32static void TestSelector(void); 33 34void addCnvSelTest(TestNode** root) 35{ 36 addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector"); 37} 38 39static const char **gAvailableNames = NULL; 40static int32_t gCountAvailable = 0; 41 42static UBool 43getAvailableNames() { 44 int32_t i; 45 if (gAvailableNames != NULL) { 46 return TRUE; 47 } 48 gCountAvailable = ucnv_countAvailable(); 49 gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *)); 50 if (gAvailableNames == NULL) { 51 log_err("unable to allocate memory for %ld available converter names\n", 52 (long)gCountAvailable); 53 return FALSE; 54 } 55 for (i = 0; i < gCountAvailable; ++i) { 56 gAvailableNames[i] = ucnv_getAvailableName(i); 57 } 58 return TRUE; 59} 60 61static void 62releaseAvailableNames() { 63 uprv_free((void *)gAvailableNames); 64 gAvailableNames = NULL; 65 gCountAvailable = 0; 66} 67 68static const char ** 69getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) { 70 const char **names; 71 int32_t i; 72 73 *pCount = 0; 74 if (count <= 0) { 75 
return NULL; 76 } 77 names = (const char **)uprv_malloc(count * sizeof(char *)); 78 if (names == NULL) { 79 log_err("memory allocation error for %ld pointers\n", (long)count); 80 return NULL; 81 } 82 if (step == 0 && count > 0) { 83 step = 1; 84 } 85 for (i = 0; i < count; ++i) { 86 if (0 <= start && start < gCountAvailable) { 87 names[i] = gAvailableNames[start]; 88 start += step; 89 ++*pCount; 90 } 91 } 92 return names; 93} 94 95#if 0 96/* 97 * ucnvsel_open() does not support "no encodings": 98 * Given 0 encodings it will open a selector for all available ones. 99 */ 100static const char ** 101getNoEncodings(int32_t *pCount) { 102 *pCount = 0; 103 return NULL; 104} 105#endif 106 107static const char ** 108getOneEncoding(int32_t *pCount) { 109 return getEncodings(1, 0, 1, pCount); 110} 111 112static const char ** 113getFirstEvenEncodings(int32_t *pCount) { 114 return getEncodings(0, 2, 25, pCount); 115} 116 117static const char ** 118getMiddleEncodings(int32_t *pCount) { 119 return getEncodings(gCountAvailable - 12, 1, 22, pCount); 120} 121 122static const char ** 123getLastEncodings(int32_t *pCount) { 124 return getEncodings(gCountAvailable - 1, -1, 25, pCount); 125} 126 127static const char ** 128getSomeEncodings(int32_t *pCount) { 129 /* 20 evenly distributed */ 130 return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount); 131} 132 133static const char ** 134getEveryThirdEncoding(int32_t *pCount) { 135 return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount); 136} 137 138static const char ** 139getAllEncodings(int32_t *pCount) { 140 return getEncodings(0, 1, gCountAvailable, pCount); 141} 142 143typedef const char **GetEncodingsFn(int32_t *); 144 145static GetEncodingsFn *const getEncodingsFns[] = { 146 getOneEncoding, 147 getFirstEvenEncodings, 148 getMiddleEncodings, 149 getLastEncodings, 150 getSomeEncodings, 151 getEveryThirdEncoding, 152 getAllEncodings 153}; 154 155static FILE *fopenOrError(const char *filename) { 156 int32_t needLen; 157 FILE 
*f; 158 char fnbuf[FILENAME_BUFFER]; 159 const char* directory= ctest_dataSrcDir(); 160 needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1; 161 if(needLen > FILENAME_BUFFER) { 162 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n", 163 filename, needLen, FILENAME_BUFFER); 164 return NULL; 165 } 166 167 strcpy(fnbuf, directory); 168 strcat(fnbuf, TDSRCPATH); 169 strcat(fnbuf, filename); 170 171 f = fopen(fnbuf, "rb"); 172 173 if(f == NULL) { 174 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename); 175 } 176 return f; 177} 178 179typedef struct TestText { 180 char *text, *textLimit; 181 char *limit; 182 int32_t number; 183} TestText; 184 185static void 186text_reset(TestText *tt) { 187 tt->limit = tt->text; 188 tt->number = 0; 189} 190 191static char * 192text_nextString(TestText *tt, int32_t *pLength) { 193 char *s = tt->limit; 194 if (s == tt->textLimit) { 195 /* we already delivered the last string */ 196 return NULL; 197 } else if (s == tt->text) { 198 /* first string */ 199 if ((tt->textLimit - tt->text) >= 3 && 200 s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf 201 ) { 202 s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */ 203 } 204 } else { 205 /* skip the string terminator */ 206 ++s; 207 ++tt->number; 208 } 209 210 /* find the end of this string */ 211 tt->limit = uprv_strchr(s, 0); 212 *pLength = (int32_t)(tt->limit - s); 213 return s; 214} 215 216static UBool 217text_open(TestText *tt) { 218 FILE *f; 219 char *s; 220 int32_t length; 221 uprv_memset(tt, 0, sizeof(TestText)); 222 f = fopenOrError("ConverterSelectorTestUTF8.txt"); 223 if(!f) { 224 return FALSE; 225 } 226 fseek(f, 0, SEEK_END); 227 length = (int32_t)ftell(f); 228 fseek(f, 0, SEEK_SET); 229 tt->text = (char *)uprv_malloc(length + 1); 230 if (tt->text == NULL) { 231 fclose(f); 232 return FALSE; 233 } 234 if (length != fread(tt->text, 1, length, f)) { 235 log_err("error reading %ld bytes 
from test text file\n", (long)length); 236 length = 0; 237 uprv_free(tt->text); 238 } 239 fclose(f); 240 tt->textLimit = tt->text + length; 241 *tt->textLimit = 0; 242 /* replace all Unicode '#' (U+0023) with NUL */ 243 for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {} 244 text_reset(tt); 245 return TRUE; 246} 247 248static void 249text_close(TestText *tt) { 250 uprv_free(tt->text); 251} 252 253static int32_t findIndex(const char* converterName) { 254 int32_t i; 255 for (i = 0 ; i < gCountAvailable; i++) { 256 if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) { 257 return i; 258 } 259 } 260 return -1; 261} 262 263static UBool * 264getResultsManually(const char** encodings, int32_t num_encodings, 265 const char *utf8, int32_t length, 266 const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) { 267 UBool* resultsManually; 268 int32_t i; 269 270 resultsManually = (UBool*) uprv_malloc(gCountAvailable); 271 uprv_memset(resultsManually, 0, gCountAvailable); 272 273 for(i = 0 ; i < num_encodings ; i++) { 274 UErrorCode status = U_ZERO_ERROR; 275 /* get unicode set for that converter */ 276 USet* set; 277 UConverter* test_converter; 278 UChar32 cp; 279 int32_t encIndex, offset; 280 281 set = uset_openEmpty(); 282 test_converter = ucnv_open(encodings[i], &status); 283 ucnv_getUnicodeSet(test_converter, set, 284 whichSet, &status); 285 if (excludedCodePoints != NULL) { 286 uset_addAll(set, excludedCodePoints); 287 } 288 uset_freeze(set); 289 offset = 0; 290 cp = 0; 291 292 encIndex = findIndex(encodings[i]); 293 /* 294 * The following is almost, but not entirely, the same as 295 * resultsManually[encIndex] = 296 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); 297 * They might be different if the set contains strings, 298 * or if the utf8 string contains an illegal sequence. 
299 * 300 * The UConverterSelector does not currently handle strings that can be 301 * converted, and it treats an illegal sequence as convertible 302 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. 303 */ 304 resultsManually[encIndex] = TRUE; 305 while(offset<length) { 306 U8_NEXT(utf8, offset, length, cp); 307 if (cp >= 0 && !uset_contains(set, cp)) { 308 resultsManually[encIndex] = FALSE; 309 break; 310 } 311 } 312 uset_close(set); 313 ucnv_close(test_converter); 314 } 315 return resultsManually; 316} 317 318/* closes res but does not free resultsManually */ 319static void verifyResult(UEnumeration* res, const UBool *resultsManually) { 320 UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool)); 321 const char* name; 322 UErrorCode status = U_ZERO_ERROR; 323 int32_t i; 324 325 /* fill the bool for the selector results! */ 326 uprv_memset(resultsFromSystem, 0, gCountAvailable); 327 while ((name = uenum_next(res,NULL, &status)) != NULL) { 328 resultsFromSystem[findIndex(name)] = TRUE; 329 } 330 for(i = 0 ; i < gCountAvailable; i++) { 331 if(resultsManually[i] != resultsFromSystem[i]) { 332 log_err("failure in converter selector\n" 333 "converter %s had conflicting results -- manual: %d, system %d\n", 334 gAvailableNames[i], resultsManually[i], resultsFromSystem[i]); 335 } 336 } 337 uprv_free(resultsFromSystem); 338 uenum_close(res); 339} 340 341static UConverterSelector * 342serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) { 343 char *new_buffer; 344 int32_t ser_len, ser_len2; 345 /* preflight */ 346 ser_len = ucnvsel_serialize(sel, NULL, 0, status); 347 if (*status != U_BUFFER_OVERFLOW_ERROR) { 348 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status)); 349 return sel; 350 } 351 new_buffer = (char *)uprv_malloc(ser_len); 352 *status = U_ZERO_ERROR; 353 ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status); 354 if (U_FAILURE(*status) || ser_len 
!= ser_len2) { 355 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status)); 356 uprv_free(new_buffer); 357 return sel; 358 } 359 ucnvsel_close(sel); 360 uprv_free(*buffer); 361 *buffer = new_buffer; 362 sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status); 363 if (U_FAILURE(*status)) { 364 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status)); 365 return NULL; 366 } 367 return sel; 368} 369 370static void TestSelector() 371{ 372 TestText text; 373 USet* excluded_sets[3] = { NULL }; 374 int32_t i, testCaseIdx; 375 376 if (!getAvailableNames()) { 377 return; 378 } 379 if (!text_open(&text)) { 380 releaseAvailableNames();; 381 } 382 383 excluded_sets[0] = uset_openEmpty(); 384 for(i = 1 ; i < 3 ; i++) { 385 excluded_sets[i] = uset_open(i*30, i*30+500); 386 } 387 388 for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++) 389 { 390 int32_t excluded_set_id; 391 int32_t num_encodings; 392 const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings); 393 if (QUICK && num_encodings > 25) { 394 uprv_free((void *)encodings); 395 continue; 396 } 397 398 /* 399 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++) 400 * 401 * This loop was replaced by the following statement because 402 * the loop made the test run longer without adding to the code coverage. 403 * The handling of the exclusion set is independent of the 404 * set of encodings, so there is no need to test every combination. 
405 */ 406 excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets); 407 { 408 UConverterSelector *sel_rt, *sel_fb; 409 char *buffer_fb = NULL; 410 UErrorCode status = U_ZERO_ERROR; 411 sel_rt = ucnvsel_open(encodings, num_encodings, 412 excluded_sets[excluded_set_id], 413 UCNV_ROUNDTRIP_SET, &status); 414 if (num_encodings == gCountAvailable) { 415 /* test the special "all converters" parameter values */ 416 sel_fb = ucnvsel_open(NULL, 0, 417 excluded_sets[excluded_set_id], 418 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 419 } else if (uset_isEmpty(excluded_sets[excluded_set_id])) { 420 /* test that a NULL set gives the same results as an empty set */ 421 sel_fb = ucnvsel_open(encodings, num_encodings, 422 NULL, 423 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 424 } else { 425 sel_fb = ucnvsel_open(encodings, num_encodings, 426 excluded_sets[excluded_set_id], 427 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 428 } 429 if (U_FAILURE(status)) { 430 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status)); 431 ucnvsel_close(sel_rt); 432 uprv_free((void *)encodings); 433 continue; 434 } 435 436 text_reset(&text); 437 for (;;) { 438 UBool *manual_rt, *manual_fb; 439 static UChar utf16[10000]; 440 char *s; 441 int32_t length8, length16; 442 443 s = text_nextString(&text, &length8); 444 if (s == NULL || (QUICK && text.number > 3)) { 445 break; 446 } 447 448 manual_rt = getResultsManually(encodings, num_encodings, 449 s, length8, 450 excluded_sets[excluded_set_id], 451 UCNV_ROUNDTRIP_SET); 452 manual_fb = getResultsManually(encodings, num_encodings, 453 s, length8, 454 excluded_sets[excluded_set_id], 455 UCNV_ROUNDTRIP_AND_FALLBACK_SET); 456 /* UTF-8 with length */ 457 status = U_ZERO_ERROR; 458 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt); 459 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb); 460 /* UTF-8 NUL-terminated */ 461 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), 
manual_rt); 462 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb); 463 464 u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status); 465 if (U_FAILURE(status)) { 466 log_err("error converting the test text (string %ld) to UTF-16 - %s\n", 467 (long)text.number, u_errorName(status)); 468 } else { 469 if (text.number == 0) { 470 sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status); 471 } 472 if (U_SUCCESS(status)) { 473 /* UTF-16 with length */ 474 verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt); 475 verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb); 476 /* UTF-16 NUL-terminated */ 477 verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt); 478 verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb); 479 } 480 } 481 482 uprv_free(manual_rt); 483 uprv_free(manual_fb); 484 } 485 ucnvsel_close(sel_rt); 486 ucnvsel_close(sel_fb); 487 uprv_free(buffer_fb); 488 } 489 uprv_free((void *)encodings); 490 } 491 492 releaseAvailableNames(); 493 text_close(&text); 494 for(i = 0 ; i < 3 ; i++) { 495 uset_close(excluded_sets[i]); 496 } 497} 498