ucnvseltst.c revision 85bf2e2fbc60a9f938064abc8127d61da7d19882
1/******************************************************************** 2 * Copyright (c) 1997-2009, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************** 5 * 6 * File UCNVSELTST.C 7 * 8 * Modification History: 9 * Name Description 10 * MOHAMED ELDAWY Creation 11 ******************************************************************** 12 */ 13 14/* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/ 15 16#include "ucnvseltst.h" 17 18#include <stdio.h> 19 20#include "unicode/utypes.h" 21#include "unicode/ucnvsel.h" 22#include "unicode/ustring.h" 23#include "cmemory.h" 24#include "cstring.h" 25 26#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 27 28#define FILENAME_BUFFER 1024 29 30#define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING 31 32static void TestSelector(void); 33 34void addCnvSelTest(TestNode** root) 35{ 36 addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector"); 37} 38 39static const char **gAvailableNames = NULL; 40static int32_t gCountAvailable = 0; 41 42static UBool 43getAvailableNames() { 44 int32_t i; 45 if (gAvailableNames != NULL) { 46 return TRUE; 47 } 48 gCountAvailable = ucnv_countAvailable(); 49 if (gCountAvailable == 0) { 50 log_data_err("No converters available.\n"); 51 return FALSE; 52 } 53 gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *)); 54 if (gAvailableNames == NULL) { 55 log_err("unable to allocate memory for %ld available converter names\n", 56 (long)gCountAvailable); 57 return FALSE; 58 } 59 for (i = 0; i < gCountAvailable; ++i) { 60 gAvailableNames[i] = ucnv_getAvailableName(i); 61 } 62 return TRUE; 63} 64 65static void 66releaseAvailableNames() { 67 uprv_free((void *)gAvailableNames); 68 gAvailableNames = NULL; 69 gCountAvailable = 0; 70} 71 72static const char ** 73getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) { 74 const char **names; 75 int32_t i; 76 77 *pCount = 0; 78 if (count <= 0) { 79 return NULL; 80 } 81 names = (const char **)uprv_malloc(count * sizeof(char *)); 82 if (names == NULL) { 83 log_err("memory allocation error for %ld pointers\n", (long)count); 84 return NULL; 85 } 86 if (step == 0 && count > 0) { 87 step = 1; 88 } 89 for (i = 0; i < count; ++i) { 90 if (0 <= start && start < gCountAvailable) { 91 names[i] = gAvailableNames[start]; 92 start += step; 93 ++*pCount; 94 } 95 } 96 return names; 97} 98 99#if 0 100/* 101 * ucnvsel_open() does not support "no encodings": 102 * Given 0 encodings it will open a selector for all available ones. 103 */ 104static const char ** 105getNoEncodings(int32_t *pCount) { 106 *pCount = 0; 107 return NULL; 108} 109#endif 110 111static const char ** 112getOneEncoding(int32_t *pCount) { 113 return getEncodings(1, 0, 1, pCount); 114} 115 116static const char ** 117getFirstEvenEncodings(int32_t *pCount) { 118 return getEncodings(0, 2, 25, pCount); 119} 120 121static const char ** 122getMiddleEncodings(int32_t *pCount) { 123 return getEncodings(gCountAvailable - 12, 1, 22, pCount); 124} 125 126static const char ** 127getLastEncodings(int32_t *pCount) { 128 return getEncodings(gCountAvailable - 1, -1, 25, pCount); 129} 130 131static const char ** 132getSomeEncodings(int32_t *pCount) { 133 /* 20 evenly distributed */ 134 return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount); 135} 136 137static const char ** 138getEveryThirdEncoding(int32_t *pCount) { 139 return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount); 140} 141 142static const char ** 143getAllEncodings(int32_t *pCount) { 144 return getEncodings(0, 1, gCountAvailable, pCount); 145} 146 147typedef const char **GetEncodingsFn(int32_t *); 148 149static GetEncodingsFn *const getEncodingsFns[] = { 150 getOneEncoding, 151 getFirstEvenEncodings, 152 getMiddleEncodings, 153 getLastEncodings, 154 getSomeEncodings, 155 getEveryThirdEncoding, 156 getAllEncodings 157}; 158 159static FILE *fopenOrError(const char *filename) { 160 int32_t needLen; 161 FILE *f; 162 char fnbuf[FILENAME_BUFFER]; 163 const char* directory= ctest_dataSrcDir(); 164 needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1; 165 if(needLen > FILENAME_BUFFER) { 166 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n", 167 filename, needLen, FILENAME_BUFFER); 168 return NULL; 169 } 170 171 strcpy(fnbuf, directory); 172 strcat(fnbuf, TDSRCPATH); 173 strcat(fnbuf, filename); 174 175 f = fopen(fnbuf, "rb"); 176 177 if(f == NULL) { 178 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename); 179 } 180 return f; 181} 182 183typedef struct TestText { 184 char *text, *textLimit; 185 char *limit; 186 int32_t number; 187} TestText; 188 189static void 190text_reset(TestText *tt) { 191 tt->limit = tt->text; 192 tt->number = 0; 193} 194 195static char * 196text_nextString(TestText *tt, int32_t *pLength) { 197 char *s = tt->limit; 198 if (s == tt->textLimit) { 199 /* we already delivered the last string */ 200 return NULL; 201 } else if (s == tt->text) { 202 /* first string */ 203 if ((tt->textLimit - tt->text) >= 3 && 204 s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf 205 ) { 206 s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */ 207 } 208 } else { 209 /* skip the string terminator */ 210 ++s; 211 ++tt->number; 212 } 213 214 /* find the end of this string */ 215 tt->limit = uprv_strchr(s, 0); 216 *pLength = (int32_t)(tt->limit - s); 217 return s; 218} 219 220static UBool 221text_open(TestText *tt) { 222 FILE *f; 223 char *s; 224 int32_t length; 225 uprv_memset(tt, 0, sizeof(TestText)); 226 f = fopenOrError("ConverterSelectorTestUTF8.txt"); 227 if(!f) { 228 return FALSE; 229 } 230 fseek(f, 0, SEEK_END); 231 length = (int32_t)ftell(f); 232 fseek(f, 0, SEEK_SET); 233 tt->text = (char *)uprv_malloc(length + 1); 234 if (tt->text == NULL) { 235 fclose(f); 236 return FALSE; 237 } 238 if (length != fread(tt->text, 1, length, f)) { 239 log_err("error reading %ld bytes from test text file\n", (long)length); 240 length = 0; 241 uprv_free(tt->text); 242 } 243 fclose(f); 244 tt->textLimit = tt->text + length; 245 *tt->textLimit = 0; 246 /* replace all Unicode '#' (U+0023) with NUL */ 247 for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {} 248 text_reset(tt); 249 return TRUE; 250} 251 252static void 253text_close(TestText *tt) { 254 uprv_free(tt->text); 255} 256 257static int32_t findIndex(const char* converterName) { 258 int32_t i; 259 for (i = 0 ; i < gCountAvailable; i++) { 260 if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) { 261 return i; 262 } 263 } 264 return -1; 265} 266 267static UBool * 268getResultsManually(const char** encodings, int32_t num_encodings, 269 const char *utf8, int32_t length, 270 const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) { 271 UBool* resultsManually; 272 int32_t i; 273 274 resultsManually = (UBool*) uprv_malloc(gCountAvailable); 275 uprv_memset(resultsManually, 0, gCountAvailable); 276 277 for(i = 0 ; i < num_encodings ; i++) { 278 UErrorCode status = U_ZERO_ERROR; 279 /* get unicode set for that converter */ 280 USet* set; 281 UConverter* test_converter; 282 UChar32 cp; 283 int32_t encIndex, offset; 284 285 set = uset_openEmpty(); 286 test_converter = ucnv_open(encodings[i], &status); 287 ucnv_getUnicodeSet(test_converter, set, 288 whichSet, &status); 289 if (excludedCodePoints != NULL) { 290 uset_addAll(set, excludedCodePoints); 291 } 292 uset_freeze(set); 293 offset = 0; 294 cp = 0; 295 296 encIndex = findIndex(encodings[i]); 297 /* 298 * The following is almost, but not entirely, the same as 299 * resultsManually[encIndex] = 300 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); 301 * They might be different if the set contains strings, 302 * or if the utf8 string contains an illegal sequence. 303 * 304 * The UConverterSelector does not currently handle strings that can be 305 * converted, and it treats an illegal sequence as convertible 306 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. 307 */ 308 resultsManually[encIndex] = TRUE; 309 while(offset<length) { 310 U8_NEXT(utf8, offset, length, cp); 311 if (cp >= 0 && !uset_contains(set, cp)) { 312 resultsManually[encIndex] = FALSE; 313 break; 314 } 315 } 316 uset_close(set); 317 ucnv_close(test_converter); 318 } 319 return resultsManually; 320} 321 322/* closes res but does not free resultsManually */ 323static void verifyResult(UEnumeration* res, const UBool *resultsManually) { 324 UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool)); 325 const char* name; 326 UErrorCode status = U_ZERO_ERROR; 327 int32_t i; 328 329 /* fill the bool for the selector results! */ 330 uprv_memset(resultsFromSystem, 0, gCountAvailable); 331 while ((name = uenum_next(res,NULL, &status)) != NULL) { 332 resultsFromSystem[findIndex(name)] = TRUE; 333 } 334 for(i = 0 ; i < gCountAvailable; i++) { 335 if(resultsManually[i] != resultsFromSystem[i]) { 336 log_err("failure in converter selector\n" 337 "converter %s had conflicting results -- manual: %d, system %d\n", 338 gAvailableNames[i], resultsManually[i], resultsFromSystem[i]); 339 } 340 } 341 uprv_free(resultsFromSystem); 342 uenum_close(res); 343} 344 345static UConverterSelector * 346serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) { 347 char *new_buffer; 348 int32_t ser_len, ser_len2; 349 /* preflight */ 350 ser_len = ucnvsel_serialize(sel, NULL, 0, status); 351 if (*status != U_BUFFER_OVERFLOW_ERROR) { 352 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status)); 353 return sel; 354 } 355 new_buffer = (char *)uprv_malloc(ser_len); 356 *status = U_ZERO_ERROR; 357 ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status); 358 if (U_FAILURE(*status) || ser_len != ser_len2) { 359 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status)); 360 uprv_free(new_buffer); 361 return sel; 362 } 363 ucnvsel_close(sel); 364 uprv_free(*buffer); 365 *buffer = new_buffer; 366 sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status); 367 if (U_FAILURE(*status)) { 368 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status)); 369 return NULL; 370 } 371 return sel; 372} 373 374static void TestSelector() 375{ 376 TestText text; 377 USet* excluded_sets[3] = { NULL }; 378 int32_t i, testCaseIdx; 379 380 if (!getAvailableNames()) { 381 return; 382 } 383 if (!text_open(&text)) { 384 releaseAvailableNames();; 385 } 386 387 excluded_sets[0] = uset_openEmpty(); 388 for(i = 1 ; i < 3 ; i++) { 389 excluded_sets[i] = uset_open(i*30, i*30+500); 390 } 391 392 for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++) 393 { 394 int32_t excluded_set_id; 395 int32_t num_encodings; 396 const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings); 397 if (QUICK && num_encodings > 25) { 398 uprv_free((void *)encodings); 399 continue; 400 } 401 402 /* 403 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++) 404 * 405 * This loop was replaced by the following statement because 406 * the loop made the test run longer without adding to the code coverage. 407 * The handling of the exclusion set is independent of the 408 * set of encodings, so there is no need to test every combination. 409 */ 410 excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets); 411 { 412 UConverterSelector *sel_rt, *sel_fb; 413 char *buffer_fb = NULL; 414 UErrorCode status = U_ZERO_ERROR; 415 sel_rt = ucnvsel_open(encodings, num_encodings, 416 excluded_sets[excluded_set_id], 417 UCNV_ROUNDTRIP_SET, &status); 418 if (num_encodings == gCountAvailable) { 419 /* test the special "all converters" parameter values */ 420 sel_fb = ucnvsel_open(NULL, 0, 421 excluded_sets[excluded_set_id], 422 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 423 } else if (uset_isEmpty(excluded_sets[excluded_set_id])) { 424 /* test that a NULL set gives the same results as an empty set */ 425 sel_fb = ucnvsel_open(encodings, num_encodings, 426 NULL, 427 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 428 } else { 429 sel_fb = ucnvsel_open(encodings, num_encodings, 430 excluded_sets[excluded_set_id], 431 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 432 } 433 if (U_FAILURE(status)) { 434 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status)); 435 ucnvsel_close(sel_rt); 436 uprv_free((void *)encodings); 437 continue; 438 } 439 440 text_reset(&text); 441 for (;;) { 442 UBool *manual_rt, *manual_fb; 443 static UChar utf16[10000]; 444 char *s; 445 int32_t length8, length16; 446 447 s = text_nextString(&text, &length8); 448 if (s == NULL || (QUICK && text.number > 3)) { 449 break; 450 } 451 452 manual_rt = getResultsManually(encodings, num_encodings, 453 s, length8, 454 excluded_sets[excluded_set_id], 455 UCNV_ROUNDTRIP_SET); 456 manual_fb = getResultsManually(encodings, num_encodings, 457 s, length8, 458 excluded_sets[excluded_set_id], 459 UCNV_ROUNDTRIP_AND_FALLBACK_SET); 460 /* UTF-8 with length */ 461 status = U_ZERO_ERROR; 462 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt); 463 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb); 464 /* UTF-8 NUL-terminated */ 465 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt); 466 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb); 467 468 u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status); 469 if (U_FAILURE(status)) { 470 log_err("error converting the test text (string %ld) to UTF-16 - %s\n", 471 (long)text.number, u_errorName(status)); 472 } else { 473 if (text.number == 0) { 474 sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status); 475 } 476 if (U_SUCCESS(status)) { 477 /* UTF-16 with length */ 478 verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt); 479 verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb); 480 /* UTF-16 NUL-terminated */ 481 verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt); 482 verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb); 483 } 484 } 485 486 uprv_free(manual_rt); 487 uprv_free(manual_fb); 488 } 489 ucnvsel_close(sel_rt); 490 ucnvsel_close(sel_fb); 491 uprv_free(buffer_fb); 492 } 493 uprv_free((void *)encodings); 494 } 495 496 releaseAvailableNames(); 497 text_close(&text); 498 for(i = 0 ; i < 3 ; i++) { 499 uset_close(excluded_sets[i]); 500 } 501} 502