1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2011, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/***************************************************************************** 7* 8* File CU_CAPITST.C 9* 10* Modification History: 11* Name Description 12* Madhu Katragadda Ported for C API 13****************************************************************************** 14*/ 15#include <stdio.h> 16#include <stdlib.h> 17#include <string.h> 18#include <ctype.h> 19#include "unicode/uloc.h" 20#include "unicode/ucnv.h" 21#include "unicode/ucnv_err.h" 22#include "unicode/putil.h" 23#include "unicode/uset.h" 24#include "unicode/ustring.h" 25#include "ucnv_bld.h" /* for sizeof(UConverter) */ 26#include "cmemory.h" /* for UAlignedMemory */ 27#include "cintltst.h" 28#include "ccapitst.h" 29 30/* for not including "cstring.h" -begin*/ 31#ifdef U_WINDOWS 32# define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2) 33#elif defined(POSIX) 34# define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2) 35#else 36# define ctest_stricmp(str1, str2) T_CString_stricmp(str1, str2) 37#endif 38 39static int U_EXPORT2 40T_CString_stricmp(const char *str1, const char *str2) { 41 if(str1==NULL) { 42 if(str2==NULL) { 43 return 0; 44 } else { 45 return -1; 46 } 47 } else if(str2==NULL) { 48 return 1; 49 } else { 50 /* compare non-NULL strings lexically with lowercase */ 51 int rc; 52 unsigned char c1, c2; 53 for(;;) { 54 c1=(unsigned char)*str1; 55 c2=(unsigned char)*str2; 56 if(c1==0) { 57 if(c2==0) { 58 return 0; 59 } else { 60 return -1; 61 } 62 } else if(c2==0) { 63 return 1; 64 } else { 65 /* compare non-zero characters with lowercase */ 66 rc=(int)(unsigned char)tolower(c1)-(int)(unsigned char)tolower(c2); 67 if(rc!=0) { 68 return rc; 69 } 70 } 71 ++str1; 72 ++str2; 73 } 74 } 75} 76/* for not including "cstring.h" -end*/ 77 78#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 79 80#define NUM_CODEPAGE 1 81#define MAX_FILE_LEN 1024*20 82#define UCS_FILE_NAME_SIZE 512 83 84/*returns an action other than the one provided*/ 85static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); 86static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); 87 88static UConverter * 89cnv_open(const char *name, UErrorCode *pErrorCode) { 90 if(name!=NULL && name[0]=='*') { 91 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode); 92 } else { 93 return ucnv_open(name, pErrorCode); 94 } 95} 96 97 98static void ListNames(void); 99static void TestFlushCache(void); 100static void TestDuplicateAlias(void); 101static void TestCCSID(void); 102static void TestJ932(void); 103static void TestJ1968(void); 104static void TestLMBCSMaxChar(void); 105 106#if !UCONFIG_NO_LEGACY_CONVERSION 107static void TestConvertSafeCloneCallback(void); 108#endif 109 110static void TestEBCDICSwapLFNL(void); 111static void TestConvertEx(void); 112static void TestConvertExFromUTF8(void); 113static void TestConvertExFromUTF8_C5F0(void); 114static void TestConvertAlgorithmic(void); 115 void TestDefaultConverterError(void); /* defined in cctest.c */ 116 void TestDefaultConverterSet(void); /* defined in cctest.c */ 117static void TestToUCountPending(void); 118static void TestFromUCountPending(void); 119static void TestDefaultName(void); 120static void TestCompareNames(void); 121static void TestSubstString(void); 122static void InvalidArguments(void); 123static void TestGetName(void); 124static void TestUTFBOM(void); 125 126void addTestConvert(TestNode** root); 127 128void addTestConvert(TestNode** root) 129{ 130 addTest(root, &ListNames, "tsconv/ccapitst/ListNames"); 131 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert"); 132 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); 133 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); 134 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); 135 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); 136#if !UCONFIG_NO_LEGACY_CONVERSION 137 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); 138#endif 139 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); 140 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); 141 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); 142#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 143 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); 144#endif 145 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); 146 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); 147 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); 148 addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); 149 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); 150 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); 151 addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet"); 152#if !UCONFIG_NO_FILE_IO 153 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); 154 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); 155#endif 156 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); 157 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); 158 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); 159 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); 160 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); 161 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); 162} 163 164static void ListNames(void) { 165 UErrorCode err = U_ZERO_ERROR; 166 int32_t testLong1 = 0; 167 const char* available_conv; 168 UEnumeration *allNamesEnum = NULL; 169 int32_t allNamesCount = 0; 170 uint16_t count; 171 172 log_verbose("Testing ucnv_openAllNames()..."); 173 allNamesEnum = ucnv_openAllNames(&err); 174 if(U_FAILURE(err)) { 175 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); 176 } 177 else { 178 const char *string = NULL; 179 int32_t len = 0; 180 int32_t count1 = 0; 181 int32_t count2 = 0; 182 allNamesCount = uenum_count(allNamesEnum, &err); 183 while ((string = uenum_next(allNamesEnum, &len, &err))) { 184 count1++; 185 log_verbose("read \"%s\", length %i\n", string, len); 186 } 187 if (U_FAILURE(err)) { 188 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err)); 189 err = U_ZERO_ERROR; 190 } 191 uenum_reset(allNamesEnum, &err); 192 while ((string = uenum_next(allNamesEnum, &len, &err))) { 193 count2++; 194 ucnv_close(ucnv_open(string, &err)); 195 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable"); 196 err = U_ZERO_ERROR; 197 } 198 if (count1 != count2) { 199 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n"); 200 } 201 } 202 uenum_close(allNamesEnum); 203 err = U_ZERO_ERROR; 204 205 /*Tests ucnv_getAvailableName(), getAvialableCount()*/ 206 207 log_verbose("Testing ucnv_countAvailable()..."); 208 209 testLong1=ucnv_countAvailable(); 210 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount); 211 212 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */ 213 214 available_conv = ucnv_getAvailableName(testLong1); 215 /*test ucnv_getAvailableName with err condition*/ 216 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 "); 217 available_conv = ucnv_getAvailableName(-1); 218 if(available_conv != NULL){ 219 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n"); 220 } 221 222 /* Test ucnv_countAliases() etc. */ 223 count = ucnv_countAliases("utf-8", &err); 224 if(U_FAILURE(err)) { 225 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); 226 } else if(count <= 0) { 227 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); 228 } else { 229 /* try to get the aliases individually */ 230 const char *alias; 231 alias = ucnv_getAlias("utf-8", 0, &err); 232 if(U_FAILURE(err)) { 233 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err)); 234 } else if(strcmp("UTF-8", alias) != 0) { 235 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias); 236 } else { 237 uint16_t aliasNum; 238 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 239 alias = ucnv_getAlias("utf-8", aliasNum, &err); 240 if(U_FAILURE(err)) { 241 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 242 } else if(strlen(alias) > 20) { 243 /* sanity check */ 244 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias); 245 } else { 246 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias); 247 } 248 } 249 if(U_SUCCESS(err)) { 250 /* try to fill an array with all aliases */ 251 const char **aliases; 252 aliases=(const char **)malloc(count * sizeof(const char *)); 253 if(aliases != 0) { 254 ucnv_getAliases("utf-8", aliases, &err); 255 if(U_FAILURE(err)) { 256 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err)); 257 } else { 258 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 259 /* compare the pointers with the ones returned individually */ 260 alias = ucnv_getAlias("utf-8", aliasNum, &err); 261 if(U_FAILURE(err)) { 262 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 263 } else if(aliases[aliasNum] != alias) { 264 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum); 265 } 266 } 267 } 268 free((char **)aliases); 269 } 270 } 271 } 272 } 273} 274 275 276static void TestConvert() 277{ 278#if !UCONFIG_NO_LEGACY_CONVERSION 279 char myptr[4]; 280 char save[4]; 281 int32_t testLong1 = 0; 282 uint16_t rest = 0; 283 int32_t len = 0; 284 int32_t x = 0; 285 FILE* ucs_file_in = NULL; 286 UChar BOM = 0x0000; 287 UChar myUChar = 0x0000; 288 char* mytarget; /* [MAX_FILE_LEN] */ 289 char* mytarget_1; 290 char* mytarget_use; 291 UChar* consumedUni = NULL; 292 char* consumed = NULL; 293 char* output_cp_buffer; /* [MAX_FILE_LEN] */ 294 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */ 295 UChar* ucs_file_buffer_use; 296 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ 297 UChar* my_ucs_file_buffer_1; 298 int8_t ii = 0; 299 int32_t j = 0; 300 uint16_t codepage_index = 0; 301 int32_t cp = 0; 302 UErrorCode err = U_ZERO_ERROR; 303 char ucs_file_name[UCS_FILE_NAME_SIZE]; 304 UConverterFromUCallback MIA1, MIA1_2; 305 UConverterToUCallback MIA2, MIA2_2; 306 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2; 307 UConverter* someConverters[5]; 308 UConverter* myConverter = 0; 309 UChar* displayname = 0; 310 311 const char* locale; 312 313 UChar* uchar1 = 0; 314 UChar* uchar2 = 0; 315 UChar* uchar3 = 0; 316 int32_t targetcapacity2; 317 int32_t targetcapacity; 318 int32_t targetsize; 319 int32_t disnamelen; 320 321 const UChar* tmp_ucs_buf; 322 const UChar* tmp_consumedUni=NULL; 323 const char* tmp_mytarget_use; 324 const char* tmp_consumed; 325 326 /****************************************************************** 327 Checking Unicode -> ksc 328 ******************************************************************/ 329 330 const char* CodePagesToTest[NUM_CODEPAGE] = 331 { 332 "ibm-949_P110-1999" 333 334 335 }; 336 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] = 337 { 338 949 339 }; 340 341 342 const int8_t CodePagesMinChars[NUM_CODEPAGE] = 343 { 344 1 345 346 }; 347 348 const int8_t CodePagesMaxChars[NUM_CODEPAGE] = 349 { 350 2 351 352 }; 353 354 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] = 355 { 356 0xAFFE 357 }; 358 359 const char* CodePagesTestFiles[NUM_CODEPAGE] = 360 { 361 "uni-text.bin" 362 }; 363 364 365 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] = 366 { 367 UCNV_IBM 368 369 }; 370 371 const char* CodePagesLocale[NUM_CODEPAGE] = 372 { 373 "ko_KR" 374 }; 375 376 UConverterFromUCallback oldFromUAction = NULL; 377 UConverterToUCallback oldToUAction = NULL; 378 const void* oldFromUContext = NULL; 379 const void* oldToUContext = NULL; 380 381 /* Allocate memory */ 382 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0])); 383 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0])); 384 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0])); 385 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0])); 386 387 ucs_file_buffer_use = ucs_file_buffer; 388 mytarget_1=mytarget; 389 mytarget_use = mytarget; 390 my_ucs_file_buffer_1=my_ucs_file_buffer; 391 392 /* flush the converter cache to get a consistent state before the flushing is tested */ 393 ucnv_flushCache(); 394 395 /*Testing ucnv_openU()*/ 396 { 397 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/ 398 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */ 399 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */ 400 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"}; 401 UChar illegalName[100]; 402 UConverter *converter=NULL; 403 err=U_ZERO_ERROR; 404 converter=ucnv_openU(converterName, &err); 405 if(U_FAILURE(err)){ 406 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err)); 407 } 408 ucnv_close(converter); 409 err=U_ZERO_ERROR; 410 converter=ucnv_openU(NULL, &err); 411 if(U_FAILURE(err)){ 412 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err)); 413 } 414 ucnv_close(converter); 415 /*testing with error value*/ 416 err=U_ILLEGAL_ARGUMENT_ERROR; 417 converter=ucnv_openU(converterName, &err); 418 if(!(converter == NULL)){ 419 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n"); 420 } 421 ucnv_close(converter); 422 err=U_ZERO_ERROR; 423 u_uastrcpy(illegalName, ""); 424 u_uastrcpy(illegalName, illegalNameChars); 425 ucnv_openU(illegalName, &err); 426 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){ 427 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n"); 428 } 429 430 err=U_ZERO_ERROR; 431 ucnv_openU(firstSortedName, &err); 432 if(err!=U_FILE_ACCESS_ERROR){ 433 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n"); 434 } 435 436 err=U_ZERO_ERROR; 437 ucnv_openU(lastSortedName, &err); 438 if(err!=U_FILE_ACCESS_ERROR){ 439 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n"); 440 } 441 442 err=U_ZERO_ERROR; 443 } 444 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n"); 445 { 446 UConverter *cnv=NULL; 447 err=U_ZERO_ERROR; 448 cnv=ucnv_open("ibm-949,Madhu", &err); 449 if(U_FAILURE(err)){ 450 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err)); 451 } 452 ucnv_close(cnv); 453 454 } 455 /*Testing ucnv_convert()*/ 456 { 457 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0; 458 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; 459 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; 460 char *target=0; 461 sourceLimit=sizeof(source)/sizeof(source[0]); 462 err=U_ZERO_ERROR; 463 targetLimit=0; 464 465 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err); 466 if(err == U_BUFFER_OVERFLOW_ERROR){ 467 err=U_ZERO_ERROR; 468 targetLimit=targetCapacity+1; 469 target=(char*)malloc(sizeof(char) * targetLimit); 470 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 471 } 472 if(U_FAILURE(err)){ 473 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err)); 474 } 475 else { 476 for(i=0; i<targetCapacity; i++){ 477 if(target[i] != expectedTarget[i]){ 478 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]); 479 } 480 } 481 482 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err); 483 if(U_FAILURE(err) || i!=7){ 484 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n", 485 u_errorName(err), i); 486 } 487 488 /*Test error conditions*/ 489 err=U_ZERO_ERROR; 490 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err); 491 if(i !=0){ 492 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n"); 493 } 494 495 err=U_ILLEGAL_ARGUMENT_ERROR; 496 sourceLimit=sizeof(source)/sizeof(source[0]); 497 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 498 if(i !=0 ){ 499 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); 500 } 501 502 err=U_ZERO_ERROR; 503 sourceLimit=sizeof(source)/sizeof(source[0]); 504 targetLimit=0; 505 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 506 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ 507 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n"); 508 } 509 err=U_ZERO_ERROR; 510 free(target); 511 } 512 } 513 514 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/ 515 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n"); 516 err=U_ILLEGAL_ARGUMENT_ERROR; 517 if(ucnv_open(NULL, &err) != NULL){ 518 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 519 } 520 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){ 521 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 522 } 523 err=U_ZERO_ERROR; 524 525 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */ 526 log_verbose("\n---Testing ucnv_open default...\n"); 527 someConverters[0] = ucnv_open(NULL,&err); 528 someConverters[1] = ucnv_open(NULL,&err); 529 someConverters[2] = ucnv_open("utf8", &err); 530 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err); 531 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */ 532 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));} 533 534 /* Testing ucnv_getName()*/ 535 /*default code page */ 536 ucnv_getName(someConverters[0], &err); 537 if(U_FAILURE(err)) { 538 log_data_err("getName[0] failed\n"); 539 } else { 540 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err)); 541 } 542 ucnv_getName(someConverters[1], &err); 543 if(U_FAILURE(err)) { 544 log_data_err("getName[1] failed\n"); 545 } else { 546 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); 547 } 548 549 ucnv_close(someConverters[0]); 550 ucnv_close(someConverters[1]); 551 ucnv_close(someConverters[2]); 552 ucnv_close(someConverters[3]); 553 554 555 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index) 556 { 557 int32_t i = 0; 558 559 err = U_ZERO_ERROR; 560#ifdef U_TOPSRCDIR 561 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING); 562#else 563 strcpy(ucs_file_name, loadTestData(&err)); 564 565 if(U_FAILURE(err)){ 566 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err)); 567 return; 568 } 569 570 { 571 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); 572 573 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ 574 *(index+1)=0; 575 } 576 } 577 578 strcat(ucs_file_name,".."U_FILE_SEP_STRING); 579#endif 580 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]); 581 582 ucs_file_in = fopen(ucs_file_name,"rb"); 583 if (!ucs_file_in) 584 { 585 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); 586 break; 587 } 588 589 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/ 590 591 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */ 592 /* ucnv_flushCache(); */ 593 myConverter =ucnv_open( "ibm-949", &err); 594 if (!myConverter || U_FAILURE(err)) 595 { 596 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); 597 fclose(ucs_file_in); 598 break; 599 } 600 601 /*testing for ucnv_getName() */ 602 log_verbose("Testing ucnv_getName()...\n"); 603 ucnv_getName(myConverter, &err); 604 if(U_FAILURE(err)) 605 log_err("Error in getName\n"); 606 else 607 { 608 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); 609 } 610 if (ctest_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) 611 log_err("getName failed\n"); 612 else 613 log_verbose("getName ok\n"); 614 /*Test getName with error condition*/ 615 { 616 const char* name=0; 617 err=U_ILLEGAL_ARGUMENT_ERROR; 618 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR"); 619 name=ucnv_getName(myConverter, &err); 620 if(name != NULL){ 621 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail"); 622 } 623 err=U_ZERO_ERROR; 624 } 625 626 627 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/ 628 629 log_verbose("Testing ucnv_getMaxCharSize()...\n"); 630 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index]) 631 log_verbose("Max byte per character OK\n"); 632 else 633 log_err("Max byte per character failed\n"); 634 635 log_verbose("\n---Testing ucnv_getMinCharSize()...\n"); 636 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index]) 637 log_verbose("Min byte per character OK\n"); 638 else 639 log_err("Min byte per character failed\n"); 640 641 642 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/ 643 log_verbose("\n---Testing ucnv_getSubstChars...\n"); 644 ii=4; 645 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 646 if (ii <= 0) { 647 log_err("ucnv_getSubstChars returned a negative number %d\n", ii); 648 } 649 650 for(x=0;x<ii;x++) 651 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]); 652 if (rest==CodePagesSubstitutionChars[codepage_index]) 653 log_verbose("Substitution character ok\n"); 654 else 655 log_err("Substitution character failed.\n"); 656 657 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n"); 658 ucnv_setSubstChars(myConverter, myptr, ii, &err); 659 if (U_FAILURE(err)) 660 { 661 log_err("FAILURE! %s\n", myErrorName(err)); 662 } 663 ucnv_getSubstChars(myConverter,save, &ii, &err); 664 if (U_FAILURE(err)) 665 { 666 log_err("FAILURE! %s\n", myErrorName(err)); 667 } 668 669 if (strncmp(save, myptr, ii)) 670 log_err("Saved substitution character failed\n"); 671 else 672 log_verbose("Saved substitution character ok\n"); 673 674 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/ 675 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n"); 676 ii=1; 677 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 678 if(err != U_INDEX_OUTOFBOUNDS_ERROR){ 679 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err)); 680 } 681 err=U_ZERO_ERROR; 682 ii=4; 683 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 684 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n"); 685 ucnv_setSubstChars(myConverter, myptr, 0, &err); 686 if(err != U_ILLEGAL_ARGUMENT_ERROR){ 687 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err)); 688 } 689 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n"); 690 strcpy(myptr, "abc"); 691 ucnv_setSubstChars(myConverter, myptr, ii, &err); 692 err=U_ZERO_ERROR; 693 ucnv_getSubstChars(myConverter, save, &ii, &err); 694 if(strncmp(save, myptr, ii) == 0){ 695 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n"); 696 } 697 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n"); 698 err=U_ZERO_ERROR; 699 strcpy(myptr, "abc"); 700 ucnv_setSubstChars(myConverter, myptr, ii, &err); 701 err=U_ILLEGAL_ARGUMENT_ERROR; 702 ucnv_getSubstChars(myConverter, save, &ii, &err); 703 if(strncmp(save, myptr, ii) == 0){ 704 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n"); 705 } 706 err=U_ZERO_ERROR; 707 /*------*/ 708 709#ifdef U_ENABLE_GENERIC_ISO_2022 710 /*resetState ucnv_reset()*/ 711 log_verbose("\n---Testing ucnv_reset()..\n"); 712 ucnv_reset(myConverter); 713 { 714 UChar32 c; 715 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80}; 716 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 717 UConverter *cnv=ucnv_open("ISO_2022", &err); 718 if(U_FAILURE(err)) { 719 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 720 } 721 c=ucnv_getNextUChar(cnv, &source, limit, &err); 722 if((U_FAILURE(err) || c != (UChar32)0x0031)) { 723 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err)); 724 } 725 ucnv_reset(cnv); 726 ucnv_close(cnv); 727 728 } 729#endif 730 731 /*getDisplayName*/ 732 log_verbose("\n---Testing ucnv_getDisplayName()...\n"); 733 locale=CodePagesLocale[codepage_index]; 734 len=0; 735 displayname=NULL; 736 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err); 737 if(err==U_BUFFER_OVERFLOW_ERROR) { 738 err=U_ZERO_ERROR; 739 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar)); 740 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); 741 if(U_FAILURE(err)) { 742 log_err("getDisplayName failed. The error is %s\n", myErrorName(err)); 743 } 744 else { 745 log_verbose(" getDisplayName o.k.\n"); 746 } 747 free(displayname); 748 displayname=NULL; 749 } 750 else { 751 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err)); 752 } 753 /*test ucnv_getDiaplayName with error condition*/ 754 err= U_ILLEGAL_ARGUMENT_ERROR; 755 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err); 756 if( len !=0 ){ 757 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n"); 758 } 759 /*test ucnv_getDiaplayName with error condition*/ 760 err=U_ZERO_ERROR; 761 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err); 762 if( len !=0 || U_SUCCESS(err)){ 763 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n"); 764 } 765 err=U_ZERO_ERROR; 766 767 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/ 768 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context); 769 770 log_verbose("\n---Testing ucnv_setFromUCallBack...\n"); 771 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 772 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context) 773 { 774 log_err("FAILURE! %s\n", myErrorName(err)); 775 } 776 777 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 778 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM) 779 log_err("get From UCallBack failed\n"); 780 else 781 log_verbose("get From UCallBack ok\n"); 782 783 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n"); 784 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err); 785 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM) 786 { 787 log_err("FAILURE! %s\n", myErrorName(err)); 788 } 789 790 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 791 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context) 792 log_err("get From UCallBack action failed\n"); 793 else 794 log_verbose("get From UCallBack action ok\n"); 795 796 /*testing ucnv_setToUCallBack with error conditions*/ 797 err=U_ILLEGAL_ARGUMENT_ERROR; 798 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n"); 799 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 800 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 801 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){ 802 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 803 } 804 err=U_ZERO_ERROR; 805 806 807 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/ 808 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context); 809 810 log_verbose("\n---Testing setTo UCallBack...\n"); 811 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err); 812 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context) 813 { 814 log_err("FAILURE! %s\n", myErrorName(err)); 815 } 816 817 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 818 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM) 819 log_err("To UCallBack failed\n"); 820 else 821 log_verbose("To UCallBack ok\n"); 822 823 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n"); 824 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err); 825 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM) 826 { log_err("FAILURE! %s\n", myErrorName(err)); } 827 828 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 829 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context) 830 log_err("To UCallBack failed\n"); 831 else 832 log_verbose("To UCallBack ok\n"); 833 834 /*testing ucnv_setToUCallBack with error conditions*/ 835 err=U_ILLEGAL_ARGUMENT_ERROR; 836 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n"); 837 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err); 838 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 839 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){ 840 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 841 } 842 err=U_ZERO_ERROR; 843 844 845 /*getcodepageid testing ucnv_getCCSID() */ 846 log_verbose("\n----Testing getCCSID....\n"); 847 cp = ucnv_getCCSID(myConverter,&err); 848 if (U_FAILURE(err)) 849 { 850 log_err("FAILURE!..... %s\n", myErrorName(err)); 851 } 852 if (cp != CodePageNumberToTest[codepage_index]) 853 log_err("Codepage number test failed\n"); 854 else 855 log_verbose("Codepage number test OK\n"); 856 857 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/ 858 err=U_ILLEGAL_ARGUMENT_ERROR; 859 if( ucnv_getCCSID(myConverter,&err) != -1){ 860 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n"); 861 } 862 err=U_ZERO_ERROR; 863 864 /*getCodepagePlatform testing ucnv_getPlatform()*/ 865 log_verbose("\n---Testing getCodepagePlatform ..\n"); 866 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err)) 867 log_err("Platform codepage test failed\n"); 868 else 869 log_verbose("Platform codepage test ok\n"); 870 871 if (U_FAILURE(err)) 872 { 873 log_err("FAILURE! %s\n", myErrorName(err)); 874 } 875 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/ 876 err= U_ILLEGAL_ARGUMENT_ERROR; 877 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){ 878 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n"); 879 } 880 err=U_ZERO_ERROR; 881 882 883 /*Reads the BOM*/ 884 fread(&BOM, sizeof(UChar), 1, ucs_file_in); 885 if (BOM!=0xFEFF && BOM!=0xFFFE) 886 { 887 log_err("File Missing BOM...Bailing!\n"); 888 fclose(ucs_file_in); 889 break; 890 } 891 892 893 /*Reads in the file*/ 894 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) 895 { 896 myUChar = ucs_file_buffer[i-1]; 897 898 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/ 899 } 900 901 myUChar = ucs_file_buffer[i-1]; 902 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/ 903 904 905 /*testing ucnv_fromUChars() and ucnv_toUChars() */ 906 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/ 907 908 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1)); 909 u_uastrcpy(uchar1,""); 910 u_strncpy(uchar1,ucs_file_buffer,i); 911 uchar1[i] = 0; 912 913 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1)); 914 u_uastrcpy(uchar3,""); 915 u_strncpy(uchar3,ucs_file_buffer,i); 916 uchar3[i] = 0; 917 918 /*Calls the Conversion Routine */ 919 testLong1 = MAX_FILE_LEN; 920 log_verbose("\n---Testing ucnv_fromUChars()\n"); 921 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 922 if (U_FAILURE(err)) 923 { 924 log_err("\nFAILURE...%s\n", myErrorName(err)); 925 } 926 else 927 log_verbose(" ucnv_fromUChars() o.k.\n"); 928 929 /*test the conversion routine */ 930 log_verbose("\n---Testing ucnv_toUChars()\n"); 931 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */ 932 targetcapacity2=0; 933 targetsize = ucnv_toUChars(myConverter, 934 NULL, 935 targetcapacity2, 936 output_cp_buffer, 937 strlen(output_cp_buffer), 938 &err); 939 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ 940 941 if(err==U_BUFFER_OVERFLOW_ERROR) 942 { 943 err=U_ZERO_ERROR; 944 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar)); 945 targetsize = ucnv_toUChars(myConverter, 946 uchar2, 947 targetsize+1, 948 output_cp_buffer, 949 strlen(output_cp_buffer), 950 &err); 951 952 if(U_FAILURE(err)) 953 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err)); 954 else 955 log_verbose(" ucnv_toUChars() o.k.\n"); 956 957 if(u_strcmp(uchar1,uchar2)!=0) 958 log_err("equality test failed with conversion routine\n"); 959 } 960 else 961 { 962 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n"); 963 } 964 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/ 965 err=U_ILLEGAL_ARGUMENT_ERROR; 966 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n"); 967 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 968 if (targetcapacity !=0) { 969 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 970 } 971 err=U_ZERO_ERROR; 972 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n"); 973 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err); 974 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) { 975 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n"); 976 } 977 err=U_ZERO_ERROR; 978 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n"); 979 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err); 980 if (targetcapacity !=0) { 981 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n"); 982 } 983 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n"); 984 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err); 985 if (err != U_BUFFER_OVERFLOW_ERROR) { 986 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); 987 } 988 /*toUChars with error conditions*/ 989 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err); 990 if(targetsize != 0){ 991 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 992 } 993 err=U_ZERO_ERROR; 994 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err); 995 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ 996 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); 997 } 998 err=U_ZERO_ERROR; 999 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err); 1000 if (targetsize !=0) { 1001 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); 1002 } 1003 targetcapacity2=0; 1004 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err); 1005 if (err != U_STRING_NOT_TERMINATED_WARNING) { 1006 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", 1007 u_errorName(err)); 1008 } 1009 err=U_ZERO_ERROR; 1010 /*-----*/ 1011 1012 1013 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ 1014 /*Clean up re-usable vars*/ 1015 j=0; 1016 log_verbose("Testing ucnv_fromUnicode().....\n"); 1017 tmp_ucs_buf=ucs_file_buffer_use; 1018 ucnv_fromUnicode(myConverter, &mytarget_1, 1019 mytarget + MAX_FILE_LEN, 1020 &tmp_ucs_buf, 1021 ucs_file_buffer_use+i, 1022 NULL, 1023 TRUE, 1024 &err); 1025 consumedUni = (UChar*)tmp_consumedUni; 1026 1027 if (U_FAILURE(err)) 1028 { 1029 log_err("FAILURE! %s\n", myErrorName(err)); 1030 } 1031 else 1032 log_verbose("ucnv_fromUnicode() o.k.\n"); 1033 1034 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */ 1035 log_verbose("Testing ucnv_toUnicode().....\n"); 1036 tmp_mytarget_use=mytarget_use; 1037 tmp_consumed = consumed; 1038 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1, 1039 my_ucs_file_buffer + MAX_FILE_LEN, 1040 &tmp_mytarget_use, 1041 mytarget_use + (mytarget_1 - mytarget), 1042 NULL, 1043 FALSE, 1044 &err); 1045 consumed = (char*)tmp_consumed; 1046 if (U_FAILURE(err)) 1047 { 1048 log_err("FAILURE! %s\n", myErrorName(err)); 1049 } 1050 else 1051 log_verbose("ucnv_toUnicode() o.k.\n"); 1052 1053 1054 log_verbose("\n---Testing RoundTrip ...\n"); 1055 1056 1057 u_strncpy(uchar3, my_ucs_file_buffer,i); 1058 uchar3[i] = 0; 1059 1060 if(u_strcmp(uchar1,uchar3)==0) 1061 log_verbose("Equality test o.k.\n"); 1062 else 1063 log_err("Equality test failed\n"); 1064 1065 /*sanity compare */ 1066 if(uchar2 == NULL) 1067 { 1068 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__); 1069 } 1070 else 1071 { 1072 if(u_strcmp(uchar2, uchar3)==0) 1073 log_verbose("Equality test o.k.\n"); 1074 else 1075 log_err("Equality test failed\n"); 1076 } 1077 1078 fclose(ucs_file_in); 1079 ucnv_close(myConverter); 1080 if (uchar1 != 0) free(uchar1); 1081 if (uchar2 != 0) free(uchar2); 1082 if (uchar3 != 0) free(uchar3); 1083 } 1084 1085 free((void*)mytarget); 1086 free((void*)output_cp_buffer); 1087 free((void*)ucs_file_buffer); 1088 free((void*)my_ucs_file_buffer); 1089#endif 1090} 1091 1092static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) 1093{ 1094 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; 1095} 1096 1097 1098static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) 1099{ 1100 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; 1101} 1102 1103static void TestFlushCache(void) { 1104#if !UCONFIG_NO_LEGACY_CONVERSION 1105 UErrorCode err = U_ZERO_ERROR; 1106 UConverter* someConverters[5]; 1107 int flushCount = 0; 1108 1109 /* flush the converter cache to get a consistent state before the flushing is tested */ 1110 ucnv_flushCache(); 1111 1112 /*Testing ucnv_open()*/ 1113 /* Note: These converters have been chosen because they do NOT 1114 encode the Latin characters (U+0041, ...), and therefore are 1115 highly unlikely to be chosen as system default codepages */ 1116 1117 someConverters[0] = ucnv_open("ibm-1047", &err); 1118 if (U_FAILURE(err)) { 1119 log_data_err("FAILURE! %s\n", myErrorName(err)); 1120 } 1121 1122 someConverters[1] = ucnv_open("ibm-1047", &err); 1123 if (U_FAILURE(err)) { 1124 log_data_err("FAILURE! %s\n", myErrorName(err)); 1125 } 1126 1127 someConverters[2] = ucnv_open("ibm-1047", &err); 1128 if (U_FAILURE(err)) { 1129 log_data_err("FAILURE! %s\n", myErrorName(err)); 1130 } 1131 1132 someConverters[3] = ucnv_open("gb18030", &err); 1133 if (U_FAILURE(err)) { 1134 log_data_err("FAILURE! %s\n", myErrorName(err)); 1135 } 1136 1137 someConverters[4] = ucnv_open("ibm-954", &err); 1138 if (U_FAILURE(err)) { 1139 log_data_err("FAILURE! %s\n", myErrorName(err)); 1140 } 1141 1142 1143 /* Testing ucnv_flushCache() */ 1144 log_verbose("\n---Testing ucnv_flushCache...\n"); 1145 if ((flushCount=ucnv_flushCache())==0) 1146 log_verbose("Flush cache ok\n"); 1147 else 1148 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1149 1150 /*testing ucnv_close() and ucnv_flushCache() */ 1151 ucnv_close(someConverters[0]); 1152 ucnv_close(someConverters[1]); 1153 1154 if ((flushCount=ucnv_flushCache())==0) 1155 log_verbose("Flush cache ok\n"); 1156 else 1157 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1158 1159 ucnv_close(someConverters[2]); 1160 ucnv_close(someConverters[3]); 1161 1162 if ((flushCount=ucnv_flushCache())==2) 1163 log_verbose("Flush cache ok\n"); /*because first, second and third are same */ 1164 else 1165 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n", 1166 __LINE__, 1167 flushCount); 1168 1169 ucnv_close(someConverters[4]); 1170 if ( (flushCount=ucnv_flushCache())==1) 1171 log_verbose("Flush cache ok\n"); 1172 else 1173 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); 1174#endif 1175} 1176 1177/** 1178 * Test the converter alias API, specifically the fuzzy matching of 1179 * alias names and the alias table integrity. Make sure each 1180 * converter has at least one alias (itself), and that its listed 1181 * aliases map back to itself. Check some hard-coded UTF-8 and 1182 * ISO_2022 aliases to make sure they work. 1183 */ 1184static void TestAlias() { 1185 int32_t i, ncnv; 1186 UErrorCode status = U_ZERO_ERROR; 1187 1188 /* Predetermined aliases that we expect to map back to ISO_2022 1189 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */ 1190 const char* ISO_2022_NAMES[] = 1191 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", 1192 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; 1193 int32_t ISO_2022_NAMES_LENGTH = 1194 sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]); 1195 const char *UTF8_NAMES[] = 1196 { "UTF-8", "utf-8", "utf8", "ibm-1208", 1197 "utf_8", "ibm1208", "cp1208" }; 1198 int32_t UTF8_NAMES_LENGTH = 1199 sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]); 1200 1201 struct { 1202 const char *name; 1203 const char *alias; 1204 } CONVERTERS_NAMES[] = { 1205 { "UTF-32BE", "UTF32_BigEndian" }, 1206 { "UTF-32LE", "UTF32_LittleEndian" }, 1207 { "UTF-32", "ISO-10646-UCS-4" }, 1208 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, 1209 { "UTF-32", "ucs-4" } 1210 }; 1211 int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES); 1212 1213 /* When there are bugs in gencnval or in ucnv_io, converters can 1214 appear to have no aliases. */ 1215 ncnv = ucnv_countAvailable(); 1216 log_verbose("%d converters\n", ncnv); 1217 for (i=0; i<ncnv; ++i) { 1218 const char *name = ucnv_getAvailableName(i); 1219 const char *alias0; 1220 uint16_t na = ucnv_countAliases(name, &status); 1221 uint16_t j; 1222 UConverter *cnv; 1223 1224 if (na == 0) { 1225 log_err("FAIL: Converter \"%s\" (i=%d)" 1226 " has no aliases; expect at least one\n", 1227 name, i); 1228 continue; 1229 } 1230 cnv = ucnv_open(name, &status); 1231 if (U_FAILURE(status)) { 1232 log_data_err("FAIL: Converter \"%s\" (i=%d)" 1233 " can't be opened.\n", 1234 name, i); 1235 } 1236 else { 1237 if (strcmp(ucnv_getName(cnv, &status), name) != 0 1238 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { 1239 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " 1240 "The should be the same\n", 1241 name, ucnv_getName(cnv, &status)); 1242 } 1243 } 1244 ucnv_close(cnv); 1245 1246 status = U_ZERO_ERROR; 1247 alias0 = ucnv_getAlias(name, 0, &status); 1248 for (j=1; j<na; ++j) { 1249 const char *alias; 1250 /* Make sure each alias maps back to the the same list of 1251 aliases. Assume that if alias 0 is the same, the whole 1252 list is the same (this should always be true). */ 1253 const char *mapBack; 1254 1255 status = U_ZERO_ERROR; 1256 alias = ucnv_getAlias(name, j, &status); 1257 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1258 log_err("FAIL: Converter \"%s\"is ambiguous\n", name); 1259 } 1260 1261 if (alias == NULL) { 1262 log_err("FAIL: Converter \"%s\" -> " 1263 "alias[%d]=NULL\n", 1264 name, j); 1265 continue; 1266 } 1267 1268 mapBack = ucnv_getAlias(alias, 0, &status); 1269 1270 if (mapBack == NULL) { 1271 log_err("FAIL: Converter \"%s\" -> " 1272 "alias[%d]=\"%s\" -> " 1273 "alias[0]=NULL, exp. \"%s\"\n", 1274 name, j, alias, alias0); 1275 continue; 1276 } 1277 1278 if (0 != strcmp(alias0, mapBack)) { 1279 int32_t idx; 1280 UBool foundAlias = FALSE; 1281 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1282 /* Make sure that we only get this mismapping when there is 1283 an ambiguous alias, and the other converter has this alias too. */ 1284 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) { 1285 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) { 1286 foundAlias = TRUE; 1287 break; 1288 } 1289 } 1290 } 1291 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */ 1292 1293 if (!foundAlias) { 1294 log_err("FAIL: Converter \"%s\" -> " 1295 "alias[%d]=\"%s\" -> " 1296 "alias[0]=\"%s\", exp. \"%s\"\n", 1297 name, j, alias, mapBack, alias0); 1298 } 1299 } 1300 } 1301 } 1302 1303 1304 /* Check a list of predetermined aliases that we expect to map 1305 * back to ISO_2022 and UTF-8. */ 1306 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) { 1307 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status); 1308 if(!mapBack) { 1309 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]); 1310 continue; 1311 } 1312 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) { 1313 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n", 1314 ISO_2022_NAMES[i], mapBack); 1315 } 1316 } 1317 1318 1319 for (i=1; i<UTF8_NAMES_LENGTH; ++i) { 1320 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status); 1321 if(!mapBack) { 1322 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]); 1323 continue; 1324 } 1325 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) { 1326 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n", 1327 UTF8_NAMES[i], mapBack); 1328 } 1329 } 1330 1331 /* 1332 * Check a list of predetermined aliases that we expect to map 1333 * back to predermined converter names. 1334 */ 1335 1336 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { 1337 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); 1338 if(!mapBack) { 1339 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); 1340 continue; 1341 } 1342 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { 1343 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n", 1344 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name); 1345 } 1346 } 1347 1348} 1349 1350static void TestDuplicateAlias(void) { 1351 const char *alias; 1352 UErrorCode status = U_ZERO_ERROR; 1353 1354 status = U_ZERO_ERROR; 1355 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); 1356 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1357 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); 1358 } 1359 status = U_ZERO_ERROR; 1360 alias = ucnv_getStandardName("ibm-943", "IANA", &status); 1361 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1362 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias); 1363 } 1364 status = U_ZERO_ERROR; 1365 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status); 1366 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) { 1367 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias); 1368 } 1369} 1370 1371 1372/* Test safe clone callback */ 1373 1374static uint32_t TSCC_nextSerial() 1375{ 1376 static uint32_t n = 1; 1377 1378 return (n++); 1379} 1380 1381typedef struct 1382{ 1383 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */ 1384 uint32_t serial; /* minted from nextSerial, above */ 1385 UBool wasClosed; /* close happened on the object */ 1386} TSCCContext; 1387 1388static TSCCContext *TSCC_clone(TSCCContext *ctx) 1389{ 1390 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext)); 1391 1392 newCtx->serial = TSCC_nextSerial(); 1393 newCtx->wasClosed = 0; 1394 newCtx->magic = 0xC0FFEE; 1395 1396 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial); 1397 1398 return newCtx; 1399} 1400 1401static void TSCC_fromU(const void *context, 1402 UConverterFromUnicodeArgs *fromUArgs, 1403 const UChar* codeUnits, 1404 int32_t length, 1405 UChar32 codePoint, 1406 UConverterCallbackReason reason, 1407 UErrorCode * err) 1408{ 1409 TSCCContext *ctx = (TSCCContext*)context; 1410 UConverterFromUCallback junkFrom; 1411 1412 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter); 1413 1414 if(ctx->magic != 0xC0FFEE) { 1415 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1416 return; 1417 } 1418 1419 if(reason == UCNV_CLONE) { 1420 UErrorCode subErr = U_ZERO_ERROR; 1421 TSCCContext *newCtx; 1422 TSCCContext *junkCtx; 1423 TSCCContext **pjunkCtx = &junkCtx; 1424 1425 /* "recreate" it */ 1426 log_verbose("TSCC_fromU: cloning..\n"); 1427 newCtx = TSCC_clone(ctx); 1428 1429 if(newCtx == NULL) { 1430 log_err("TSCC_fromU: internal clone failed on %p\n", ctx); 1431 } 1432 1433 /* now, SET it */ 1434 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1435 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1436 1437 if(U_FAILURE(subErr)) { 1438 *err = subErr; 1439 } 1440 } 1441 1442 if(reason == UCNV_CLOSE) { 1443 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial); 1444 ctx->wasClosed = TRUE; 1445 } 1446} 1447 1448 1449static void TSCC_toU(const void *context, 1450 UConverterToUnicodeArgs *toUArgs, 1451 const char* codeUnits, 1452 int32_t length, 1453 UConverterCallbackReason reason, 1454 UErrorCode * err) 1455{ 1456 TSCCContext *ctx = (TSCCContext*)context; 1457 UConverterToUCallback junkFrom; 1458 1459 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter); 1460 1461 if(ctx->magic != 0xC0FFEE) { 1462 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1463 return; 1464 } 1465 1466 if(reason == UCNV_CLONE) { 1467 UErrorCode subErr = U_ZERO_ERROR; 1468 TSCCContext *newCtx; 1469 TSCCContext *junkCtx; 1470 TSCCContext **pjunkCtx = &junkCtx; 1471 1472 /* "recreate" it */ 1473 log_verbose("TSCC_toU: cloning..\n"); 1474 newCtx = TSCC_clone(ctx); 1475 1476 if(newCtx == NULL) { 1477 log_err("TSCC_toU: internal clone failed on %p\n", ctx); 1478 } 1479 1480 /* now, SET it */ 1481 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1482 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1483 1484 if(U_FAILURE(subErr)) { 1485 *err = subErr; 1486 } 1487 } 1488 1489 if(reason == UCNV_CLOSE) { 1490 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial); 1491 ctx->wasClosed = TRUE; 1492 } 1493} 1494 1495static void TSCC_init(TSCCContext *q) 1496{ 1497 q->magic = 0xC0FFEE; 1498 q->serial = TSCC_nextSerial(); 1499 q->wasClosed = 0; 1500} 1501 1502static void TSCC_print_log(TSCCContext *q, const char *name) 1503{ 1504 if(q==NULL) { 1505 log_verbose("TSCContext: %s is NULL!!\n", name); 1506 } else { 1507 if(q->magic != 0xC0FFEE) { 1508 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n", 1509 q,q->serial, q->magic); 1510 } 1511 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n", 1512 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open"); 1513 } 1514} 1515 1516#if !UCONFIG_NO_LEGACY_CONVERSION 1517static void TestConvertSafeCloneCallback() 1518{ 1519 UErrorCode err = U_ZERO_ERROR; 1520 TSCCContext from1, to1; 1521 TSCCContext *from2, *from3, *to2, *to3; 1522 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; 1523 char hunk[8192]; 1524 int32_t hunkSize = 8192; 1525 UConverterFromUCallback junkFrom; 1526 UConverterToUCallback junkTo; 1527 UConverter *conv1, *conv2 = NULL; 1528 1529 conv1 = ucnv_open("iso-8859-3", &err); 1530 1531 if(U_FAILURE(err)) { 1532 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); 1533 return; 1534 } 1535 1536 log_verbose("Opened conv1=%p\n", conv1); 1537 1538 TSCC_init(&from1); 1539 TSCC_init(&to1); 1540 1541 TSCC_print_log(&from1, "from1"); 1542 TSCC_print_log(&to1, "to1"); 1543 1544 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err); 1545 log_verbose("Set from1 on conv1\n"); 1546 TSCC_print_log(&from1, "from1"); 1547 1548 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err); 1549 log_verbose("Set to1 on conv1\n"); 1550 TSCC_print_log(&to1, "to1"); 1551 1552 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err); 1553 if(U_FAILURE(err)) { 1554 log_err("safeClone failed: %s\n", u_errorName(err)); 1555 return; 1556 } 1557 log_verbose("Cloned to conv2=%p.\n", conv2); 1558 1559/********** from *********************/ 1560 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); 1561 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); 1562 1563 TSCC_print_log(from2, "from2"); 1564 TSCC_print_log(from3, "from3(==from1)"); 1565 1566 if(from2 == NULL) { 1567 log_err("FAIL! from2 is null \n"); 1568 return; 1569 } 1570 1571 if(from3 == NULL) { 1572 log_err("FAIL! from3 is null \n"); 1573 return; 1574 } 1575 1576 if(from3 != (&from1) ) { 1577 log_err("FAIL! conv1's FROM context changed!\n"); 1578 } 1579 1580 if(from2 == (&from1) ) { 1581 log_err("FAIL! conv1's FROM context is the same as conv2's!\n"); 1582 } 1583 1584 if(from1.wasClosed) { 1585 log_err("FAIL! from1 is closed \n"); 1586 } 1587 1588 if(from2->wasClosed) { 1589 log_err("FAIL! from2 was closed\n"); 1590 } 1591 1592/********** to *********************/ 1593 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); 1594 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); 1595 1596 TSCC_print_log(to2, "to2"); 1597 TSCC_print_log(to3, "to3(==to1)"); 1598 1599 if(to2 == NULL) { 1600 log_err("FAIL! to2 is null \n"); 1601 return; 1602 } 1603 1604 if(to3 == NULL) { 1605 log_err("FAIL! to3 is null \n"); 1606 return; 1607 } 1608 1609 if(to3 != (&to1) ) { 1610 log_err("FAIL! conv1's TO context changed!\n"); 1611 } 1612 1613 if(to2 == (&to1) ) { 1614 log_err("FAIL! conv1's TO context is the same as conv2's!\n"); 1615 } 1616 1617 if(to1.wasClosed) { 1618 log_err("FAIL! to1 is closed \n"); 1619 } 1620 1621 if(to2->wasClosed) { 1622 log_err("FAIL! to2 was closed\n"); 1623 } 1624 1625/*************************************/ 1626 1627 ucnv_close(conv1); 1628 log_verbose("ucnv_closed (conv1)\n"); 1629 TSCC_print_log(&from1, "from1"); 1630 TSCC_print_log(from2, "from2"); 1631 TSCC_print_log(&to1, "to1"); 1632 TSCC_print_log(to2, "to2"); 1633 1634 if(from1.wasClosed == FALSE) { 1635 log_err("FAIL! from1 is NOT closed \n"); 1636 } 1637 1638 if(from2->wasClosed) { 1639 log_err("FAIL! from2 was closed\n"); 1640 } 1641 1642 if(to1.wasClosed == FALSE) { 1643 log_err("FAIL! to1 is NOT closed \n"); 1644 } 1645 1646 if(to2->wasClosed) { 1647 log_err("FAIL! to2 was closed\n"); 1648 } 1649 1650 ucnv_close(conv2); 1651 log_verbose("ucnv_closed (conv2)\n"); 1652 1653 TSCC_print_log(&from1, "from1"); 1654 TSCC_print_log(from2, "from2"); 1655 1656 if(from1.wasClosed == FALSE) { 1657 log_err("FAIL! from1 is NOT closed \n"); 1658 } 1659 1660 if(from2->wasClosed == FALSE) { 1661 log_err("FAIL! from2 was NOT closed\n"); 1662 } 1663 1664 TSCC_print_log(&to1, "to1"); 1665 TSCC_print_log(to2, "to2"); 1666 1667 if(to1.wasClosed == FALSE) { 1668 log_err("FAIL! to1 is NOT closed \n"); 1669 } 1670 1671 if(to2->wasClosed == FALSE) { 1672 log_err("FAIL! to2 was NOT closed\n"); 1673 } 1674 1675 if(to2 != (&to1)) { 1676 free(to2); /* to1 is stack based */ 1677 } 1678 if(from2 != (&from1)) { 1679 free(from2); /* from1 is stack based */ 1680 } 1681} 1682#endif 1683 1684static UBool 1685containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { 1686 while(length>0) { 1687 if(*p!=b) { 1688 return TRUE; 1689 } 1690 ++p; 1691 --length; 1692 } 1693 return FALSE; 1694} 1695 1696static void TestConvertSafeClone() 1697{ 1698 /* one 'regular' & all the 'private stateful' converters */ 1699 static const char *const names[] = { 1700/* BEGIN android-changed */ 1701/* To save data space, Android does not support ISO2022 CJK */ 1702#if !UCONFIG_NO_LEGACY_CONVERSION 1703 "ibm-1047", 1704 /* "ISO_2022,locale=zh,version=1", */ 1705#endif 1706 "SCSU", 1707#if !UCONFIG_NO_LEGACY_CONVERSION 1708 "HZ", 1709 "lmbcs", 1710 "ISCII,version=0", 1711 /* "ISO_2022,locale=kr,version=1", */ 1712 /* "ISO_2022,locale=jp,version=2", */ 1713#endif 1714 "BOCU-1", 1715 "UTF-7", 1716#if !UCONFIG_NO_LEGACY_CONVERSION 1717 "IMAP-mailbox-name", 1718 "ibm-1047-s390" 1719#else 1720 "IMAP=mailbox-name" 1721#endif 1722/* END android-changed */ 1723 }; 1724 1725 /* store the actual sizes of each converter */ 1726 int32_t actualSizes[LENGTHOF(names)]; 1727 1728 static const int32_t bufferSizes[] = { 1729 U_CNV_SAFECLONE_BUFFERSIZE, 1730 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ 1731 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ 1732 }; 1733 1734 char charBuffer[21]; /* Leave at an odd number for alignment testing */ 1735 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; 1736 int32_t bufferSize, maxBufferSize; 1737 const char *maxName; 1738 UConverter * cnv, *cnv2; 1739 UErrorCode err; 1740 1741 char *pCharBuffer; 1742 const char *pConstCharBuffer; 1743 const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer); 1744 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ 1745 UChar uniCharBuffer[20]; 1746 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; 1747 const char *pCharSource = charSourceBuffer; 1748 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); 1749 UChar *pUCharTarget = uniCharBuffer; 1750 UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); 1751 const UChar * pUniBuffer; 1752 const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); 1753 int32_t index, j; 1754 1755 err = U_ZERO_ERROR; 1756 cnv = ucnv_open(names[0], &err); 1757 if(U_SUCCESS(err)) { 1758 /* Check the various error & informational states: */ 1759 1760 /* Null status - just returns NULL */ 1761 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1762 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, 0)) 1763 { 1764 log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); 1765 } 1766 /* error status - should return 0 & keep error the same */ 1767 err = U_MEMORY_ALLOCATION_ERROR; 1768 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) 1769 { 1770 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); 1771 } 1772 err = U_ZERO_ERROR; 1773 1774 /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/ 1775 if (0 != ucnv_safeClone(cnv, buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1776 { 1777 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); 1778 } 1779 err = U_ZERO_ERROR; 1780 1781 /* buffer size pointer is 0 - fill in pbufferSize with a size */ 1782 bufferSize = 0; 1783 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) 1784 { 1785 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); 1786 } 1787 /* Verify our define is large enough */ 1788 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) 1789 { 1790 log_err("FAIL: Pre-calculated buffer size is too small\n"); 1791 } 1792 /* Verify we can use this run-time calculated size */ 1793 if (0 == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) 1794 { 1795 log_err("FAIL: Converter can't be cloned with run-time size\n"); 1796 } 1797 if (cnv2) { 1798 ucnv_close(cnv2); 1799 } 1800 1801 /* size one byte too small - should allocate & let us know */ 1802 --bufferSize; 1803 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1804 { 1805 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); 1806 } 1807 if (cnv2) { 1808 ucnv_close(cnv2); 1809 } 1810 1811 err = U_ZERO_ERROR; 1812 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1813 1814 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ 1815 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1816 { 1817 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); 1818 } 1819 if (cnv2) { 1820 ucnv_close(cnv2); 1821 } 1822 1823 err = U_ZERO_ERROR; 1824 1825 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ 1826 if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1827 { 1828 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); 1829 } 1830 1831 ucnv_close(cnv); 1832 } 1833 1834 maxBufferSize = 0; 1835 maxName = ""; 1836 1837 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ 1838 1839 for(j = 0; j < LENGTHOF(bufferSizes); ++j) { 1840 for (index = 0; index < LENGTHOF(names); index++) 1841 { 1842 err = U_ZERO_ERROR; 1843 cnv = ucnv_open(names[index], &err); 1844 if(U_FAILURE(err)) { 1845 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[index], u_errorName(err)); 1846 continue; 1847 } 1848 1849 if(j == 0) { 1850 /* preflight to get maxBufferSize */ 1851 actualSizes[index] = 0; 1852 ucnv_safeClone(cnv, NULL, &actualSizes[index], &err); 1853 if(actualSizes[index] > maxBufferSize) { 1854 maxBufferSize = actualSizes[index]; 1855 maxName = names[index]; 1856 } 1857 } 1858 1859 memset(buffer, 0xaa, sizeof(buffer)); 1860 1861 bufferSize = bufferSizes[j]; 1862 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); 1863 1864 /* close the original immediately to make sure that the clone works by itself */ 1865 ucnv_close(cnv); 1866 1867 if( actualSizes[index] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && 1868 err == U_SAFECLONE_ALLOCATED_WARNING 1869 ) { 1870 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[index]); 1871 } 1872 1873 /* check if the clone function overwrote any bytes that it is not supposed to touch */ 1874 if(bufferSize <= bufferSizes[j]) { 1875 /* used the stack buffer */ 1876 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || 1877 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) 1878 ) { 1879 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", 1880 names[index], bufferSize, bufferSizes[j]); 1881 } 1882 } else { 1883 /* heap-allocated the clone */ 1884 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { 1885 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", 1886 names[index], bufferSize, bufferSizes[j]); 1887 } 1888 } 1889 1890 pCharBuffer = charBuffer; 1891 pUniBuffer = uniBuffer; 1892 1893 ucnv_fromUnicode(cnv2, 1894 &pCharBuffer, 1895 charBufferLimit, 1896 &pUniBuffer, 1897 uniBufferLimit, 1898 NULL, 1899 TRUE, 1900 &err); 1901 if(U_FAILURE(err)){ 1902 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); 1903 } 1904 ucnv_toUnicode(cnv2, 1905 &pUCharTarget, 1906 pUCharTargetLimit, 1907 &pCharSource, 1908 pCharSourceLimit, 1909 NULL, 1910 TRUE, 1911 &err 1912 ); 1913 1914 if(U_FAILURE(err)){ 1915 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); 1916 } 1917 1918 pConstCharBuffer = charBuffer; 1919 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) 1920 { 1921 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); 1922 } 1923 ucnv_close(cnv2); 1924 } 1925 } 1926 1927 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1928 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1929 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { 1930 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1931 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1932 } 1933} 1934 1935static void TestCCSID() { 1936#if !UCONFIG_NO_LEGACY_CONVERSION 1937 UConverter *cnv; 1938 UErrorCode errorCode; 1939 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; 1940 int32_t i, ccsid; 1941 1942 for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) { 1943 ccsid=ccsids[i]; 1944 1945 errorCode=U_ZERO_ERROR; 1946 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode); 1947 if(U_FAILURE(errorCode)) { 1948 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode)); 1949 continue; 1950 } 1951 1952 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) { 1953 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode)); 1954 } 1955 1956 /* skip gb18030(ccsid 1392) */ 1957 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) { 1958 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode)); 1959 } 1960 1961 ucnv_close(cnv); 1962 } 1963#endif 1964} 1965 1966/* jitterbug 932: ucnv_convert() bugs --------------------------------------- */ 1967 1968/* CHUNK_SIZE defined in common\ucnv.c: */ 1969#define CHUNK_SIZE 1024 1970 1971static void bug1(void); 1972static void bug2(void); 1973static void bug3(void); 1974 1975static void 1976TestJ932(void) 1977{ 1978 bug1(); /* Unicode intermediate buffer straddle bug */ 1979 bug2(); /* pre-flighting size incorrect caused by simple overflow */ 1980 bug3(); /* pre-flighting size incorrect caused by expansion overflow */ 1981} 1982 1983/* 1984 * jitterbug 932: test chunking boundary conditions in 1985 1986 int32_t ucnv_convert(const char *toConverterName, 1987 const char *fromConverterName, 1988 char *target, 1989 int32_t targetSize, 1990 const char *source, 1991 int32_t sourceSize, 1992 UErrorCode * err) 1993 1994 * See discussions on the icu mailing list in 1995 * 2001-April with the subject "converter 'flush' question". 1996 * 1997 * Bug report and test code provided by Edward J. Batutis. 1998 */ 1999static void bug1() 2000{ 2001#if !UCONFIG_NO_LEGACY_CONVERSION 2002 char char_in[CHUNK_SIZE+32]; 2003 char char_out[CHUNK_SIZE*2]; 2004 2005 /* GB 18030 equivalent of U+10000 is 90308130 */ 2006 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 }; 2007 2008 UErrorCode err = U_ZERO_ERROR; 2009 int32_t i, test_seq_len = sizeof(test_seq); 2010 2011 /* 2012 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward 2013 * until the straddle bug appears. I didn't want to hard-code everything so this test could 2014 * be expanded - however this is the only type of straddle bug I can think of at the moment - 2015 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no 2016 * other Unicode sequences cause a bug since combining sequences are not supported by the 2017 * converters. 2018 */ 2019 2020 for (i = test_seq_len; i >= 0; i--) { 2021 /* put character sequence into input buffer */ 2022 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */ 2023 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len); 2024 2025 /* do the conversion */ 2026 ucnv_convert("us-ascii", /* out */ 2027 "gb18030", /* in */ 2028 char_out, 2029 sizeof(char_out), 2030 char_in, 2031 sizeof(char_in), 2032 &err); 2033 2034 /* bug1: */ 2035 if (err == U_TRUNCATED_CHAR_FOUND) { 2036 /* this happens when surrogate pair straddles the intermediate buffer in 2037 * T_UConverter_fromCodepageToCodepage */ 2038 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n"); 2039 } 2040 } 2041#endif 2042} 2043 2044/* bug2: pre-flighting loop bug: simple overflow causes bug */ 2045static void bug2() 2046{ 2047 /* US-ASCII "1234567890" */ 2048 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 }; 2049 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 }; 2050 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, 2051 0x00, 0x00, 0x00, 0x31, 2052 0x00, 0x00, 0x00, 0x32, 2053 0x00, 0x00, 0x00, 0x33, 2054 0x00, 0x00, 0x00, 0x34, 2055 0x00, 0x00, 0x00, 0x35, 2056 0x00, 0x00, 0x00, 0x36, 2057 0x00, 0x00, 0x00, 0x37, 2058 0x00, 0x00, 0x00, 0x38, 2059 0x00, 0x00, (char)0xf0, 0x00}; 2060 static char target[5]; 2061 2062 UErrorCode err = U_ZERO_ERROR; 2063 int32_t size; 2064 2065 /* do the conversion */ 2066 size = ucnv_convert("iso-8859-1", /* out */ 2067 "us-ascii", /* in */ 2068 target, 2069 sizeof(target), 2070 source, 2071 sizeof(source), 2072 &err); 2073 2074 if ( size != 10 ) { 2075 /* bug2: size is 5, should be 10 */ 2076 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size); 2077 } 2078 2079 err = U_ZERO_ERROR; 2080 /* do the conversion */ 2081 size = ucnv_convert("UTF-32BE", /* out */ 2082 "UTF-8", /* in */ 2083 target, 2084 sizeof(target), 2085 sourceUTF8, 2086 sizeof(sourceUTF8), 2087 &err); 2088 2089 if ( size != 32 ) { 2090 /* bug2: size is 5, should be 32 */ 2091 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size); 2092 } 2093 2094 err = U_ZERO_ERROR; 2095 /* do the conversion */ 2096 size = ucnv_convert("UTF-8", /* out */ 2097 "UTF-32BE", /* in */ 2098 target, 2099 sizeof(target), 2100 sourceUTF32, 2101 sizeof(sourceUTF32), 2102 &err); 2103 2104 if ( size != 12 ) { 2105 /* bug2: size is 5, should be 12 */ 2106 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); 2107 } 2108} 2109 2110/* 2111 * bug3: when the characters expand going from source to target codepage 2112 * you get bug3 in addition to bug2 2113 */ 2114static void bug3() 2115{ 2116#if !UCONFIG_NO_LEGACY_CONVERSION 2117 char char_in[CHUNK_SIZE*4]; 2118 char target[5]; 2119 UErrorCode err = U_ZERO_ERROR; 2120 int32_t size; 2121 2122 /* 2123 * first get the buggy size from bug2 then 2124 * compare it to buggy size with an expansion 2125 */ 2126 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ 2127 2128 /* do the conversion */ 2129 size = ucnv_convert("lmbcs", /* out */ 2130 "us-ascii", /* in */ 2131 target, 2132 sizeof(target), 2133 char_in, 2134 sizeof(char_in), 2135 &err); 2136 2137 if ( size != sizeof(char_in) ) { 2138 /* 2139 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer 2140 * in the converter?), should be CHUNK_SIZE*4 2141 * 2142 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize... 2143 */ 2144 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size); 2145 } 2146 2147 /* 2148 * now do the conversion with expansion 2149 * ascii 0x08 expands to 0x0F 0x28 in lmbcs 2150 */ 2151 memset(char_in, 8, sizeof(char_in)); 2152 err = U_ZERO_ERROR; 2153 2154 /* do the conversion */ 2155 size = ucnv_convert("lmbcs", /* out */ 2156 "us-ascii", /* in */ 2157 target, 2158 sizeof(target), 2159 char_in, 2160 sizeof(char_in), 2161 &err); 2162 2163 /* expect 2X expansion */ 2164 if ( size != sizeof(char_in) * 2 ) { 2165 /* 2166 * bug3: 2167 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05: 2168 */ 2169 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); 2170 } 2171#endif 2172} 2173 2174static void 2175convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, 2176 const char *src, int32_t srcLength, 2177 const char *expectTarget, int32_t expectTargetLength, 2178 int32_t chunkSize, 2179 const char *testName, 2180 UErrorCode expectCode) { 2181 UChar pivotBuffer[CHUNK_SIZE]; 2182 UChar *pivotSource, *pivotTarget; 2183 const UChar *pivotLimit; 2184 2185 char targetBuffer[CHUNK_SIZE]; 2186 char *target; 2187 const char *srcLimit, *finalSrcLimit, *targetLimit; 2188 2189 int32_t targetLength; 2190 2191 UBool flush; 2192 2193 UErrorCode errorCode; 2194 2195 /* setup */ 2196 if(chunkSize>CHUNK_SIZE) { 2197 chunkSize=CHUNK_SIZE; 2198 } 2199 2200 pivotSource=pivotTarget=pivotBuffer; 2201 pivotLimit=pivotBuffer+chunkSize; 2202 2203 finalSrcLimit=src+srcLength; 2204 target=targetBuffer; 2205 targetLimit=targetBuffer+chunkSize; 2206 2207 ucnv_resetToUnicode(srcCnv); 2208 ucnv_resetFromUnicode(targetCnv); 2209 2210 errorCode=U_ZERO_ERROR; 2211 flush=FALSE; 2212 2213 /* convert, streaming-style (both converters and pivot keep state) */ 2214 for(;;) { 2215 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */ 2216 if(src+chunkSize<=finalSrcLimit) { 2217 srcLimit=src+chunkSize; 2218 } else { 2219 srcLimit=finalSrcLimit; 2220 } 2221 ucnv_convertEx(targetCnv, srcCnv, 2222 &target, targetLimit, 2223 &src, srcLimit, 2224 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, 2225 FALSE, flush, &errorCode); 2226 targetLength=(int32_t)(target-targetBuffer); 2227 if(target>targetLimit) { 2228 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", 2229 testName, chunkSize, target, targetLimit); 2230 break; /* TODO: major problem! */ 2231 } 2232 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { 2233 /* continue converting another chunk */ 2234 errorCode=U_ZERO_ERROR; 2235 if(targetLength+chunkSize<=sizeof(targetBuffer)) { 2236 targetLimit=target+chunkSize; 2237 } else { 2238 targetLimit=targetBuffer+sizeof(targetBuffer); 2239 } 2240 } else if(U_FAILURE(errorCode)) { 2241 /* failure */ 2242 break; 2243 } else if(flush) { 2244 /* all done */ 2245 break; 2246 } else if(src==finalSrcLimit && pivotSource==pivotTarget) { 2247 /* all consumed, now flush without input (separate from conversion for testing) */ 2248 flush=TRUE; 2249 } 2250 } 2251 2252 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) { 2253 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n", 2254 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode)); 2255 } else if(targetLength!=expectTargetLength) { 2256 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n", 2257 testName, chunkSize, targetLength, expectTargetLength); 2258 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) { 2259 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n", 2260 testName, chunkSize); 2261 } 2262} 2263 2264static void 2265convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, 2266 const char *src, int32_t srcLength, 2267 const char *expectTarget, int32_t expectTargetLength, 2268 const char *testName, 2269 UErrorCode expectCode) { 2270 convertExStreaming(srcCnv, targetCnv, 2271 src, srcLength, 2272 expectTarget, expectTargetLength, 2273 1, testName, expectCode); 2274 convertExStreaming(srcCnv, targetCnv, 2275 src, srcLength, 2276 expectTarget, expectTargetLength, 2277 3, testName, expectCode); 2278 convertExStreaming(srcCnv, targetCnv, 2279 src, srcLength, 2280 expectTarget, expectTargetLength, 2281 7, testName, expectCode); 2282} 2283 2284static void TestConvertEx() { 2285#if !UCONFIG_NO_LEGACY_CONVERSION 2286 static const uint8_t 2287 utf8[]={ 2288 /* 4e00 30a1 ff61 0410 */ 2289 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2290 }, 2291 shiftJIS[]={ 2292 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2293 }, 2294 errorTarget[]={ 2295 /* 2296 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2297 * SUB, SUB, 0x40, SUB, SUB, 0x40 2298 */ 2299 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40 2300 }; 2301 2302 char srcBuffer[100], targetBuffer[100]; 2303 2304 const char *src; 2305 char *target; 2306 2307 UChar pivotBuffer[100]; 2308 UChar *pivotSource, *pivotTarget; 2309 2310 UConverter *cnv1, *cnv2; 2311 UErrorCode errorCode; 2312 2313 errorCode=U_ZERO_ERROR; 2314 cnv1=ucnv_open("UTF-8", &errorCode); 2315 if(U_FAILURE(errorCode)) { 2316 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode)); 2317 return; 2318 } 2319 2320 cnv2=ucnv_open("Shift-JIS", &errorCode); 2321 if(U_FAILURE(errorCode)) { 2322 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2323 ucnv_close(cnv1); 2324 return; 2325 } 2326 2327 /* test ucnv_convertEx() with streaming conversion style */ 2328 convertExMultiStreaming(cnv1, cnv2, 2329 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS), 2330 "UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2331 2332 convertExMultiStreaming(cnv2, cnv1, 2333 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), 2334 "Shift-JIS -> UTF-8", U_ZERO_ERROR); 2335 2336 /* U_ZERO_ERROR because by default the SUB callbacks are set */ 2337 convertExMultiStreaming(cnv1, cnv2, 2338 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget), 2339 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2340 2341 /* test some simple conversions */ 2342 2343 /* NUL-terminated source and target */ 2344 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2345 memcpy(srcBuffer, utf8, sizeof(utf8)); 2346 srcBuffer[sizeof(utf8)]=0; 2347 src=srcBuffer; 2348 target=targetBuffer; 2349 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2350 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2351 if( errorCode!=U_ZERO_ERROR || 2352 target-targetBuffer!=sizeof(shiftJIS) || 2353 *target!=0 || 2354 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2355 ) { 2356 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n", 2357 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2358 } 2359 2360 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */ 2361 errorCode=U_AMBIGUOUS_ALIAS_WARNING; 2362 memset(targetBuffer, 0xff, sizeof(targetBuffer)); 2363 src=srcBuffer; 2364 target=targetBuffer; 2365 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL, 2366 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2367 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2368 target-targetBuffer!=sizeof(shiftJIS) || 2369 *target!=(char)0xff || 2370 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2371 ) { 2372 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n", 2373 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2374 } 2375 2376 /* bad arguments */ 2377 errorCode=U_MESSAGE_PARSE_ERROR; 2378 src=srcBuffer; 2379 target=targetBuffer; 2380 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2381 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2382 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2383 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2384 } 2385 2386 /* pivotLimit==pivotStart */ 2387 errorCode=U_ZERO_ERROR; 2388 pivotSource=pivotTarget=pivotBuffer; 2389 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2390 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode); 2391 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2392 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode)); 2393 } 2394 2395 /* *pivotSource==NULL */ 2396 errorCode=U_ZERO_ERROR; 2397 pivotSource=NULL; 2398 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2399 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2400 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2401 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode)); 2402 } 2403 2404 /* *source==NULL */ 2405 errorCode=U_ZERO_ERROR; 2406 src=NULL; 2407 pivotSource=pivotBuffer; 2408 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2409 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2410 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2411 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); 2412 } 2413 2414 /* streaming conversion without a pivot buffer */ 2415 errorCode=U_ZERO_ERROR; 2416 src=srcBuffer; 2417 pivotSource=pivotBuffer; 2418 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2419 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); 2420 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2421 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); 2422 } 2423 2424 ucnv_close(cnv1); 2425 ucnv_close(cnv2); 2426#endif 2427} 2428 2429/* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */ 2430static const char *const badUTF8[]={ 2431 /* trail byte */ 2432 "\x80", 2433 2434 /* truncated multi-byte sequences */ 2435 "\xd0", 2436 "\xe0", 2437 "\xe1", 2438 "\xed", 2439 "\xee", 2440 "\xf0", 2441 "\xf1", 2442 "\xf4", 2443 "\xf8", 2444 "\xfc", 2445 2446 "\xe0\x80", 2447 "\xe0\xa0", 2448 "\xe1\x80", 2449 "\xed\x80", 2450 "\xed\xa0", 2451 "\xee\x80", 2452 "\xf0\x80", 2453 "\xf0\x90", 2454 "\xf1\x80", 2455 "\xf4\x80", 2456 "\xf4\x90", 2457 "\xf8\x80", 2458 "\xfc\x80", 2459 2460 "\xf0\x80\x80", 2461 "\xf0\x90\x80", 2462 "\xf1\x80\x80", 2463 "\xf4\x80\x80", 2464 "\xf4\x90\x80", 2465 "\xf8\x80\x80", 2466 "\xfc\x80\x80", 2467 2468 "\xf8\x80\x80\x80", 2469 "\xfc\x80\x80\x80", 2470 2471 "\xfc\x80\x80\x80\x80", 2472 2473 /* complete sequences but non-shortest forms or out of range etc. */ 2474 "\xc0\x80", 2475 "\xe0\x80\x80", 2476 "\xed\xa0\x80", 2477 "\xf0\x80\x80\x80", 2478 "\xf4\x90\x80\x80", 2479 "\xf8\x80\x80\x80\x80", 2480 "\xfc\x80\x80\x80\x80\x80", 2481 "\xfe", 2482 "\xff" 2483}; 2484 2485#define ARG_CHAR_ARR_SIZE 8 2486 2487/* get some character that can be converted and convert it */ 2488static UBool getTestChar(UConverter *cnv, const char *converterName, 2489 char charUTF8[4], int32_t *pCharUTF8Length, 2490 char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length, 2491 char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) { 2492 UChar utf16[U16_MAX_LENGTH]; 2493 int32_t utf16Length; 2494 2495 const UChar *utf16Source; 2496 char *target; 2497 2498 USet *set; 2499 UChar32 c; 2500 UErrorCode errorCode; 2501 2502 errorCode=U_ZERO_ERROR; 2503 set=uset_open(1, 0); 2504 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2505 c=uset_charAt(set, uset_size(set)/2); 2506 uset_close(set); 2507 2508 utf16Length=0; 2509 U16_APPEND_UNSAFE(utf16, utf16Length, c); 2510 *pCharUTF8Length=0; 2511 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c); 2512 2513 utf16Source=utf16; 2514 target=char0; 2515 ucnv_fromUnicode(cnv, 2516 &target, char0+ARG_CHAR_ARR_SIZE, 2517 &utf16Source, utf16+utf16Length, 2518 NULL, FALSE, &errorCode); 2519 *pChar0Length=(int32_t)(target-char0); 2520 2521 utf16Source=utf16; 2522 target=char1; 2523 ucnv_fromUnicode(cnv, 2524 &target, char1+ARG_CHAR_ARR_SIZE, 2525 &utf16Source, utf16+utf16Length, 2526 NULL, FALSE, &errorCode); 2527 *pChar1Length=(int32_t)(target-char1); 2528 2529 if(U_FAILURE(errorCode)) { 2530 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); 2531 return FALSE; 2532 } 2533 return TRUE; 2534} 2535 2536static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2537 char charUTF8[4], int32_t charUTF8Length, 2538 char char0[8], int32_t char0Length, 2539 char char1[8], int32_t char1Length) { 2540 char utf8[16]; 2541 int32_t utf8Length; 2542 2543 char output[16]; 2544 int32_t outputLength; 2545 2546 char invalidChars[8]; 2547 int8_t invalidLength; 2548 2549 const char *source; 2550 char *target; 2551 2552 UChar pivotBuffer[8]; 2553 UChar *pivotSource, *pivotTarget; 2554 2555 UErrorCode errorCode; 2556 int32_t i; 2557 2558 /* test truncated sequences */ 2559 errorCode=U_ZERO_ERROR; 2560 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2561 2562 memcpy(utf8, charUTF8, charUTF8Length); 2563 2564 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2565 /* truncated sequence? */ 2566 int32_t length=strlen(badUTF8[i]); 2567 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) { 2568 continue; 2569 } 2570 2571 /* assemble a string with the test character and the truncated sequence */ 2572 memcpy(utf8+charUTF8Length, badUTF8[i], length); 2573 utf8Length=charUTF8Length+length; 2574 2575 /* convert and check the invalidChars */ 2576 source=utf8; 2577 target=output; 2578 pivotSource=pivotTarget=pivotBuffer; 2579 errorCode=U_ZERO_ERROR; 2580 ucnv_convertEx(cnv, utf8Cnv, 2581 &target, output+sizeof(output), 2582 &source, utf8+utf8Length, 2583 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2584 TRUE, TRUE, /* reset & flush */ 2585 &errorCode); 2586 outputLength=(int32_t)(target-output); 2587 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) { 2588 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i); 2589 continue; 2590 } 2591 2592 errorCode=U_ZERO_ERROR; 2593 invalidLength=(int8_t)sizeof(invalidChars); 2594 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode); 2595 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) { 2596 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i); 2597 } 2598 } 2599} 2600 2601static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2602 char charUTF8[4], int32_t charUTF8Length, 2603 char char0[8], int32_t char0Length, 2604 char char1[8], int32_t char1Length) { 2605 char utf8[600], expect[600]; 2606 int32_t utf8Length, expectLength; 2607 2608 char testName[32]; 2609 2610 UErrorCode errorCode; 2611 int32_t i; 2612 2613 errorCode=U_ZERO_ERROR; 2614 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode); 2615 2616 /* 2617 * assemble an input string with the test character between each 2618 * bad sequence, 2619 * and an expected string with repeated test character output 2620 */ 2621 memcpy(utf8, charUTF8, charUTF8Length); 2622 utf8Length=charUTF8Length; 2623 2624 memcpy(expect, char0, char0Length); 2625 expectLength=char0Length; 2626 2627 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2628 int32_t length=strlen(badUTF8[i]); 2629 memcpy(utf8+utf8Length, badUTF8[i], length); 2630 utf8Length+=length; 2631 2632 memcpy(utf8+utf8Length, charUTF8, charUTF8Length); 2633 utf8Length+=charUTF8Length; 2634 2635 memcpy(expect+expectLength, char1, char1Length); 2636 expectLength+=char1Length; 2637 } 2638 2639 /* expect that each bad UTF-8 sequence is detected and skipped */ 2640 strcpy(testName, "from bad UTF-8 to "); 2641 strcat(testName, converterName); 2642 2643 convertExMultiStreaming(utf8Cnv, cnv, 2644 utf8, utf8Length, 2645 expect, expectLength, 2646 testName, 2647 U_ZERO_ERROR); 2648} 2649 2650/* Test illegal UTF-8 input. */ 2651static void TestConvertExFromUTF8() { 2652 static const char *const converterNames[]={ 2653#if !UCONFIG_NO_LEGACY_CONVERSION 2654 "windows-1252", 2655 "shift-jis", 2656#endif 2657 "us-ascii", 2658 "iso-8859-1", 2659 "utf-8" 2660 }; 2661 2662 UConverter *utf8Cnv, *cnv; 2663 UErrorCode errorCode; 2664 int32_t i; 2665 2666 /* fromUnicode versions of some character, from initial state and later */ 2667 char charUTF8[4], char0[8], char1[8]; 2668 int32_t charUTF8Length, char0Length, char1Length; 2669 2670 errorCode=U_ZERO_ERROR; 2671 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2672 if(U_FAILURE(errorCode)) { 2673 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2674 return; 2675 } 2676 2677 for(i=0; i<LENGTHOF(converterNames); ++i) { 2678 errorCode=U_ZERO_ERROR; 2679 cnv=ucnv_open(converterNames[i], &errorCode); 2680 if(U_FAILURE(errorCode)) { 2681 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode)); 2682 continue; 2683 } 2684 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) { 2685 continue; 2686 } 2687 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2688 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2689 ucnv_close(cnv); 2690 } 2691 ucnv_close(utf8Cnv); 2692} 2693 2694static void TestConvertExFromUTF8_C5F0() { 2695 static const char *const converterNames[]={ 2696#if !UCONFIG_NO_LEGACY_CONVERSION 2697 "windows-1251", 2698 "shift-jis", 2699#endif 2700 "us-ascii", 2701 "iso-8859-1", 2702 "utf-8" 2703 }; 2704 2705 UConverter *utf8Cnv, *cnv; 2706 UErrorCode errorCode; 2707 int32_t i; 2708 2709 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 }; 2710 /* Expect "��" (2x U+FFFD as decimal NCRs) */ 2711 static const char twoNCRs[16]={ 2712 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B, 2713 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B 2714 }; 2715 static const char twoFFFD[6]={ 2716 (char)0xef, (char)0xbf, (char)0xbd, 2717 (char)0xef, (char)0xbf, (char)0xbd 2718 }; 2719 const char *expected; 2720 int32_t expectedLength; 2721 char dest[20]; /* longer than longest expectedLength */ 2722 2723 const char *src; 2724 char *target; 2725 2726 UChar pivotBuffer[128]; 2727 UChar *pivotSource, *pivotTarget; 2728 2729 errorCode=U_ZERO_ERROR; 2730 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2731 if(U_FAILURE(errorCode)) { 2732 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2733 return; 2734 } 2735 2736 for(i=0; i<LENGTHOF(converterNames); ++i) { 2737 errorCode=U_ZERO_ERROR; 2738 cnv=ucnv_open(converterNames[i], &errorCode); 2739 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 2740 NULL, NULL, &errorCode); 2741 if(U_FAILURE(errorCode)) { 2742 log_data_err("unable to open %s converter - %s\n", 2743 converterNames[i], u_errorName(errorCode)); 2744 continue; 2745 } 2746 src=bad_utf8; 2747 target=dest; 2748 uprv_memset(dest, 9, sizeof(dest)); 2749 if(i==LENGTHOF(converterNames)-1) { 2750 /* conversion to UTF-8 yields two U+FFFD directly */ 2751 expected=twoFFFD; 2752 expectedLength=6; 2753 } else { 2754 /* conversion to a non-Unicode charset yields two NCRs */ 2755 expected=twoNCRs; 2756 expectedLength=16; 2757 } 2758 pivotBuffer[0]=0; 2759 pivotBuffer[1]=1; 2760 pivotBuffer[2]=2; 2761 pivotSource=pivotTarget=pivotBuffer; 2762 ucnv_convertEx( 2763 cnv, utf8Cnv, 2764 &target, dest+expectedLength, 2765 &src, bad_utf8+sizeof(bad_utf8), 2766 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2767 TRUE, TRUE, &errorCode); 2768 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 || 2769 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) || 2770 dest[expectedLength]!=9 2771 ) { 2772 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]); 2773 } 2774 ucnv_close(cnv); 2775 } 2776 ucnv_close(utf8Cnv); 2777} 2778 2779static void 2780TestConvertAlgorithmic() { 2781#if !UCONFIG_NO_LEGACY_CONVERSION 2782 static const uint8_t 2783 utf8[]={ 2784 /* 4e00 30a1 ff61 0410 */ 2785 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2786 }, 2787 shiftJIS[]={ 2788 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2789 }, 2790 /*errorTarget[]={*/ 2791 /* 2792 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2793 * SUB, SUB, 0x40, SUB, SUB, 0x40 2794 */ 2795 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ 2796 /*},*/ 2797 utf16[]={ 2798 0xfe, 0xff /* BOM only, no text */ 2799 }, 2800 utf32[]={ 2801 0xff, 0xfe, 0, 0 /* BOM only, no text */ 2802 }; 2803 2804 char target[100], utf8NUL[100], shiftJISNUL[100]; 2805 2806 UConverter *cnv; 2807 UErrorCode errorCode; 2808 2809 int32_t length; 2810 2811 errorCode=U_ZERO_ERROR; 2812 cnv=ucnv_open("Shift-JIS", &errorCode); 2813 if(U_FAILURE(errorCode)) { 2814 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2815 ucnv_close(cnv); 2816 return; 2817 } 2818 2819 memcpy(utf8NUL, utf8, sizeof(utf8)); 2820 utf8NUL[sizeof(utf8)]=0; 2821 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS)); 2822 shiftJISNUL[sizeof(shiftJIS)]=0; 2823 2824 /* 2825 * The to/from algorithmic convenience functions share a common implementation, 2826 * so we need not test all permutations of them. 2827 */ 2828 2829 /* length in, not terminated out */ 2830 errorCode=U_ZERO_ERROR; 2831 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode); 2832 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2833 length!=sizeof(shiftJIS) || 2834 memcmp(target, shiftJIS, length)!=0 2835 ) { 2836 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n", 2837 u_errorName(errorCode), length, sizeof(shiftJIS)); 2838 } 2839 2840 /* terminated in and out */ 2841 memset(target, 0x55, sizeof(target)); 2842 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2843 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode); 2844 if( errorCode!=U_ZERO_ERROR || 2845 length!=sizeof(utf8) || 2846 memcmp(target, utf8, length)!=0 2847 ) { 2848 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n", 2849 u_errorName(errorCode), length, sizeof(shiftJIS)); 2850 } 2851 2852 /* empty string, some target buffer */ 2853 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2854 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode); 2855 if( errorCode!=U_ZERO_ERROR || 2856 length!=0 2857 ) { 2858 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n", 2859 u_errorName(errorCode), length); 2860 } 2861 2862 /* pseudo-empty string, no target buffer */ 2863 errorCode=U_ZERO_ERROR; 2864 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2865 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2866 length!=0 2867 ) { 2868 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2869 u_errorName(errorCode), length); 2870 } 2871 2872 errorCode=U_ZERO_ERROR; 2873 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); 2874 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2875 length!=0 2876 ) { 2877 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2878 u_errorName(errorCode), length); 2879 } 2880 2881 /* bad arguments */ 2882 errorCode=U_MESSAGE_PARSE_ERROR; 2883 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2884 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2885 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2886 } 2887 2888 /* source==NULL */ 2889 errorCode=U_ZERO_ERROR; 2890 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode); 2891 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2892 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode)); 2893 } 2894 2895 /* illegal alg. type */ 2896 errorCode=U_ZERO_ERROR; 2897 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode); 2898 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2899 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode)); 2900 } 2901ucnv_close(cnv); 2902#endif 2903} 2904 2905static void TestLMBCSMaxChar(void) { 2906 static const struct { 2907 int8_t maxSize; 2908 const char *name; 2909 } converter[] = { 2910 /* some non-LMBCS converters - perfect test setup here */ 2911 { 1, "US-ASCII"}, 2912 { 1, "ISO-8859-1"}, 2913 2914 { 2, "UTF-16"}, 2915 { 2, "UTF-16BE"}, 2916 { 3, "UTF-8"}, 2917 { 3, "CESU-8"}, 2918 { 3, "SCSU"}, 2919 { 4, "UTF-32"}, 2920 { 4, "UTF-7"}, 2921 { 4, "IMAP-mailbox-name"}, 2922 { 4, "BOCU-1"}, 2923 2924 { 1, "windows-1256"}, 2925 { 2, "Shift-JIS"}, 2926 { 2, "ibm-16684"}, 2927 { 3, "ibm-930"}, 2928 { 3, "ibm-1390"}, 2929 { 4, "*test3"}, 2930 { 16,"*test4"}, 2931 2932 { 4, "ISCII"}, 2933 { 4, "HZ"}, 2934 2935 { 3, "ISO-2022"}, 2936 { 3, "ISO-2022-KR"}, 2937 { 6, "ISO-2022-JP"}, 2938 { 8, "ISO-2022-CN"}, 2939 2940 /* LMBCS */ 2941 { 3, "LMBCS-1"}, 2942 { 3, "LMBCS-2"}, 2943 { 3, "LMBCS-3"}, 2944 { 3, "LMBCS-4"}, 2945 { 3, "LMBCS-5"}, 2946 { 3, "LMBCS-6"}, 2947 { 3, "LMBCS-8"}, 2948 { 3, "LMBCS-11"}, 2949 { 3, "LMBCS-16"}, 2950 { 3, "LMBCS-17"}, 2951 { 3, "LMBCS-18"}, 2952 { 3, "LMBCS-19"} 2953 }; 2954 int32_t idx; 2955 2956 for (idx = 0; idx < LENGTHOF(converter); idx++) { 2957 UErrorCode status = U_ZERO_ERROR; 2958 UConverter *cnv = cnv_open(converter[idx].name, &status); 2959 if (U_FAILURE(status)) { 2960 continue; 2961 } 2962 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) { 2963 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n", 2964 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv)); 2965 } 2966 ucnv_close(cnv); 2967 } 2968 2969 /* mostly test that the macro compiles */ 2970 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) { 2971 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); 2972 } 2973} 2974 2975 2976static void TestJ1968(void) { 2977 UErrorCode err = U_ZERO_ERROR; 2978 UConverter *cnv; 2979 char myConvName[] = "My really really really really really really really really really really really" 2980 " really really really really really really really really really really really" 2981 " really really really really really really really really long converter name"; 2982 UChar myConvNameU[sizeof(myConvName)]; 2983 2984 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName)); 2985 2986 err = U_ZERO_ERROR; 2987 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0; 2988 cnv = ucnv_openU(myConvNameU, &err); 2989 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2990 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2991 } 2992 2993 err = U_ZERO_ERROR; 2994 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2995 cnv = ucnv_openU(myConvNameU, &err); 2996 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2997 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2998 } 2999 3000 err = U_ZERO_ERROR; 3001 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 3002 cnv = ucnv_openU(myConvNameU, &err); 3003 if (cnv || err != U_FILE_ACCESS_ERROR) { 3004 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3005 } 3006 3007 3008 3009 3010 err = U_ZERO_ERROR; 3011 cnv = ucnv_open(myConvName, &err); 3012 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3013 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3014 } 3015 3016 err = U_ZERO_ERROR; 3017 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ','; 3018 cnv = ucnv_open(myConvName, &err); 3019 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3020 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3021 } 3022 3023 err = U_ZERO_ERROR; 3024 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 3025 cnv = ucnv_open(myConvName, &err); 3026 if (cnv || err != U_FILE_ACCESS_ERROR) { 3027 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3028 } 3029 3030 err = U_ZERO_ERROR; 3031 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 3032 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7); 3033 cnv = ucnv_open(myConvName, &err); 3034 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3035 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3036 } 3037 3038 /* The comma isn't really a part of the converter name. */ 3039 err = U_ZERO_ERROR; 3040 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 3041 cnv = ucnv_open(myConvName, &err); 3042 if (cnv || err != U_FILE_ACCESS_ERROR) { 3043 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3044 } 3045 3046 err = U_ZERO_ERROR; 3047 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' '; 3048 cnv = ucnv_open(myConvName, &err); 3049 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3050 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3051 } 3052 3053 err = U_ZERO_ERROR; 3054 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 3055 cnv = ucnv_open(myConvName, &err); 3056 if (cnv || err != U_FILE_ACCESS_ERROR) { 3057 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3058 } 3059 3060} 3061 3062#if !UCONFIG_NO_LEGACY_CONVERSION 3063static void 3064testSwap(const char *name, UBool swap) { 3065 /* 3066 * Test Unicode text. 3067 * Contains characters that are the highest for some of the 3068 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the 3069 * tables copies the entire tables. 3070 */ 3071 static const UChar text[]={ 3072 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a 3073 }; 3074 3075 UChar uNormal[32], uSwapped[32]; 3076 char normal[32], swapped[32]; 3077 const UChar *pcu; 3078 UChar *pu; 3079 char *pc; 3080 int32_t i, normalLength, swappedLength; 3081 UChar u; 3082 char c; 3083 3084 const char *swappedName; 3085 UConverter *cnv, *swapCnv; 3086 UErrorCode errorCode; 3087 3088 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */ 3089 3090 /* open both the normal and the LF/NL-swapping converters */ 3091 strcpy(swapped, name); 3092 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING); 3093 3094 errorCode=U_ZERO_ERROR; 3095 swapCnv=ucnv_open(swapped, &errorCode); 3096 cnv=ucnv_open(name, &errorCode); 3097 if(U_FAILURE(errorCode)) { 3098 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode)); 3099 goto cleanup; 3100 } 3101 3102 /* the name must contain the swap option if and only if we expect the converter to swap */ 3103 swappedName=ucnv_getName(swapCnv, &errorCode); 3104 if(U_FAILURE(errorCode)) { 3105 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode)); 3106 goto cleanup; 3107 } 3108 3109 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING); 3110 if(swap != (pc!=NULL)) { 3111 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap); 3112 goto cleanup; 3113 } 3114 3115 /* convert to EBCDIC */ 3116 pcu=text; 3117 pc=normal; 3118 ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3119 normalLength=(int32_t)(pc-normal); 3120 3121 pcu=text; 3122 pc=swapped; 3123 ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3124 swappedLength=(int32_t)(pc-swapped); 3125 3126 if(U_FAILURE(errorCode)) { 3127 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode)); 3128 goto cleanup; 3129 } 3130 3131 /* compare EBCDIC output */ 3132 if(normalLength!=swappedLength) { 3133 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3134 goto cleanup; 3135 } 3136 for(i=0; i<normalLength; ++i) { 3137 /* swap EBCDIC LF/NL for comparison */ 3138 c=normal[i]; 3139 if(swap) { 3140 if(c==0x15) { 3141 c=0x25; 3142 } else if(c==0x25) { 3143 c=0x15; 3144 } 3145 } 3146 3147 if(c!=swapped[i]) { 3148 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]); 3149 goto cleanup; 3150 } 3151 } 3152 3153 /* convert back to Unicode (may not roundtrip) */ 3154 pc=normal; 3155 pu=uNormal; 3156 ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); 3157 normalLength=(int32_t)(pu-uNormal); 3158 3159 pc=normal; 3160 pu=uSwapped; 3161 ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); 3162 swappedLength=(int32_t)(pu-uSwapped); 3163 3164 if(U_FAILURE(errorCode)) { 3165 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode)); 3166 goto cleanup; 3167 } 3168 3169 /* compare EBCDIC output */ 3170 if(normalLength!=swappedLength) { 3171 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3172 goto cleanup; 3173 } 3174 for(i=0; i<normalLength; ++i) { 3175 /* swap EBCDIC LF/NL for comparison */ 3176 u=uNormal[i]; 3177 if(swap) { 3178 if(u==0xa) { 3179 u=0x85; 3180 } else if(u==0x85) { 3181 u=0xa; 3182 } 3183 } 3184 3185 if(u!=uSwapped[i]) { 3186 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]); 3187 goto cleanup; 3188 } 3189 } 3190 3191 /* clean up */ 3192cleanup: 3193 ucnv_close(cnv); 3194 ucnv_close(swapCnv); 3195} 3196 3197static void 3198TestEBCDICSwapLFNL() { 3199 static const struct { 3200 const char *name; 3201 UBool swap; 3202 } tests[]={ 3203 { "ibm-37", TRUE }, 3204 { "ibm-1047", TRUE }, 3205 { "ibm-1140", TRUE }, 3206 { "ibm-930", TRUE }, 3207 { "iso-8859-3", FALSE } 3208 }; 3209 3210 int i; 3211 3212 for(i=0; i<LENGTHOF(tests); ++i) { 3213 testSwap(tests[i].name, tests[i].swap); 3214 } 3215} 3216#else 3217static void 3218TestEBCDICSwapLFNL() { 3219 /* test nothing... */ 3220} 3221#endif 3222 3223static const UVersionInfo ICU_34 = {3,4,0,0}; 3224 3225static void TestFromUCountPending(){ 3226#if !UCONFIG_NO_LEGACY_CONVERSION 3227 UErrorCode status = U_ZERO_ERROR; 3228/* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */ 3229 static const struct { 3230 UChar input[6]; 3231 int32_t len; 3232 int32_t exp; 3233 }fromUnicodeTests[] = { 3234 /*m:n conversion*/ 3235 {{0xdbc4},1,1}, 3236 {{ 0xdbc4, 0xde34, 0xd84d},3,1}, 3237 {{ 0xdbc4, 0xde34, 0xd900},3,3}, 3238 }; 3239 int i; 3240 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3241 if(U_FAILURE(status)){ 3242 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3243 return; 3244 } 3245 for(i=0; i<LENGTHOF(fromUnicodeTests); ++i) { 3246 char tgt[10]; 3247 char* target = tgt; 3248 char* targetLimit = target + 10; 3249 const UChar* source = fromUnicodeTests[i].input; 3250 const UChar* sourceLimit = source + fromUnicodeTests[i].len; 3251 int32_t len = 0; 3252 ucnv_reset(cnv); 3253 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3254 len = ucnv_fromUCountPending(cnv, &status); 3255 if(U_FAILURE(status)){ 3256 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3257 status = U_ZERO_ERROR; 3258 continue; 3259 } 3260 if(len != fromUnicodeTests[i].exp){ 3261 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n"); 3262 } 3263 } 3264 status = U_ZERO_ERROR; 3265 { 3266 /* 3267 * The converter has to read the tail before it knows that 3268 * only head alone matches. 3269 * At the end, the output for head will overflow the target, 3270 * middle will be pending, and tail will not have been consumed. 3271 */ 3272 /* 3273 \U00101234 -> x (<U101234> \x07 |0) 3274 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0) 3275 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0) 3276 \U00060007 -> unassigned 3277 */ 3278 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ 3279 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ 3280 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ 3281 char tgt[10]; 3282 char* target = tgt; 3283 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ 3284 const UChar* source = head; 3285 const UChar* sourceLimit = source + u_strlen(head); 3286 int32_t len = 0; 3287 ucnv_reset(cnv); 3288 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3289 len = ucnv_fromUCountPending(cnv, &status); 3290 if(U_FAILURE(status)){ 3291 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3292 status = U_ZERO_ERROR; 3293 } 3294 if(len!=4){ 3295 log_err("ucnv_fromUInputHeld did not return correct length for head\n"); 3296 } 3297 source = middle; 3298 sourceLimit = source + u_strlen(middle); 3299 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3300 len = ucnv_fromUCountPending(cnv, &status); 3301 if(U_FAILURE(status)){ 3302 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3303 status = U_ZERO_ERROR; 3304 } 3305 if(len!=5){ 3306 log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); 3307 } 3308 source = tail; 3309 sourceLimit = source + u_strlen(tail); 3310 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3311 if(status != U_BUFFER_OVERFLOW_ERROR){ 3312 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3313 } 3314 status = U_ZERO_ERROR; 3315 len = ucnv_fromUCountPending(cnv, &status); 3316 /* middle[1] is pending, tail has not been consumed */ 3317 if(U_FAILURE(status)){ 3318 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); 3319 } 3320 if(len!=1){ 3321 log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); 3322 } 3323 } 3324 ucnv_close(cnv); 3325#endif 3326} 3327 3328static void 3329TestToUCountPending(){ 3330#if !UCONFIG_NO_LEGACY_CONVERSION 3331 UErrorCode status = U_ZERO_ERROR; 3332 static const struct { 3333 char input[6]; 3334 int32_t len; 3335 int32_t exp; 3336 }toUnicodeTests[] = { 3337 /*m:n conversion*/ 3338 {{0x05, 0x01, 0x02},3,3}, 3339 {{0x01, 0x02},2,2}, 3340 {{0x07, 0x00, 0x01, 0x02},4,4}, 3341 }; 3342 3343 int i; 3344 UConverterToUCallback *oldToUAction= NULL; 3345 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3346 if(U_FAILURE(status)){ 3347 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3348 return; 3349 } 3350 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3351 for(i=0; i<LENGTHOF(toUnicodeTests); ++i) { 3352 UChar tgt[10]; 3353 UChar* target = tgt; 3354 UChar* targetLimit = target + 20; 3355 const char* source = toUnicodeTests[i].input; 3356 const char* sourceLimit = source + toUnicodeTests[i].len; 3357 int32_t len = 0; 3358 ucnv_reset(cnv); 3359 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3360 len = ucnv_toUCountPending(cnv,&status); 3361 if(U_FAILURE(status)){ 3362 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3363 status = U_ZERO_ERROR; 3364 continue; 3365 } 3366 if(len != toUnicodeTests[i].exp){ 3367 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n"); 3368 } 3369 } 3370 status = U_ZERO_ERROR; 3371 ucnv_close(cnv); 3372 3373 { 3374 /* 3375 * The converter has to read the tail before it knows that 3376 * only head alone matches. 3377 * At the end, the output for head will overflow the target, 3378 * mid will be pending, and tail will not have been consumed. 3379 */ 3380 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00}; 3381 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 }; 3382 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 }; 3383 /* 3384 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0) 3385 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0) 3386 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3) 3387 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") 3388 */ 3389 UChar tgt[10]; 3390 UChar* target = tgt; 3391 UChar* targetLimit = target + 1; /* expect overflow from converting */ 3392 const char* source = head; 3393 const char* sourceLimit = source + strlen(head); 3394 int32_t len = 0; 3395 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); 3396 if(U_FAILURE(status)){ 3397 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3398 return; 3399 } 3400 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3401 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3402 len = ucnv_toUCountPending(cnv,&status); 3403 if(U_FAILURE(status)){ 3404 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3405 } 3406 if(len != 4){ 3407 log_err("Did not get the expected len for head.\n"); 3408 } 3409 source=mid; 3410 sourceLimit = source+strlen(mid); 3411 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3412 len = ucnv_toUCountPending(cnv,&status); 3413 if(U_FAILURE(status)){ 3414 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3415 } 3416 if(len != 8){ 3417 log_err("Did not get the expected len for mid.\n"); 3418 } 3419 3420 source=tail; 3421 sourceLimit = source+strlen(tail); 3422 targetLimit = target; 3423 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3424 if(status != U_BUFFER_OVERFLOW_ERROR){ 3425 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3426 } 3427 status = U_ZERO_ERROR; 3428 len = ucnv_toUCountPending(cnv,&status); 3429 /* mid[4] is pending, tail has not been consumed */ 3430 if(U_FAILURE(status)){ 3431 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status)); 3432 } 3433 if(len != 4){ 3434 log_err("Did not get the expected len for tail.\n"); 3435 } 3436 ucnv_close(cnv); 3437 } 3438#endif 3439} 3440 3441static void TestOneDefaultNameChange(const char *name, const char *expected) { 3442 UErrorCode status = U_ZERO_ERROR; 3443 UConverter *cnv; 3444 ucnv_setDefaultName(name); 3445 if(strcmp(ucnv_getDefaultName(), expected)==0) 3446 log_verbose("setDefaultName of %s works.\n", name); 3447 else 3448 log_err("setDefaultName of %s failed\n", name); 3449 cnv=ucnv_open(NULL, &status); 3450 if (U_FAILURE(status) || cnv == NULL) { 3451 log_err("opening the default converter of %s failed\n", name); 3452 return; 3453 } 3454 if(strcmp(ucnv_getName(cnv, &status), expected)==0) 3455 log_verbose("ucnv_getName of %s works.\n", name); 3456 else 3457 log_err("ucnv_getName of %s failed\n", name); 3458 ucnv_close(cnv); 3459} 3460 3461static void TestDefaultName(void) { 3462 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ 3463 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; 3464 strcpy(defaultName, ucnv_getDefaultName()); 3465 3466 log_verbose("getDefaultName returned %s\n", defaultName); 3467 3468 /*change the default name by setting it */ 3469 TestOneDefaultNameChange("UTF-8", "UTF-8"); 3470#if U_CHARSET_IS_UTF8 3471 TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); 3472 TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); 3473 TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); 3474#else 3475# if !UCONFIG_NO_LEGACY_CONVERSION 3476 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); 3477 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); 3478# endif 3479 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); 3480#endif 3481 3482 /*set the default name back*/ 3483 ucnv_setDefaultName(defaultName); 3484} 3485 3486/* Test that ucnv_compareNames() matches names according to spec. ----------- */ 3487 3488static U_INLINE int 3489sign(int n) { 3490 if(n==0) { 3491 return 0; 3492 } else if(n<0) { 3493 return -1; 3494 } else /* n>0 */ { 3495 return 1; 3496 } 3497} 3498 3499static void 3500compareNames(const char **names) { 3501 const char *relation, *name1, *name2; 3502 int rel, result; 3503 3504 relation=*names++; 3505 if(*relation=='=') { 3506 rel = 0; 3507 } else if(*relation=='<') { 3508 rel = -1; 3509 } else { 3510 rel = 1; 3511 } 3512 3513 name1=*names++; 3514 if(name1==NULL) { 3515 return; 3516 } 3517 while((name2=*names++)!=NULL) { 3518 result=ucnv_compareNames(name1, name2); 3519 if(sign(result)!=rel) { 3520 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); 3521 } 3522 name1=name2; 3523 } 3524} 3525 3526static void 3527TestCompareNames() { 3528 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; 3529 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; 3530 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; 3531 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; 3532 3533 compareNames(equalUTF8); 3534 compareNames(equalIBM); 3535 compareNames(lessMac); 3536 compareNames(lessUTF080); 3537} 3538 3539static void 3540TestSubstString() { 3541 static const UChar surrogate[1]={ 0xd900 }; 3542 char buffer[16]; 3543 3544 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3545 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3546 UConverter *cnv; 3547 UErrorCode errorCode; 3548 int32_t length; 3549 int8_t len8; 3550 3551 /* UTF-16/32: test that the BOM is output before the sub character */ 3552 errorCode=U_ZERO_ERROR; 3553 cnv=ucnv_open("UTF-16", &errorCode); 3554 if(U_FAILURE(errorCode)) { 3555 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); 3556 return; 3557 } 3558 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3559 ucnv_close(cnv); 3560 if(U_FAILURE(errorCode) || 3561 length!=4 || 3562 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3563 ) { 3564 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); 3565 } 3566 3567 errorCode=U_ZERO_ERROR; 3568 cnv=ucnv_open("UTF-32", &errorCode); 3569 if(U_FAILURE(errorCode)) { 3570 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); 3571 return; 3572 } 3573 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3574 ucnv_close(cnv); 3575 if(U_FAILURE(errorCode) || 3576 length!=8 || 3577 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3578 ) { 3579 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); 3580 } 3581 3582 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ 3583 errorCode=U_ZERO_ERROR; 3584 cnv=ucnv_open("ISO-8859-1", &errorCode); 3585 if(U_FAILURE(errorCode)) { 3586 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); 3587 return; 3588 } 3589 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3590 if(U_FAILURE(errorCode)) { 3591 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); 3592 } else { 3593 len8 = sizeof(buffer); 3594 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3595 /* Stateless converter, we expect the string converted to charset bytes. */ 3596 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { 3597 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); 3598 } 3599 } 3600 ucnv_close(cnv); 3601 3602#if !UCONFIG_NO_LEGACY_CONVERSION 3603 errorCode=U_ZERO_ERROR; 3604 cnv=ucnv_open("HZ", &errorCode); 3605 if(U_FAILURE(errorCode)) { 3606 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); 3607 return; 3608 } 3609 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3610 if(U_FAILURE(errorCode)) { 3611 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); 3612 } else { 3613 len8 = sizeof(buffer); 3614 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3615 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ 3616 if(U_FAILURE(errorCode) || len8!=0) { 3617 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); 3618 } 3619 } 3620 ucnv_close(cnv); 3621#endif 3622 /* 3623 * Further testing of ucnv_setSubstString() is done via intltest convert. 3624 * We do not test edge cases of illegal arguments and similar because the 3625 * function implementation uses all of its parameters in calls to other 3626 * functions with UErrorCode parameters. 3627 */ 3628} 3629 3630static void 3631InvalidArguments() { 3632 UConverter *cnv; 3633 UErrorCode errorCode; 3634 char charBuffer[2] = {1, 1}; 3635 char ucharAsCharBuffer[2] = {2, 2}; 3636 char *charsPtr = charBuffer; 3637 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; 3638 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); 3639 3640 errorCode=U_ZERO_ERROR; 3641 cnv=ucnv_open("UTF-8", &errorCode); 3642 if(U_FAILURE(errorCode)) { 3643 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); 3644 return; 3645 } 3646 3647 errorCode=U_ZERO_ERROR; 3648 /* This one should fail because an incomplete UChar is being passed in */ 3649 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); 3650 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3651 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3652 } 3653 3654 errorCode=U_ZERO_ERROR; 3655 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3656 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); 3657 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3658 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3659 } 3660 3661 errorCode=U_ZERO_ERROR; 3662 /* This one should fail because an incomplete UChar is being passed in */ 3663 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3664 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3665 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3666 } 3667 3668 errorCode=U_ZERO_ERROR; 3669 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3670 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3671 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3672 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3673 } 3674 3675 if (charBuffer[0] != 1 || charBuffer[1] != 1 3676 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) 3677 { 3678 log_err("Data was incorrectly written to buffers\n"); 3679 } 3680 3681 ucnv_close(cnv); 3682} 3683 3684static void TestGetName() { 3685 static const char *const names[] = { 3686 "Unicode", "UTF-16", 3687 "UnicodeBigUnmarked", "UTF-16BE", 3688 "UnicodeBig", "UTF-16BE,version=1", 3689 "UnicodeLittleUnmarked", "UTF-16LE", 3690 "UnicodeLittle", "UTF-16LE,version=1", 3691 "x-UTF-16LE-BOM", "UTF-16LE,version=1" 3692 }; 3693 int32_t i; 3694 for(i = 0; i < LENGTHOF(names); i += 2) { 3695 UErrorCode errorCode = U_ZERO_ERROR; 3696 UConverter *cnv = ucnv_open(names[i], &errorCode); 3697 if(U_SUCCESS(errorCode)) { 3698 const char *name = ucnv_getName(cnv, &errorCode); 3699 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { 3700 log_err("ucnv_getName(%s) = %s != %s -- %s\n", 3701 names[i], name, names[i+1], u_errorName(errorCode)); 3702 } 3703 ucnv_close(cnv); 3704 } 3705 } 3706} 3707 3708static void TestUTFBOM() { 3709 static const UChar a16[] = { 0x61 }; 3710 static const char *const names[] = { 3711 "UTF-16", 3712 "UTF-16,version=1", 3713 "UTF-16BE", 3714 "UnicodeBig", 3715 "UTF-16LE", 3716 "UnicodeLittle" 3717 }; 3718 static const uint8_t expected[][5] = { 3719#if U_IS_BIG_ENDIAN 3720 { 4, 0xfe, 0xff, 0, 0x61 }, 3721 { 4, 0xfe, 0xff, 0, 0x61 }, 3722#else 3723 { 4, 0xff, 0xfe, 0x61, 0 }, 3724 { 4, 0xff, 0xfe, 0x61, 0 }, 3725#endif 3726 3727 { 2, 0, 0x61 }, 3728 { 4, 0xfe, 0xff, 0, 0x61 }, 3729 3730 { 2, 0x61, 0 }, 3731 { 4, 0xff, 0xfe, 0x61, 0 } 3732 }; 3733 3734 char bytes[10]; 3735 int32_t i; 3736 3737 for(i = 0; i < LENGTHOF(names); ++i) { 3738 UErrorCode errorCode = U_ZERO_ERROR; 3739 UConverter *cnv = ucnv_open(names[i], &errorCode); 3740 int32_t length = 0; 3741 const uint8_t *exp = expected[i]; 3742 if (U_FAILURE(errorCode)) { 3743 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); 3744 continue; 3745 } 3746 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); 3747 3748 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { 3749 log_err("unexpected %s BOM writing behavior -- %s\n", 3750 names[i], u_errorName(errorCode)); 3751 } 3752 ucnv_close(cnv); 3753 } 3754} 3755