1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2013, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/***************************************************************************** 7* 8* File CU_CAPITST.C 9* 10* Modification History: 11* Name Description 12* Madhu Katragadda Ported for C API 13****************************************************************************** 14*/ 15#include <stdio.h> 16#include <stdlib.h> 17#include <string.h> 18#include <ctype.h> 19#include "unicode/uloc.h" 20#include "unicode/ucnv.h" 21#include "unicode/ucnv_err.h" 22#include "unicode/putil.h" 23#include "unicode/uset.h" 24#include "unicode/ustring.h" 25#include "ucnv_bld.h" /* for sizeof(UConverter) */ 26#include "cmemory.h" /* for UAlignedMemory */ 27#include "cintltst.h" 28#include "ccapitst.h" 29#include "cstring.h" 30 31#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 32 33#define NUM_CODEPAGE 1 34#define MAX_FILE_LEN 1024*20 35#define UCS_FILE_NAME_SIZE 512 36 37/*returns an action other than the one provided*/ 38#if !UCONFIG_NO_LEGACY_CONVERSION 39static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); 40static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); 41#endif 42 43static UConverter * 44cnv_open(const char *name, UErrorCode *pErrorCode) { 45 if(name!=NULL && name[0]=='*') { 46 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode); 47 } else { 48 return ucnv_open(name, pErrorCode); 49 } 50} 51 52 53static void ListNames(void); 54static void TestFlushCache(void); 55static void TestDuplicateAlias(void); 56static void TestCCSID(void); 57static void TestJ932(void); 58static void TestJ1968(void); 59#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 60static void TestLMBCSMaxChar(void); 61#endif 62 63#if !UCONFIG_NO_LEGACY_CONVERSION 64static void TestConvertSafeCloneCallback(void); 65#endif 66 67static void TestEBCDICSwapLFNL(void); 68static void TestConvertEx(void); 69static void TestConvertExFromUTF8(void); 70static void TestConvertExFromUTF8_C5F0(void); 71static void TestConvertAlgorithmic(void); 72 void TestDefaultConverterError(void); /* defined in cctest.c */ 73 void TestDefaultConverterSet(void); /* defined in cctest.c */ 74static void TestToUCountPending(void); 75static void TestFromUCountPending(void); 76static void TestDefaultName(void); 77static void TestCompareNames(void); 78static void TestSubstString(void); 79static void InvalidArguments(void); 80static void TestGetName(void); 81static void TestUTFBOM(void); 82 83void addTestConvert(TestNode** root); 84 85void addTestConvert(TestNode** root) 86{ 87 addTest(root, &ListNames, "tsconv/ccapitst/ListNames"); 88 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert"); 89 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); 90 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); 91 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); 92 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); 93#if !UCONFIG_NO_LEGACY_CONVERSION 94 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); 95#endif 96 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); 97 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); 98 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); 99#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 100 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); 101#endif 102 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); 103 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); 104 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); 105 addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); 106 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); 107 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); 108 addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet"); 109#if !UCONFIG_NO_FILE_IO 110 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); 111 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); 112#endif 113 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); 114 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); 115 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); 116 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); 117 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); 118 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); 119} 120 121static void ListNames(void) { 122 UErrorCode err = U_ZERO_ERROR; 123 int32_t testLong1 = 0; 124 const char* available_conv; 125 UEnumeration *allNamesEnum = NULL; 126 int32_t allNamesCount = 0; 127 uint16_t count; 128 129 log_verbose("Testing ucnv_openAllNames()..."); 130 allNamesEnum = ucnv_openAllNames(&err); 131 if(U_FAILURE(err)) { 132 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); 133 } 134 else { 135 const char *string = NULL; 136 int32_t len = 0; 137 int32_t count1 = 0; 138 int32_t count2 = 0; 139 allNamesCount = uenum_count(allNamesEnum, &err); 140 while ((string = uenum_next(allNamesEnum, &len, &err))) { 141 count1++; 142 log_verbose("read \"%s\", length %i\n", string, len); 143 } 144 if (U_FAILURE(err)) { 145 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err)); 146 err = U_ZERO_ERROR; 147 } 148 uenum_reset(allNamesEnum, &err); 149 while ((string = uenum_next(allNamesEnum, &len, &err))) { 150 count2++; 151 ucnv_close(ucnv_open(string, &err)); 152 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable"); 153 err = U_ZERO_ERROR; 154 } 155 if (count1 != count2) { 156 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n"); 157 } 158 } 159 uenum_close(allNamesEnum); 160 err = U_ZERO_ERROR; 161 162 /*Tests ucnv_getAvailableName(), getAvialableCount()*/ 163 164 log_verbose("Testing ucnv_countAvailable()..."); 165 166 testLong1=ucnv_countAvailable(); 167 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount); 168 169 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */ 170 171 available_conv = ucnv_getAvailableName(testLong1); 172 /*test ucnv_getAvailableName with err condition*/ 173 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 "); 174 available_conv = ucnv_getAvailableName(-1); 175 if(available_conv != NULL){ 176 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n"); 177 } 178 179 /* Test ucnv_countAliases() etc. */ 180 count = ucnv_countAliases("utf-8", &err); 181 if(U_FAILURE(err)) { 182 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); 183 } else if(count <= 0) { 184 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); 185 } else { 186 /* try to get the aliases individually */ 187 const char *alias; 188 alias = ucnv_getAlias("utf-8", 0, &err); 189 if(U_FAILURE(err)) { 190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err)); 191 } else if(strcmp("UTF-8", alias) != 0) { 192 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias); 193 } else { 194 uint16_t aliasNum; 195 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 196 alias = ucnv_getAlias("utf-8", aliasNum, &err); 197 if(U_FAILURE(err)) { 198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 199 } else if(strlen(alias) > 20) { 200 /* sanity check */ 201 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias); 202 } else { 203 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias); 204 } 205 } 206 if(U_SUCCESS(err)) { 207 /* try to fill an array with all aliases */ 208 const char **aliases; 209 aliases=(const char **)malloc(count * sizeof(const char *)); 210 if(aliases != 0) { 211 ucnv_getAliases("utf-8", aliases, &err); 212 if(U_FAILURE(err)) { 213 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err)); 214 } else { 215 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 216 /* compare the pointers with the ones returned individually */ 217 alias = ucnv_getAlias("utf-8", aliasNum, &err); 218 if(U_FAILURE(err)) { 219 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 220 } else if(aliases[aliasNum] != alias) { 221 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum); 222 } 223 } 224 } 225 free((char **)aliases); 226 } 227 } 228 } 229 } 230} 231 232 233static void TestConvert() 234{ 235#if !UCONFIG_NO_LEGACY_CONVERSION 236 char myptr[4]; 237 char save[4]; 238 int32_t testLong1 = 0; 239 uint16_t rest = 0; 240 int32_t len = 0; 241 int32_t x = 0; 242 FILE* ucs_file_in = NULL; 243 UChar BOM = 0x0000; 244 UChar myUChar = 0x0000; 245 char* mytarget; /* [MAX_FILE_LEN] */ 246 char* mytarget_1; 247 char* mytarget_use; 248 UChar* consumedUni = NULL; 249 char* consumed = NULL; 250 char* output_cp_buffer; /* [MAX_FILE_LEN] */ 251 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */ 252 UChar* ucs_file_buffer_use; 253 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ 254 UChar* my_ucs_file_buffer_1; 255 int8_t ii = 0; 256 uint16_t codepage_index = 0; 257 int32_t cp = 0; 258 UErrorCode err = U_ZERO_ERROR; 259 char ucs_file_name[UCS_FILE_NAME_SIZE]; 260 UConverterFromUCallback MIA1, MIA1_2; 261 UConverterToUCallback MIA2, MIA2_2; 262 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2; 263 UConverter* someConverters[5]; 264 UConverter* myConverter = 0; 265 UChar* displayname = 0; 266 267 const char* locale; 268 269 UChar* uchar1 = 0; 270 UChar* uchar2 = 0; 271 UChar* uchar3 = 0; 272 int32_t targetcapacity2; 273 int32_t targetcapacity; 274 int32_t targetsize; 275 int32_t disnamelen; 276 277 const UChar* tmp_ucs_buf; 278 const UChar* tmp_consumedUni=NULL; 279 const char* tmp_mytarget_use; 280 const char* tmp_consumed; 281 282 /****************************************************************** 283 Checking Unicode -> ksc 284 ******************************************************************/ 285 286 const char* CodePagesToTest[NUM_CODEPAGE] = 287 { 288 "ibm-949_P110-1999" 289 290 291 }; 292 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] = 293 { 294 949 295 }; 296 297 298 const int8_t CodePagesMinChars[NUM_CODEPAGE] = 299 { 300 1 301 302 }; 303 304 const int8_t CodePagesMaxChars[NUM_CODEPAGE] = 305 { 306 2 307 308 }; 309 310 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] = 311 { 312 0xAFFE 313 }; 314 315 const char* CodePagesTestFiles[NUM_CODEPAGE] = 316 { 317 "uni-text.bin" 318 }; 319 320 321 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] = 322 { 323 UCNV_IBM 324 325 }; 326 327 const char* CodePagesLocale[NUM_CODEPAGE] = 328 { 329 "ko_KR" 330 }; 331 332 UConverterFromUCallback oldFromUAction = NULL; 333 UConverterToUCallback oldToUAction = NULL; 334 const void* oldFromUContext = NULL; 335 const void* oldToUContext = NULL; 336 337 /* Allocate memory */ 338 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0])); 339 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0])); 340 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0])); 341 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0])); 342 343 ucs_file_buffer_use = ucs_file_buffer; 344 mytarget_1=mytarget; 345 mytarget_use = mytarget; 346 my_ucs_file_buffer_1=my_ucs_file_buffer; 347 348 /* flush the converter cache to get a consistent state before the flushing is tested */ 349 ucnv_flushCache(); 350 351 /*Testing ucnv_openU()*/ 352 { 353 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/ 354 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */ 355 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */ 356 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"}; 357 UChar illegalName[100]; 358 UConverter *converter=NULL; 359 err=U_ZERO_ERROR; 360 converter=ucnv_openU(converterName, &err); 361 if(U_FAILURE(err)){ 362 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err)); 363 } 364 ucnv_close(converter); 365 err=U_ZERO_ERROR; 366 converter=ucnv_openU(NULL, &err); 367 if(U_FAILURE(err)){ 368 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err)); 369 } 370 ucnv_close(converter); 371 /*testing with error value*/ 372 err=U_ILLEGAL_ARGUMENT_ERROR; 373 converter=ucnv_openU(converterName, &err); 374 if(!(converter == NULL)){ 375 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n"); 376 } 377 ucnv_close(converter); 378 err=U_ZERO_ERROR; 379 u_uastrcpy(illegalName, ""); 380 u_uastrcpy(illegalName, illegalNameChars); 381 ucnv_openU(illegalName, &err); 382 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){ 383 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n"); 384 } 385 386 err=U_ZERO_ERROR; 387 ucnv_openU(firstSortedName, &err); 388 if(err!=U_FILE_ACCESS_ERROR){ 389 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n"); 390 } 391 392 err=U_ZERO_ERROR; 393 ucnv_openU(lastSortedName, &err); 394 if(err!=U_FILE_ACCESS_ERROR){ 395 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n"); 396 } 397 398 err=U_ZERO_ERROR; 399 } 400 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n"); 401 { 402 UConverter *cnv=NULL; 403 err=U_ZERO_ERROR; 404 cnv=ucnv_open("ibm-949,Madhu", &err); 405 if(U_FAILURE(err)){ 406 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err)); 407 } 408 ucnv_close(cnv); 409 410 } 411 /*Testing ucnv_convert()*/ 412 { 413 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0; 414 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; 415 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; 416 char *target=0; 417 sourceLimit=sizeof(source)/sizeof(source[0]); 418 err=U_ZERO_ERROR; 419 targetLimit=0; 420 421 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err); 422 if(err == U_BUFFER_OVERFLOW_ERROR){ 423 err=U_ZERO_ERROR; 424 targetLimit=targetCapacity+1; 425 target=(char*)malloc(sizeof(char) * targetLimit); 426 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 427 } 428 if(U_FAILURE(err)){ 429 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err)); 430 } 431 else { 432 for(i=0; i<targetCapacity; i++){ 433 if(target[i] != expectedTarget[i]){ 434 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]); 435 } 436 } 437 438 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err); 439 if(U_FAILURE(err) || i!=7){ 440 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n", 441 u_errorName(err), i); 442 } 443 444 /*Test error conditions*/ 445 err=U_ZERO_ERROR; 446 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err); 447 if(i !=0){ 448 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n"); 449 } 450 451 err=U_ILLEGAL_ARGUMENT_ERROR; 452 sourceLimit=sizeof(source)/sizeof(source[0]); 453 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 454 if(i !=0 ){ 455 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); 456 } 457 458 err=U_ZERO_ERROR; 459 sourceLimit=sizeof(source)/sizeof(source[0]); 460 targetLimit=0; 461 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 462 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ 463 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n"); 464 } 465 err=U_ZERO_ERROR; 466 free(target); 467 } 468 } 469 470 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/ 471 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n"); 472 err=U_ILLEGAL_ARGUMENT_ERROR; 473 if(ucnv_open(NULL, &err) != NULL){ 474 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 475 } 476 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){ 477 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 478 } 479 err=U_ZERO_ERROR; 480 481 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */ 482 log_verbose("\n---Testing ucnv_open default...\n"); 483 someConverters[0] = ucnv_open(NULL,&err); 484 someConverters[1] = ucnv_open(NULL,&err); 485 someConverters[2] = ucnv_open("utf8", &err); 486 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err); 487 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */ 488 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));} 489 490 /* Testing ucnv_getName()*/ 491 /*default code page */ 492 ucnv_getName(someConverters[0], &err); 493 if(U_FAILURE(err)) { 494 log_data_err("getName[0] failed\n"); 495 } else { 496 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err)); 497 } 498 ucnv_getName(someConverters[1], &err); 499 if(U_FAILURE(err)) { 500 log_data_err("getName[1] failed\n"); 501 } else { 502 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); 503 } 504 505 ucnv_close(someConverters[0]); 506 ucnv_close(someConverters[1]); 507 ucnv_close(someConverters[2]); 508 ucnv_close(someConverters[3]); 509 510 511 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index) 512 { 513 int32_t i = 0; 514 515 err = U_ZERO_ERROR; 516#ifdef U_TOPSRCDIR 517 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING); 518#else 519 strcpy(ucs_file_name, loadTestData(&err)); 520 521 if(U_FAILURE(err)){ 522 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err)); 523 return; 524 } 525 526 { 527 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); 528 529 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ 530 *(index+1)=0; 531 } 532 } 533 534 strcat(ucs_file_name,".."U_FILE_SEP_STRING); 535#endif 536 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]); 537 538 ucs_file_in = fopen(ucs_file_name,"rb"); 539 if (!ucs_file_in) 540 { 541 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); 542 return; 543 } 544 545 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/ 546 547 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */ 548 /* ucnv_flushCache(); */ 549 myConverter =ucnv_open( "ibm-949", &err); 550 if (!myConverter || U_FAILURE(err)) 551 { 552 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); 553 fclose(ucs_file_in); 554 break; 555 } 556 557 /*testing for ucnv_getName() */ 558 log_verbose("Testing ucnv_getName()...\n"); 559 ucnv_getName(myConverter, &err); 560 if(U_FAILURE(err)) 561 log_err("Error in getName\n"); 562 else 563 { 564 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); 565 } 566 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) 567 log_err("getName failed\n"); 568 else 569 log_verbose("getName ok\n"); 570 /*Test getName with error condition*/ 571 { 572 const char* name=0; 573 err=U_ILLEGAL_ARGUMENT_ERROR; 574 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR"); 575 name=ucnv_getName(myConverter, &err); 576 if(name != NULL){ 577 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail"); 578 } 579 err=U_ZERO_ERROR; 580 } 581 582 583 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/ 584 585 log_verbose("Testing ucnv_getMaxCharSize()...\n"); 586 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index]) 587 log_verbose("Max byte per character OK\n"); 588 else 589 log_err("Max byte per character failed\n"); 590 591 log_verbose("\n---Testing ucnv_getMinCharSize()...\n"); 592 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index]) 593 log_verbose("Min byte per character OK\n"); 594 else 595 log_err("Min byte per character failed\n"); 596 597 598 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/ 599 log_verbose("\n---Testing ucnv_getSubstChars...\n"); 600 ii=4; 601 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 602 if (ii <= 0) { 603 log_err("ucnv_getSubstChars returned a negative number %d\n", ii); 604 } 605 606 for(x=0;x<ii;x++) 607 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]); 608 if (rest==CodePagesSubstitutionChars[codepage_index]) 609 log_verbose("Substitution character ok\n"); 610 else 611 log_err("Substitution character failed.\n"); 612 613 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n"); 614 ucnv_setSubstChars(myConverter, myptr, ii, &err); 615 if (U_FAILURE(err)) 616 { 617 log_err("FAILURE! %s\n", myErrorName(err)); 618 } 619 ucnv_getSubstChars(myConverter,save, &ii, &err); 620 if (U_FAILURE(err)) 621 { 622 log_err("FAILURE! %s\n", myErrorName(err)); 623 } 624 625 if (strncmp(save, myptr, ii)) 626 log_err("Saved substitution character failed\n"); 627 else 628 log_verbose("Saved substitution character ok\n"); 629 630 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/ 631 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n"); 632 ii=1; 633 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 634 if(err != U_INDEX_OUTOFBOUNDS_ERROR){ 635 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err)); 636 } 637 err=U_ZERO_ERROR; 638 ii=4; 639 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 640 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n"); 641 ucnv_setSubstChars(myConverter, myptr, 0, &err); 642 if(err != U_ILLEGAL_ARGUMENT_ERROR){ 643 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err)); 644 } 645 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n"); 646 strcpy(myptr, "abc"); 647 ucnv_setSubstChars(myConverter, myptr, ii, &err); 648 err=U_ZERO_ERROR; 649 ucnv_getSubstChars(myConverter, save, &ii, &err); 650 if(strncmp(save, myptr, ii) == 0){ 651 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n"); 652 } 653 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n"); 654 err=U_ZERO_ERROR; 655 strcpy(myptr, "abc"); 656 ucnv_setSubstChars(myConverter, myptr, ii, &err); 657 err=U_ILLEGAL_ARGUMENT_ERROR; 658 ucnv_getSubstChars(myConverter, save, &ii, &err); 659 if(strncmp(save, myptr, ii) == 0){ 660 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n"); 661 } 662 err=U_ZERO_ERROR; 663 /*------*/ 664 665#ifdef U_ENABLE_GENERIC_ISO_2022 666 /*resetState ucnv_reset()*/ 667 log_verbose("\n---Testing ucnv_reset()..\n"); 668 ucnv_reset(myConverter); 669 { 670 UChar32 c; 671 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80}; 672 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 673 UConverter *cnv=ucnv_open("ISO_2022", &err); 674 if(U_FAILURE(err)) { 675 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 676 } 677 c=ucnv_getNextUChar(cnv, &source, limit, &err); 678 if((U_FAILURE(err) || c != (UChar32)0x0031)) { 679 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err)); 680 } 681 ucnv_reset(cnv); 682 ucnv_close(cnv); 683 684 } 685#endif 686 687 /*getDisplayName*/ 688 log_verbose("\n---Testing ucnv_getDisplayName()...\n"); 689 locale=CodePagesLocale[codepage_index]; 690 len=0; 691 displayname=NULL; 692 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err); 693 if(err==U_BUFFER_OVERFLOW_ERROR) { 694 err=U_ZERO_ERROR; 695 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar)); 696 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); 697 if(U_FAILURE(err)) { 698 log_err("getDisplayName failed. The error is %s\n", myErrorName(err)); 699 } 700 else { 701 log_verbose(" getDisplayName o.k.\n"); 702 } 703 free(displayname); 704 displayname=NULL; 705 } 706 else { 707 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err)); 708 } 709 /*test ucnv_getDiaplayName with error condition*/ 710 err= U_ILLEGAL_ARGUMENT_ERROR; 711 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err); 712 if( len !=0 ){ 713 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n"); 714 } 715 /*test ucnv_getDiaplayName with error condition*/ 716 err=U_ZERO_ERROR; 717 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err); 718 if( len !=0 || U_SUCCESS(err)){ 719 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n"); 720 } 721 err=U_ZERO_ERROR; 722 723 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/ 724 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context); 725 726 log_verbose("\n---Testing ucnv_setFromUCallBack...\n"); 727 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 728 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context) 729 { 730 log_err("FAILURE! %s\n", myErrorName(err)); 731 } 732 733 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 734 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM) 735 log_err("get From UCallBack failed\n"); 736 else 737 log_verbose("get From UCallBack ok\n"); 738 739 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n"); 740 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err); 741 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM) 742 { 743 log_err("FAILURE! %s\n", myErrorName(err)); 744 } 745 746 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 747 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context) 748 log_err("get From UCallBack action failed\n"); 749 else 750 log_verbose("get From UCallBack action ok\n"); 751 752 /*testing ucnv_setToUCallBack with error conditions*/ 753 err=U_ILLEGAL_ARGUMENT_ERROR; 754 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n"); 755 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 756 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 757 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){ 758 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 759 } 760 err=U_ZERO_ERROR; 761 762 763 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/ 764 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context); 765 766 log_verbose("\n---Testing setTo UCallBack...\n"); 767 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err); 768 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context) 769 { 770 log_err("FAILURE! %s\n", myErrorName(err)); 771 } 772 773 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 774 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM) 775 log_err("To UCallBack failed\n"); 776 else 777 log_verbose("To UCallBack ok\n"); 778 779 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n"); 780 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err); 781 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM) 782 { log_err("FAILURE! %s\n", myErrorName(err)); } 783 784 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 785 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context) 786 log_err("To UCallBack failed\n"); 787 else 788 log_verbose("To UCallBack ok\n"); 789 790 /*testing ucnv_setToUCallBack with error conditions*/ 791 err=U_ILLEGAL_ARGUMENT_ERROR; 792 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n"); 793 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err); 794 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 795 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){ 796 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 797 } 798 err=U_ZERO_ERROR; 799 800 801 /*getcodepageid testing ucnv_getCCSID() */ 802 log_verbose("\n----Testing getCCSID....\n"); 803 cp = ucnv_getCCSID(myConverter,&err); 804 if (U_FAILURE(err)) 805 { 806 log_err("FAILURE!..... %s\n", myErrorName(err)); 807 } 808 if (cp != CodePageNumberToTest[codepage_index]) 809 log_err("Codepage number test failed\n"); 810 else 811 log_verbose("Codepage number test OK\n"); 812 813 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/ 814 err=U_ILLEGAL_ARGUMENT_ERROR; 815 if( ucnv_getCCSID(myConverter,&err) != -1){ 816 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n"); 817 } 818 err=U_ZERO_ERROR; 819 820 /*getCodepagePlatform testing ucnv_getPlatform()*/ 821 log_verbose("\n---Testing getCodepagePlatform ..\n"); 822 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err)) 823 log_err("Platform codepage test failed\n"); 824 else 825 log_verbose("Platform codepage test ok\n"); 826 827 if (U_FAILURE(err)) 828 { 829 log_err("FAILURE! %s\n", myErrorName(err)); 830 } 831 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/ 832 err= U_ILLEGAL_ARGUMENT_ERROR; 833 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){ 834 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n"); 835 } 836 err=U_ZERO_ERROR; 837 838 839 /*Reads the BOM*/ 840 fread(&BOM, sizeof(UChar), 1, ucs_file_in); 841 if (BOM!=0xFEFF && BOM!=0xFFFE) 842 { 843 log_err("File Missing BOM...Bailing!\n"); 844 fclose(ucs_file_in); 845 break; 846 } 847 848 849 /*Reads in the file*/ 850 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) 851 { 852 myUChar = ucs_file_buffer[i-1]; 853 854 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/ 855 } 856 857 myUChar = ucs_file_buffer[i-1]; 858 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/ 859 860 861 /*testing ucnv_fromUChars() and ucnv_toUChars() */ 862 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/ 863 864 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1)); 865 u_uastrcpy(uchar1,""); 866 u_strncpy(uchar1,ucs_file_buffer,i); 867 uchar1[i] = 0; 868 869 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1)); 870 u_uastrcpy(uchar3,""); 871 u_strncpy(uchar3,ucs_file_buffer,i); 872 uchar3[i] = 0; 873 874 /*Calls the Conversion Routine */ 875 testLong1 = MAX_FILE_LEN; 876 log_verbose("\n---Testing ucnv_fromUChars()\n"); 877 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 878 if (U_FAILURE(err)) 879 { 880 log_err("\nFAILURE...%s\n", myErrorName(err)); 881 } 882 else 883 log_verbose(" ucnv_fromUChars() o.k.\n"); 884 885 /*test the conversion routine */ 886 log_verbose("\n---Testing ucnv_toUChars()\n"); 887 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */ 888 targetcapacity2=0; 889 targetsize = ucnv_toUChars(myConverter, 890 NULL, 891 targetcapacity2, 892 output_cp_buffer, 893 strlen(output_cp_buffer), 894 &err); 895 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ 896 897 if(err==U_BUFFER_OVERFLOW_ERROR) 898 { 899 err=U_ZERO_ERROR; 900 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar)); 901 targetsize = ucnv_toUChars(myConverter, 902 uchar2, 903 targetsize+1, 904 output_cp_buffer, 905 strlen(output_cp_buffer), 906 &err); 907 908 if(U_FAILURE(err)) 909 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err)); 910 else 911 log_verbose(" ucnv_toUChars() o.k.\n"); 912 913 if(u_strcmp(uchar1,uchar2)!=0) 914 log_err("equality test failed with conversion routine\n"); 915 } 916 else 917 { 918 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n"); 919 } 920 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/ 921 err=U_ILLEGAL_ARGUMENT_ERROR; 922 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n"); 923 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 924 if (targetcapacity !=0) { 925 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 926 } 927 err=U_ZERO_ERROR; 928 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n"); 929 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err); 930 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) { 931 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n"); 932 } 933 err=U_ZERO_ERROR; 934 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n"); 935 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err); 936 if (targetcapacity !=0) { 937 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n"); 938 } 939 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n"); 940 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err); 941 if (err != U_BUFFER_OVERFLOW_ERROR) { 942 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); 943 } 944 /*toUChars with error conditions*/ 945 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err); 946 if(targetsize != 0){ 947 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 948 } 949 err=U_ZERO_ERROR; 950 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err); 951 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ 952 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); 953 } 954 err=U_ZERO_ERROR; 955 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err); 956 if (targetsize !=0) { 957 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); 958 } 959 targetcapacity2=0; 960 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err); 961 if (err != U_STRING_NOT_TERMINATED_WARNING) { 962 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", 963 u_errorName(err)); 964 } 965 err=U_ZERO_ERROR; 966 /*-----*/ 967 968 969 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ 970 /*Clean up re-usable vars*/ 971 log_verbose("Testing ucnv_fromUnicode().....\n"); 972 tmp_ucs_buf=ucs_file_buffer_use; 973 ucnv_fromUnicode(myConverter, &mytarget_1, 974 mytarget + MAX_FILE_LEN, 975 &tmp_ucs_buf, 976 ucs_file_buffer_use+i, 977 NULL, 978 TRUE, 979 &err); 980 consumedUni = (UChar*)tmp_consumedUni; 981 (void)consumedUni; /* Suppress set but not used warning. */ 982 983 if (U_FAILURE(err)) 984 { 985 log_err("FAILURE! %s\n", myErrorName(err)); 986 } 987 else 988 log_verbose("ucnv_fromUnicode() o.k.\n"); 989 990 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */ 991 log_verbose("Testing ucnv_toUnicode().....\n"); 992 tmp_mytarget_use=mytarget_use; 993 tmp_consumed = consumed; 994 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1, 995 my_ucs_file_buffer + MAX_FILE_LEN, 996 &tmp_mytarget_use, 997 mytarget_use + (mytarget_1 - mytarget), 998 NULL, 999 FALSE, 1000 &err); 1001 consumed = (char*)tmp_consumed; 1002 if (U_FAILURE(err)) 1003 { 1004 log_err("FAILURE! %s\n", myErrorName(err)); 1005 } 1006 else 1007 log_verbose("ucnv_toUnicode() o.k.\n"); 1008 1009 1010 log_verbose("\n---Testing RoundTrip ...\n"); 1011 1012 1013 u_strncpy(uchar3, my_ucs_file_buffer,i); 1014 uchar3[i] = 0; 1015 1016 if(u_strcmp(uchar1,uchar3)==0) 1017 log_verbose("Equality test o.k.\n"); 1018 else 1019 log_err("Equality test failed\n"); 1020 1021 /*sanity compare */ 1022 if(uchar2 == NULL) 1023 { 1024 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__); 1025 } 1026 else 1027 { 1028 if(u_strcmp(uchar2, uchar3)==0) 1029 log_verbose("Equality test o.k.\n"); 1030 else 1031 log_err("Equality test failed\n"); 1032 } 1033 1034 fclose(ucs_file_in); 1035 ucnv_close(myConverter); 1036 if (uchar1 != 0) free(uchar1); 1037 if (uchar2 != 0) free(uchar2); 1038 if (uchar3 != 0) free(uchar3); 1039 } 1040 1041 free((void*)mytarget); 1042 free((void*)output_cp_buffer); 1043 free((void*)ucs_file_buffer); 1044 free((void*)my_ucs_file_buffer); 1045#endif 1046} 1047 1048#if !UCONFIG_NO_LEGACY_CONVERSION 1049static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) 1050{ 1051 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; 1052} 1053 1054static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) 1055{ 1056 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; 1057} 1058#endif 1059 1060static void TestFlushCache(void) { 1061#if !UCONFIG_NO_LEGACY_CONVERSION 1062 UErrorCode err = U_ZERO_ERROR; 1063 UConverter* someConverters[5]; 1064 int flushCount = 0; 1065 1066 /* flush the converter cache to get a consistent state before the flushing is tested */ 1067 ucnv_flushCache(); 1068 1069 /*Testing ucnv_open()*/ 1070 /* Note: These converters have been chosen because they do NOT 1071 encode the Latin characters (U+0041, ...), and therefore are 1072 highly unlikely to be chosen as system default codepages */ 1073 1074 someConverters[0] = ucnv_open("ibm-1047", &err); 1075 if (U_FAILURE(err)) { 1076 log_data_err("FAILURE! %s\n", myErrorName(err)); 1077 } 1078 1079 someConverters[1] = ucnv_open("ibm-1047", &err); 1080 if (U_FAILURE(err)) { 1081 log_data_err("FAILURE! %s\n", myErrorName(err)); 1082 } 1083 1084 someConverters[2] = ucnv_open("ibm-1047", &err); 1085 if (U_FAILURE(err)) { 1086 log_data_err("FAILURE! %s\n", myErrorName(err)); 1087 } 1088 1089 someConverters[3] = ucnv_open("gb18030", &err); 1090 if (U_FAILURE(err)) { 1091 log_data_err("FAILURE! %s\n", myErrorName(err)); 1092 } 1093 1094 someConverters[4] = ucnv_open("ibm-954", &err); 1095 if (U_FAILURE(err)) { 1096 log_data_err("FAILURE! %s\n", myErrorName(err)); 1097 } 1098 1099 1100 /* Testing ucnv_flushCache() */ 1101 log_verbose("\n---Testing ucnv_flushCache...\n"); 1102 if ((flushCount=ucnv_flushCache())==0) 1103 log_verbose("Flush cache ok\n"); 1104 else 1105 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1106 1107 /*testing ucnv_close() and ucnv_flushCache() */ 1108 ucnv_close(someConverters[0]); 1109 ucnv_close(someConverters[1]); 1110 1111 if ((flushCount=ucnv_flushCache())==0) 1112 log_verbose("Flush cache ok\n"); 1113 else 1114 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1115 1116 ucnv_close(someConverters[2]); 1117 ucnv_close(someConverters[3]); 1118 1119 if ((flushCount=ucnv_flushCache())==2) 1120 log_verbose("Flush cache ok\n"); /*because first, second and third are same */ 1121 else 1122 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n", 1123 __LINE__, 1124 flushCount); 1125 1126 ucnv_close(someConverters[4]); 1127 if ( (flushCount=ucnv_flushCache())==1) 1128 log_verbose("Flush cache ok\n"); 1129 else 1130 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); 1131#endif 1132} 1133 1134/** 1135 * Test the converter alias API, specifically the fuzzy matching of 1136 * alias names and the alias table integrity. Make sure each 1137 * converter has at least one alias (itself), and that its listed 1138 * aliases map back to itself. Check some hard-coded UTF-8 and 1139 * ISO_2022 aliases to make sure they work. 1140 */ 1141static void TestAlias() { 1142 int32_t i, ncnv; 1143 UErrorCode status = U_ZERO_ERROR; 1144 1145 /* Predetermined aliases that we expect to map back to ISO_2022 1146 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */ 1147 const char* ISO_2022_NAMES[] = 1148 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", 1149 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; 1150 int32_t ISO_2022_NAMES_LENGTH = 1151 sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]); 1152 const char *UTF8_NAMES[] = 1153 { "UTF-8", "utf-8", "utf8", "ibm-1208", 1154 "utf_8", "ibm1208", "cp1208" }; 1155 int32_t UTF8_NAMES_LENGTH = 1156 sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]); 1157 1158 struct { 1159 const char *name; 1160 const char *alias; 1161 } CONVERTERS_NAMES[] = { 1162 { "UTF-32BE", "UTF32_BigEndian" }, 1163 { "UTF-32LE", "UTF32_LittleEndian" }, 1164 { "UTF-32", "ISO-10646-UCS-4" }, 1165 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, 1166 { "UTF-32", "ucs-4" } 1167 }; 1168 int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES); 1169 1170 /* When there are bugs in gencnval or in ucnv_io, converters can 1171 appear to have no aliases. */ 1172 ncnv = ucnv_countAvailable(); 1173 log_verbose("%d converters\n", ncnv); 1174 for (i=0; i<ncnv; ++i) { 1175 const char *name = ucnv_getAvailableName(i); 1176 const char *alias0; 1177 uint16_t na = ucnv_countAliases(name, &status); 1178 uint16_t j; 1179 UConverter *cnv; 1180 1181 if (na == 0) { 1182 log_err("FAIL: Converter \"%s\" (i=%d)" 1183 " has no aliases; expect at least one\n", 1184 name, i); 1185 continue; 1186 } 1187 cnv = ucnv_open(name, &status); 1188 if (U_FAILURE(status)) { 1189 log_data_err("FAIL: Converter \"%s\" (i=%d)" 1190 " can't be opened.\n", 1191 name, i); 1192 } 1193 else { 1194 if (strcmp(ucnv_getName(cnv, &status), name) != 0 1195 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { 1196 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " 1197 "The should be the same\n", 1198 name, ucnv_getName(cnv, &status)); 1199 } 1200 } 1201 ucnv_close(cnv); 1202 1203 status = U_ZERO_ERROR; 1204 alias0 = ucnv_getAlias(name, 0, &status); 1205 for (j=1; j<na; ++j) { 1206 const char *alias; 1207 /* Make sure each alias maps back to the the same list of 1208 aliases. Assume that if alias 0 is the same, the whole 1209 list is the same (this should always be true). */ 1210 const char *mapBack; 1211 1212 status = U_ZERO_ERROR; 1213 alias = ucnv_getAlias(name, j, &status); 1214 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1215 log_err("FAIL: Converter \"%s\"is ambiguous\n", name); 1216 } 1217 1218 if (alias == NULL) { 1219 log_err("FAIL: Converter \"%s\" -> " 1220 "alias[%d]=NULL\n", 1221 name, j); 1222 continue; 1223 } 1224 1225 mapBack = ucnv_getAlias(alias, 0, &status); 1226 1227 if (mapBack == NULL) { 1228 log_err("FAIL: Converter \"%s\" -> " 1229 "alias[%d]=\"%s\" -> " 1230 "alias[0]=NULL, exp. \"%s\"\n", 1231 name, j, alias, alias0); 1232 continue; 1233 } 1234 1235 if (0 != strcmp(alias0, mapBack)) { 1236 int32_t idx; 1237 UBool foundAlias = FALSE; 1238 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1239 /* Make sure that we only get this mismapping when there is 1240 an ambiguous alias, and the other converter has this alias too. */ 1241 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) { 1242 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) { 1243 foundAlias = TRUE; 1244 break; 1245 } 1246 } 1247 } 1248 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */ 1249 1250 if (!foundAlias) { 1251 log_err("FAIL: Converter \"%s\" -> " 1252 "alias[%d]=\"%s\" -> " 1253 "alias[0]=\"%s\", exp. \"%s\"\n", 1254 name, j, alias, mapBack, alias0); 1255 } 1256 } 1257 } 1258 } 1259 1260 1261 /* Check a list of predetermined aliases that we expect to map 1262 * back to ISO_2022 and UTF-8. */ 1263 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) { 1264 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status); 1265 if(!mapBack) { 1266 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]); 1267 continue; 1268 } 1269 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) { 1270 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n", 1271 ISO_2022_NAMES[i], mapBack); 1272 } 1273 } 1274 1275 1276 for (i=1; i<UTF8_NAMES_LENGTH; ++i) { 1277 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status); 1278 if(!mapBack) { 1279 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]); 1280 continue; 1281 } 1282 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) { 1283 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n", 1284 UTF8_NAMES[i], mapBack); 1285 } 1286 } 1287 1288 /* 1289 * Check a list of predetermined aliases that we expect to map 1290 * back to predermined converter names. 1291 */ 1292 1293 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { 1294 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); 1295 if(!mapBack) { 1296 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); 1297 continue; 1298 } 1299 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { 1300 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n", 1301 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name); 1302 } 1303 } 1304 1305} 1306 1307static void TestDuplicateAlias(void) { 1308 const char *alias; 1309 UErrorCode status = U_ZERO_ERROR; 1310 1311 status = U_ZERO_ERROR; 1312 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); 1313 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1314 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); 1315 } 1316 status = U_ZERO_ERROR; 1317 alias = ucnv_getStandardName("ibm-943", "IANA", &status); 1318 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1319 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias); 1320 } 1321 status = U_ZERO_ERROR; 1322 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status); 1323 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) { 1324 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias); 1325 } 1326} 1327 1328 1329/* Test safe clone callback */ 1330 1331static uint32_t TSCC_nextSerial() 1332{ 1333 static uint32_t n = 1; 1334 1335 return (n++); 1336} 1337 1338typedef struct 1339{ 1340 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */ 1341 uint32_t serial; /* minted from nextSerial, above */ 1342 UBool wasClosed; /* close happened on the object */ 1343} TSCCContext; 1344 1345static TSCCContext *TSCC_clone(TSCCContext *ctx) 1346{ 1347 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext)); 1348 1349 newCtx->serial = TSCC_nextSerial(); 1350 newCtx->wasClosed = 0; 1351 newCtx->magic = 0xC0FFEE; 1352 1353 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial); 1354 1355 return newCtx; 1356} 1357 1358#if !UCONFIG_NO_LEGACY_CONVERSION 1359static void TSCC_fromU(const void *context, 1360 UConverterFromUnicodeArgs *fromUArgs, 1361 const UChar* codeUnits, 1362 int32_t length, 1363 UChar32 codePoint, 1364 UConverterCallbackReason reason, 1365 UErrorCode * err) 1366{ 1367 TSCCContext *ctx = (TSCCContext*)context; 1368 UConverterFromUCallback junkFrom; 1369 1370 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter); 1371 1372 if(ctx->magic != 0xC0FFEE) { 1373 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1374 return; 1375 } 1376 1377 if(reason == UCNV_CLONE) { 1378 UErrorCode subErr = U_ZERO_ERROR; 1379 TSCCContext *newCtx; 1380 TSCCContext *junkCtx; 1381 TSCCContext **pjunkCtx = &junkCtx; 1382 1383 /* "recreate" it */ 1384 log_verbose("TSCC_fromU: cloning..\n"); 1385 newCtx = TSCC_clone(ctx); 1386 1387 if(newCtx == NULL) { 1388 log_err("TSCC_fromU: internal clone failed on %p\n", ctx); 1389 } 1390 1391 /* now, SET it */ 1392 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1393 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1394 1395 if(U_FAILURE(subErr)) { 1396 *err = subErr; 1397 } 1398 } 1399 1400 if(reason == UCNV_CLOSE) { 1401 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial); 1402 ctx->wasClosed = TRUE; 1403 } 1404} 1405 1406static void TSCC_toU(const void *context, 1407 UConverterToUnicodeArgs *toUArgs, 1408 const char* codeUnits, 1409 int32_t length, 1410 UConverterCallbackReason reason, 1411 UErrorCode * err) 1412{ 1413 TSCCContext *ctx = (TSCCContext*)context; 1414 UConverterToUCallback junkFrom; 1415 1416 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter); 1417 1418 if(ctx->magic != 0xC0FFEE) { 1419 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1420 return; 1421 } 1422 1423 if(reason == UCNV_CLONE) { 1424 UErrorCode subErr = U_ZERO_ERROR; 1425 TSCCContext *newCtx; 1426 TSCCContext *junkCtx; 1427 TSCCContext **pjunkCtx = &junkCtx; 1428 1429 /* "recreate" it */ 1430 log_verbose("TSCC_toU: cloning..\n"); 1431 newCtx = TSCC_clone(ctx); 1432 1433 if(newCtx == NULL) { 1434 log_err("TSCC_toU: internal clone failed on %p\n", ctx); 1435 } 1436 1437 /* now, SET it */ 1438 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1439 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1440 1441 if(U_FAILURE(subErr)) { 1442 *err = subErr; 1443 } 1444 } 1445 1446 if(reason == UCNV_CLOSE) { 1447 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial); 1448 ctx->wasClosed = TRUE; 1449 } 1450} 1451 1452static void TSCC_init(TSCCContext *q) 1453{ 1454 q->magic = 0xC0FFEE; 1455 q->serial = TSCC_nextSerial(); 1456 q->wasClosed = 0; 1457} 1458 1459static void TSCC_print_log(TSCCContext *q, const char *name) 1460{ 1461 if(q==NULL) { 1462 log_verbose("TSCContext: %s is NULL!!\n", name); 1463 } else { 1464 if(q->magic != 0xC0FFEE) { 1465 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n", 1466 q,q->serial, q->magic); 1467 } 1468 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n", 1469 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open"); 1470 } 1471} 1472 1473static void TestConvertSafeCloneCallback() 1474{ 1475 UErrorCode err = U_ZERO_ERROR; 1476 TSCCContext from1, to1; 1477 TSCCContext *from2, *from3, *to2, *to3; 1478 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; 1479 char hunk[8192]; 1480 int32_t hunkSize = 8192; 1481 UConverterFromUCallback junkFrom; 1482 UConverterToUCallback junkTo; 1483 UConverter *conv1, *conv2 = NULL; 1484 1485 conv1 = ucnv_open("iso-8859-3", &err); 1486 1487 if(U_FAILURE(err)) { 1488 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); 1489 return; 1490 } 1491 1492 log_verbose("Opened conv1=%p\n", conv1); 1493 1494 TSCC_init(&from1); 1495 TSCC_init(&to1); 1496 1497 TSCC_print_log(&from1, "from1"); 1498 TSCC_print_log(&to1, "to1"); 1499 1500 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err); 1501 log_verbose("Set from1 on conv1\n"); 1502 TSCC_print_log(&from1, "from1"); 1503 1504 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err); 1505 log_verbose("Set to1 on conv1\n"); 1506 TSCC_print_log(&to1, "to1"); 1507 1508 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err); 1509 if(U_FAILURE(err)) { 1510 log_err("safeClone failed: %s\n", u_errorName(err)); 1511 return; 1512 } 1513 log_verbose("Cloned to conv2=%p.\n", conv2); 1514 1515/********** from *********************/ 1516 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); 1517 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); 1518 1519 TSCC_print_log(from2, "from2"); 1520 TSCC_print_log(from3, "from3(==from1)"); 1521 1522 if(from2 == NULL) { 1523 log_err("FAIL! from2 is null \n"); 1524 return; 1525 } 1526 1527 if(from3 == NULL) { 1528 log_err("FAIL! from3 is null \n"); 1529 return; 1530 } 1531 1532 if(from3 != (&from1) ) { 1533 log_err("FAIL! conv1's FROM context changed!\n"); 1534 } 1535 1536 if(from2 == (&from1) ) { 1537 log_err("FAIL! conv1's FROM context is the same as conv2's!\n"); 1538 } 1539 1540 if(from1.wasClosed) { 1541 log_err("FAIL! from1 is closed \n"); 1542 } 1543 1544 if(from2->wasClosed) { 1545 log_err("FAIL! from2 was closed\n"); 1546 } 1547 1548/********** to *********************/ 1549 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); 1550 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); 1551 1552 TSCC_print_log(to2, "to2"); 1553 TSCC_print_log(to3, "to3(==to1)"); 1554 1555 if(to2 == NULL) { 1556 log_err("FAIL! to2 is null \n"); 1557 return; 1558 } 1559 1560 if(to3 == NULL) { 1561 log_err("FAIL! to3 is null \n"); 1562 return; 1563 } 1564 1565 if(to3 != (&to1) ) { 1566 log_err("FAIL! conv1's TO context changed!\n"); 1567 } 1568 1569 if(to2 == (&to1) ) { 1570 log_err("FAIL! conv1's TO context is the same as conv2's!\n"); 1571 } 1572 1573 if(to1.wasClosed) { 1574 log_err("FAIL! to1 is closed \n"); 1575 } 1576 1577 if(to2->wasClosed) { 1578 log_err("FAIL! to2 was closed\n"); 1579 } 1580 1581/*************************************/ 1582 1583 ucnv_close(conv1); 1584 log_verbose("ucnv_closed (conv1)\n"); 1585 TSCC_print_log(&from1, "from1"); 1586 TSCC_print_log(from2, "from2"); 1587 TSCC_print_log(&to1, "to1"); 1588 TSCC_print_log(to2, "to2"); 1589 1590 if(from1.wasClosed == FALSE) { 1591 log_err("FAIL! from1 is NOT closed \n"); 1592 } 1593 1594 if(from2->wasClosed) { 1595 log_err("FAIL! from2 was closed\n"); 1596 } 1597 1598 if(to1.wasClosed == FALSE) { 1599 log_err("FAIL! to1 is NOT closed \n"); 1600 } 1601 1602 if(to2->wasClosed) { 1603 log_err("FAIL! to2 was closed\n"); 1604 } 1605 1606 ucnv_close(conv2); 1607 log_verbose("ucnv_closed (conv2)\n"); 1608 1609 TSCC_print_log(&from1, "from1"); 1610 TSCC_print_log(from2, "from2"); 1611 1612 if(from1.wasClosed == FALSE) { 1613 log_err("FAIL! from1 is NOT closed \n"); 1614 } 1615 1616 if(from2->wasClosed == FALSE) { 1617 log_err("FAIL! from2 was NOT closed\n"); 1618 } 1619 1620 TSCC_print_log(&to1, "to1"); 1621 TSCC_print_log(to2, "to2"); 1622 1623 if(to1.wasClosed == FALSE) { 1624 log_err("FAIL! to1 is NOT closed \n"); 1625 } 1626 1627 if(to2->wasClosed == FALSE) { 1628 log_err("FAIL! to2 was NOT closed\n"); 1629 } 1630 1631 if(to2 != (&to1)) { 1632 free(to2); /* to1 is stack based */ 1633 } 1634 if(from2 != (&from1)) { 1635 free(from2); /* from1 is stack based */ 1636 } 1637} 1638#endif 1639 1640static UBool 1641containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { 1642 while(length>0) { 1643 if(*p!=b) { 1644 return TRUE; 1645 } 1646 ++p; 1647 --length; 1648 } 1649 return FALSE; 1650} 1651 1652static void TestConvertSafeClone() 1653{ 1654 /* one 'regular' & all the 'private stateful' converters */ 1655 static const char *const names[] = { 1656#if !UCONFIG_NO_LEGACY_CONVERSION 1657 "ibm-1047", 1658 "ISO_2022,locale=zh,version=1", 1659#endif 1660 "SCSU", 1661#if !UCONFIG_NO_LEGACY_CONVERSION 1662 "HZ", 1663 "lmbcs", 1664 "ISCII,version=0", 1665 "ISO_2022,locale=kr,version=1", 1666 "ISO_2022,locale=jp,version=2", 1667#endif 1668 "BOCU-1", 1669 "UTF-7", 1670#if !UCONFIG_NO_LEGACY_CONVERSION 1671 "IMAP-mailbox-name", 1672 "ibm-1047-s390" 1673#else 1674 "IMAP=mailbox-name" 1675#endif 1676 }; 1677 1678 /* store the actual sizes of each converter */ 1679 int32_t actualSizes[LENGTHOF(names)]; 1680 1681 static const int32_t bufferSizes[] = { 1682 U_CNV_SAFECLONE_BUFFERSIZE, 1683 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ 1684 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ 1685 }; 1686 1687 char charBuffer[21]; /* Leave at an odd number for alignment testing */ 1688 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; 1689 int32_t bufferSize, maxBufferSize; 1690 const char *maxName; 1691 UConverter * cnv, *cnv2; 1692 UErrorCode err; 1693 1694 char *pCharBuffer; 1695 const char *pConstCharBuffer; 1696 const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer); 1697 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ 1698 UChar uniCharBuffer[20]; 1699 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; 1700 const char *pCharSource = charSourceBuffer; 1701 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); 1702 UChar *pUCharTarget = uniCharBuffer; 1703 UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); 1704 const UChar * pUniBuffer; 1705 const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); 1706 int32_t idx, j; 1707 1708 err = U_ZERO_ERROR; 1709 cnv = ucnv_open(names[0], &err); 1710 if(U_SUCCESS(err)) { 1711 /* Check the various error & informational states: */ 1712 1713 /* Null status - just returns NULL */ 1714 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1715 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL)) 1716 { 1717 log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); 1718 } 1719 /* error status - should return 0 & keep error the same */ 1720 err = U_MEMORY_ALLOCATION_ERROR; 1721 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) 1722 { 1723 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); 1724 } 1725 err = U_ZERO_ERROR; 1726 1727 /* Null buffer size pointer is ok */ 1728 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err)) 1729 { 1730 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); 1731 } 1732 ucnv_close(cnv2); 1733 err = U_ZERO_ERROR; 1734 1735 /* buffer size pointer is 0 - fill in pbufferSize with a size */ 1736 bufferSize = 0; 1737 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) 1738 { 1739 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); 1740 } 1741 /* Verify our define is large enough */ 1742 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) 1743 { 1744 log_err("FAIL: Pre-calculated buffer size is too small\n"); 1745 } 1746 /* Verify we can use this run-time calculated size */ 1747 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) 1748 { 1749 log_err("FAIL: Converter can't be cloned with run-time size\n"); 1750 } 1751 if (cnv2) { 1752 ucnv_close(cnv2); 1753 } 1754 1755 /* size one byte too small - should allocate & let us know */ 1756 --bufferSize; 1757 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1758 { 1759 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); 1760 } 1761 if (cnv2) { 1762 ucnv_close(cnv2); 1763 } 1764 1765 err = U_ZERO_ERROR; 1766 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1767 1768 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ 1769 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1770 { 1771 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); 1772 } 1773 if (cnv2) { 1774 ucnv_close(cnv2); 1775 } 1776 1777 err = U_ZERO_ERROR; 1778 1779 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ 1780 if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1781 { 1782 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); 1783 } 1784 1785 ucnv_close(cnv); 1786 } 1787 1788 maxBufferSize = 0; 1789 maxName = ""; 1790 1791 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ 1792 1793 for(j = 0; j < LENGTHOF(bufferSizes); ++j) { 1794 for (idx = 0; idx < LENGTHOF(names); idx++) 1795 { 1796 err = U_ZERO_ERROR; 1797 cnv = ucnv_open(names[idx], &err); 1798 if(U_FAILURE(err)) { 1799 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err)); 1800 continue; 1801 } 1802 1803 if(j == 0) { 1804 /* preflight to get maxBufferSize */ 1805 actualSizes[idx] = 0; 1806 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err); 1807 if(actualSizes[idx] > maxBufferSize) { 1808 maxBufferSize = actualSizes[idx]; 1809 maxName = names[idx]; 1810 } 1811 } 1812 1813 memset(buffer, 0xaa, sizeof(buffer)); 1814 1815 bufferSize = bufferSizes[j]; 1816 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); 1817 1818 /* close the original immediately to make sure that the clone works by itself */ 1819 ucnv_close(cnv); 1820 1821 if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && 1822 err == U_SAFECLONE_ALLOCATED_WARNING 1823 ) { 1824 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]); 1825 } 1826 1827 /* check if the clone function overwrote any bytes that it is not supposed to touch */ 1828 if(bufferSize <= bufferSizes[j]) { 1829 /* used the stack buffer */ 1830 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || 1831 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) 1832 ) { 1833 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", 1834 names[idx], bufferSize, bufferSizes[j]); 1835 } 1836 } else { 1837 /* heap-allocated the clone */ 1838 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { 1839 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", 1840 names[idx], bufferSize, bufferSizes[j]); 1841 } 1842 } 1843 1844 pCharBuffer = charBuffer; 1845 pUniBuffer = uniBuffer; 1846 1847 ucnv_fromUnicode(cnv2, 1848 &pCharBuffer, 1849 charBufferLimit, 1850 &pUniBuffer, 1851 uniBufferLimit, 1852 NULL, 1853 TRUE, 1854 &err); 1855 if(U_FAILURE(err)){ 1856 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); 1857 } 1858 ucnv_toUnicode(cnv2, 1859 &pUCharTarget, 1860 pUCharTargetLimit, 1861 &pCharSource, 1862 pCharSourceLimit, 1863 NULL, 1864 TRUE, 1865 &err 1866 ); 1867 1868 if(U_FAILURE(err)){ 1869 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); 1870 } 1871 1872 pConstCharBuffer = charBuffer; 1873 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) 1874 { 1875 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); 1876 } 1877 ucnv_close(cnv2); 1878 } 1879 } 1880 1881 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1882 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1883 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { 1884 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1885 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1886 } 1887} 1888 1889static void TestCCSID() { 1890#if !UCONFIG_NO_LEGACY_CONVERSION 1891 UConverter *cnv; 1892 UErrorCode errorCode; 1893 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; 1894 int32_t i, ccsid; 1895 1896 for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) { 1897 ccsid=ccsids[i]; 1898 1899 errorCode=U_ZERO_ERROR; 1900 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode); 1901 if(U_FAILURE(errorCode)) { 1902 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode)); 1903 continue; 1904 } 1905 1906 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) { 1907 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode)); 1908 } 1909 1910 /* skip gb18030(ccsid 1392) */ 1911 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) { 1912 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode)); 1913 } 1914 1915 ucnv_close(cnv); 1916 } 1917#endif 1918} 1919 1920/* jitterbug 932: ucnv_convert() bugs --------------------------------------- */ 1921 1922/* CHUNK_SIZE defined in common\ucnv.c: */ 1923#define CHUNK_SIZE 1024 1924 1925static void bug1(void); 1926static void bug2(void); 1927static void bug3(void); 1928 1929static void 1930TestJ932(void) 1931{ 1932 bug1(); /* Unicode intermediate buffer straddle bug */ 1933 bug2(); /* pre-flighting size incorrect caused by simple overflow */ 1934 bug3(); /* pre-flighting size incorrect caused by expansion overflow */ 1935} 1936 1937/* 1938 * jitterbug 932: test chunking boundary conditions in 1939 1940 int32_t ucnv_convert(const char *toConverterName, 1941 const char *fromConverterName, 1942 char *target, 1943 int32_t targetSize, 1944 const char *source, 1945 int32_t sourceSize, 1946 UErrorCode * err) 1947 1948 * See discussions on the icu mailing list in 1949 * 2001-April with the subject "converter 'flush' question". 1950 * 1951 * Bug report and test code provided by Edward J. Batutis. 1952 */ 1953static void bug1() 1954{ 1955#if !UCONFIG_NO_LEGACY_CONVERSION 1956 char char_in[CHUNK_SIZE+32]; 1957 char char_out[CHUNK_SIZE*2]; 1958 1959 /* GB 18030 equivalent of U+10000 is 90308130 */ 1960 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 }; 1961 1962 UErrorCode err = U_ZERO_ERROR; 1963 int32_t i, test_seq_len = sizeof(test_seq); 1964 1965 /* 1966 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward 1967 * until the straddle bug appears. I didn't want to hard-code everything so this test could 1968 * be expanded - however this is the only type of straddle bug I can think of at the moment - 1969 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no 1970 * other Unicode sequences cause a bug since combining sequences are not supported by the 1971 * converters. 1972 */ 1973 1974 for (i = test_seq_len; i >= 0; i--) { 1975 /* put character sequence into input buffer */ 1976 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */ 1977 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len); 1978 1979 /* do the conversion */ 1980 ucnv_convert("us-ascii", /* out */ 1981 "gb18030", /* in */ 1982 char_out, 1983 sizeof(char_out), 1984 char_in, 1985 sizeof(char_in), 1986 &err); 1987 1988 /* bug1: */ 1989 if (err == U_TRUNCATED_CHAR_FOUND) { 1990 /* this happens when surrogate pair straddles the intermediate buffer in 1991 * T_UConverter_fromCodepageToCodepage */ 1992 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n"); 1993 } 1994 } 1995#endif 1996} 1997 1998/* bug2: pre-flighting loop bug: simple overflow causes bug */ 1999static void bug2() 2000{ 2001 /* US-ASCII "1234567890" */ 2002 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 }; 2003 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 }; 2004 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, 2005 0x00, 0x00, 0x00, 0x31, 2006 0x00, 0x00, 0x00, 0x32, 2007 0x00, 0x00, 0x00, 0x33, 2008 0x00, 0x00, 0x00, 0x34, 2009 0x00, 0x00, 0x00, 0x35, 2010 0x00, 0x00, 0x00, 0x36, 2011 0x00, 0x00, 0x00, 0x37, 2012 0x00, 0x00, 0x00, 0x38, 2013 0x00, 0x00, (char)0xf0, 0x00}; 2014 static char target[5]; 2015 2016 UErrorCode err = U_ZERO_ERROR; 2017 int32_t size; 2018 2019 /* do the conversion */ 2020 size = ucnv_convert("iso-8859-1", /* out */ 2021 "us-ascii", /* in */ 2022 target, 2023 sizeof(target), 2024 source, 2025 sizeof(source), 2026 &err); 2027 2028 if ( size != 10 ) { 2029 /* bug2: size is 5, should be 10 */ 2030 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size); 2031 } 2032 2033 err = U_ZERO_ERROR; 2034 /* do the conversion */ 2035 size = ucnv_convert("UTF-32BE", /* out */ 2036 "UTF-8", /* in */ 2037 target, 2038 sizeof(target), 2039 sourceUTF8, 2040 sizeof(sourceUTF8), 2041 &err); 2042 2043 if ( size != 32 ) { 2044 /* bug2: size is 5, should be 32 */ 2045 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size); 2046 } 2047 2048 err = U_ZERO_ERROR; 2049 /* do the conversion */ 2050 size = ucnv_convert("UTF-8", /* out */ 2051 "UTF-32BE", /* in */ 2052 target, 2053 sizeof(target), 2054 sourceUTF32, 2055 sizeof(sourceUTF32), 2056 &err); 2057 2058 if ( size != 12 ) { 2059 /* bug2: size is 5, should be 12 */ 2060 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); 2061 } 2062} 2063 2064/* 2065 * bug3: when the characters expand going from source to target codepage 2066 * you get bug3 in addition to bug2 2067 */ 2068static void bug3() 2069{ 2070#if !UCONFIG_NO_LEGACY_CONVERSION 2071 char char_in[CHUNK_SIZE*4]; 2072 char target[5]; 2073 UErrorCode err = U_ZERO_ERROR; 2074 int32_t size; 2075 2076 /* 2077 * first get the buggy size from bug2 then 2078 * compare it to buggy size with an expansion 2079 */ 2080 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ 2081 2082 /* do the conversion */ 2083 size = ucnv_convert("lmbcs", /* out */ 2084 "us-ascii", /* in */ 2085 target, 2086 sizeof(target), 2087 char_in, 2088 sizeof(char_in), 2089 &err); 2090 2091 if ( size != sizeof(char_in) ) { 2092 /* 2093 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer 2094 * in the converter?), should be CHUNK_SIZE*4 2095 * 2096 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize... 2097 */ 2098 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size); 2099 } 2100 2101 /* 2102 * now do the conversion with expansion 2103 * ascii 0x08 expands to 0x0F 0x28 in lmbcs 2104 */ 2105 memset(char_in, 8, sizeof(char_in)); 2106 err = U_ZERO_ERROR; 2107 2108 /* do the conversion */ 2109 size = ucnv_convert("lmbcs", /* out */ 2110 "us-ascii", /* in */ 2111 target, 2112 sizeof(target), 2113 char_in, 2114 sizeof(char_in), 2115 &err); 2116 2117 /* expect 2X expansion */ 2118 if ( size != sizeof(char_in) * 2 ) { 2119 /* 2120 * bug3: 2121 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05: 2122 */ 2123 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); 2124 } 2125#endif 2126} 2127 2128static void 2129convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, 2130 const char *src, int32_t srcLength, 2131 const char *expectTarget, int32_t expectTargetLength, 2132 int32_t chunkSize, 2133 const char *testName, 2134 UErrorCode expectCode) { 2135 UChar pivotBuffer[CHUNK_SIZE]; 2136 UChar *pivotSource, *pivotTarget; 2137 const UChar *pivotLimit; 2138 2139 char targetBuffer[CHUNK_SIZE]; 2140 char *target; 2141 const char *srcLimit, *finalSrcLimit, *targetLimit; 2142 2143 int32_t targetLength; 2144 2145 UBool flush; 2146 2147 UErrorCode errorCode; 2148 2149 /* setup */ 2150 if(chunkSize>CHUNK_SIZE) { 2151 chunkSize=CHUNK_SIZE; 2152 } 2153 2154 pivotSource=pivotTarget=pivotBuffer; 2155 pivotLimit=pivotBuffer+chunkSize; 2156 2157 finalSrcLimit=src+srcLength; 2158 target=targetBuffer; 2159 targetLimit=targetBuffer+chunkSize; 2160 2161 ucnv_resetToUnicode(srcCnv); 2162 ucnv_resetFromUnicode(targetCnv); 2163 2164 errorCode=U_ZERO_ERROR; 2165 flush=FALSE; 2166 2167 /* convert, streaming-style (both converters and pivot keep state) */ 2168 for(;;) { 2169 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */ 2170 if(src+chunkSize<=finalSrcLimit) { 2171 srcLimit=src+chunkSize; 2172 } else { 2173 srcLimit=finalSrcLimit; 2174 } 2175 ucnv_convertEx(targetCnv, srcCnv, 2176 &target, targetLimit, 2177 &src, srcLimit, 2178 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, 2179 FALSE, flush, &errorCode); 2180 targetLength=(int32_t)(target-targetBuffer); 2181 if(target>targetLimit) { 2182 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", 2183 testName, chunkSize, target, targetLimit); 2184 break; /* TODO: major problem! */ 2185 } 2186 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { 2187 /* continue converting another chunk */ 2188 errorCode=U_ZERO_ERROR; 2189 if(targetLength+chunkSize<=sizeof(targetBuffer)) { 2190 targetLimit=target+chunkSize; 2191 } else { 2192 targetLimit=targetBuffer+sizeof(targetBuffer); 2193 } 2194 } else if(U_FAILURE(errorCode)) { 2195 /* failure */ 2196 break; 2197 } else if(flush) { 2198 /* all done */ 2199 break; 2200 } else if(src==finalSrcLimit && pivotSource==pivotTarget) { 2201 /* all consumed, now flush without input (separate from conversion for testing) */ 2202 flush=TRUE; 2203 } 2204 } 2205 2206 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) { 2207 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n", 2208 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode)); 2209 } else if(targetLength!=expectTargetLength) { 2210 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n", 2211 testName, chunkSize, targetLength, expectTargetLength); 2212 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) { 2213 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n", 2214 testName, chunkSize); 2215 } 2216} 2217 2218static void 2219convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, 2220 const char *src, int32_t srcLength, 2221 const char *expectTarget, int32_t expectTargetLength, 2222 const char *testName, 2223 UErrorCode expectCode) { 2224 convertExStreaming(srcCnv, targetCnv, 2225 src, srcLength, 2226 expectTarget, expectTargetLength, 2227 1, testName, expectCode); 2228 convertExStreaming(srcCnv, targetCnv, 2229 src, srcLength, 2230 expectTarget, expectTargetLength, 2231 3, testName, expectCode); 2232 convertExStreaming(srcCnv, targetCnv, 2233 src, srcLength, 2234 expectTarget, expectTargetLength, 2235 7, testName, expectCode); 2236} 2237 2238static void TestConvertEx() { 2239#if !UCONFIG_NO_LEGACY_CONVERSION 2240 static const uint8_t 2241 utf8[]={ 2242 /* 4e00 30a1 ff61 0410 */ 2243 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2244 }, 2245 shiftJIS[]={ 2246 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2247 }, 2248 errorTarget[]={ 2249 /* 2250 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2251 * SUB, SUB, 0x40, SUB, SUB, 0x40 2252 */ 2253 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40 2254 }; 2255 2256 char srcBuffer[100], targetBuffer[100]; 2257 2258 const char *src; 2259 char *target; 2260 2261 UChar pivotBuffer[100]; 2262 UChar *pivotSource, *pivotTarget; 2263 2264 UConverter *cnv1, *cnv2; 2265 UErrorCode errorCode; 2266 2267 errorCode=U_ZERO_ERROR; 2268 cnv1=ucnv_open("UTF-8", &errorCode); 2269 if(U_FAILURE(errorCode)) { 2270 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode)); 2271 return; 2272 } 2273 2274 cnv2=ucnv_open("Shift-JIS", &errorCode); 2275 if(U_FAILURE(errorCode)) { 2276 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2277 ucnv_close(cnv1); 2278 return; 2279 } 2280 2281 /* test ucnv_convertEx() with streaming conversion style */ 2282 convertExMultiStreaming(cnv1, cnv2, 2283 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS), 2284 "UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2285 2286 convertExMultiStreaming(cnv2, cnv1, 2287 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), 2288 "Shift-JIS -> UTF-8", U_ZERO_ERROR); 2289 2290 /* U_ZERO_ERROR because by default the SUB callbacks are set */ 2291 convertExMultiStreaming(cnv1, cnv2, 2292 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget), 2293 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2294 2295 /* test some simple conversions */ 2296 2297 /* NUL-terminated source and target */ 2298 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2299 memcpy(srcBuffer, utf8, sizeof(utf8)); 2300 srcBuffer[sizeof(utf8)]=0; 2301 src=srcBuffer; 2302 target=targetBuffer; 2303 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2304 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2305 if( errorCode!=U_ZERO_ERROR || 2306 target-targetBuffer!=sizeof(shiftJIS) || 2307 *target!=0 || 2308 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2309 ) { 2310 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n", 2311 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2312 } 2313 2314 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */ 2315 errorCode=U_AMBIGUOUS_ALIAS_WARNING; 2316 memset(targetBuffer, 0xff, sizeof(targetBuffer)); 2317 src=srcBuffer; 2318 target=targetBuffer; 2319 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL, 2320 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2321 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2322 target-targetBuffer!=sizeof(shiftJIS) || 2323 *target!=(char)0xff || 2324 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2325 ) { 2326 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n", 2327 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2328 } 2329 2330 /* bad arguments */ 2331 errorCode=U_MESSAGE_PARSE_ERROR; 2332 src=srcBuffer; 2333 target=targetBuffer; 2334 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2335 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2336 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2337 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2338 } 2339 2340 /* pivotLimit==pivotStart */ 2341 errorCode=U_ZERO_ERROR; 2342 pivotSource=pivotTarget=pivotBuffer; 2343 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2344 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode); 2345 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2346 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode)); 2347 } 2348 2349 /* *pivotSource==NULL */ 2350 errorCode=U_ZERO_ERROR; 2351 pivotSource=NULL; 2352 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2353 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2354 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2355 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode)); 2356 } 2357 2358 /* *source==NULL */ 2359 errorCode=U_ZERO_ERROR; 2360 src=NULL; 2361 pivotSource=pivotBuffer; 2362 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2363 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2364 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2365 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); 2366 } 2367 2368 /* streaming conversion without a pivot buffer */ 2369 errorCode=U_ZERO_ERROR; 2370 src=srcBuffer; 2371 pivotSource=pivotBuffer; 2372 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2373 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); 2374 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2375 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); 2376 } 2377 2378 ucnv_close(cnv1); 2379 ucnv_close(cnv2); 2380#endif 2381} 2382 2383/* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */ 2384static const char *const badUTF8[]={ 2385 /* trail byte */ 2386 "\x80", 2387 2388 /* truncated multi-byte sequences */ 2389 "\xd0", 2390 "\xe0", 2391 "\xe1", 2392 "\xed", 2393 "\xee", 2394 "\xf0", 2395 "\xf1", 2396 "\xf4", 2397 "\xf8", 2398 "\xfc", 2399 2400 "\xe0\x80", 2401 "\xe0\xa0", 2402 "\xe1\x80", 2403 "\xed\x80", 2404 "\xed\xa0", 2405 "\xee\x80", 2406 "\xf0\x80", 2407 "\xf0\x90", 2408 "\xf1\x80", 2409 "\xf4\x80", 2410 "\xf4\x90", 2411 "\xf8\x80", 2412 "\xfc\x80", 2413 2414 "\xf0\x80\x80", 2415 "\xf0\x90\x80", 2416 "\xf1\x80\x80", 2417 "\xf4\x80\x80", 2418 "\xf4\x90\x80", 2419 "\xf8\x80\x80", 2420 "\xfc\x80\x80", 2421 2422 "\xf8\x80\x80\x80", 2423 "\xfc\x80\x80\x80", 2424 2425 "\xfc\x80\x80\x80\x80", 2426 2427 /* complete sequences but non-shortest forms or out of range etc. */ 2428 "\xc0\x80", 2429 "\xe0\x80\x80", 2430 "\xed\xa0\x80", 2431 "\xf0\x80\x80\x80", 2432 "\xf4\x90\x80\x80", 2433 "\xf8\x80\x80\x80\x80", 2434 "\xfc\x80\x80\x80\x80\x80", 2435 "\xfe", 2436 "\xff" 2437}; 2438 2439#define ARG_CHAR_ARR_SIZE 8 2440 2441/* get some character that can be converted and convert it */ 2442static UBool getTestChar(UConverter *cnv, const char *converterName, 2443 char charUTF8[4], int32_t *pCharUTF8Length, 2444 char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length, 2445 char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) { 2446 UChar utf16[U16_MAX_LENGTH]; 2447 int32_t utf16Length; 2448 2449 const UChar *utf16Source; 2450 char *target; 2451 2452 USet *set; 2453 UChar32 c; 2454 UErrorCode errorCode; 2455 2456 errorCode=U_ZERO_ERROR; 2457 set=uset_open(1, 0); 2458 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2459 c=uset_charAt(set, uset_size(set)/2); 2460 uset_close(set); 2461 2462 utf16Length=0; 2463 U16_APPEND_UNSAFE(utf16, utf16Length, c); 2464 *pCharUTF8Length=0; 2465 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c); 2466 2467 utf16Source=utf16; 2468 target=char0; 2469 ucnv_fromUnicode(cnv, 2470 &target, char0+ARG_CHAR_ARR_SIZE, 2471 &utf16Source, utf16+utf16Length, 2472 NULL, FALSE, &errorCode); 2473 *pChar0Length=(int32_t)(target-char0); 2474 2475 utf16Source=utf16; 2476 target=char1; 2477 ucnv_fromUnicode(cnv, 2478 &target, char1+ARG_CHAR_ARR_SIZE, 2479 &utf16Source, utf16+utf16Length, 2480 NULL, FALSE, &errorCode); 2481 *pChar1Length=(int32_t)(target-char1); 2482 2483 if(U_FAILURE(errorCode)) { 2484 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); 2485 return FALSE; 2486 } 2487 return TRUE; 2488} 2489 2490static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2491 char charUTF8[4], int32_t charUTF8Length, 2492 char char0[8], int32_t char0Length, 2493 char char1[8], int32_t char1Length) { 2494 char utf8[16]; 2495 int32_t utf8Length; 2496 2497 char output[16]; 2498 int32_t outputLength; 2499 2500 char invalidChars[8]; 2501 int8_t invalidLength; 2502 2503 const char *source; 2504 char *target; 2505 2506 UChar pivotBuffer[8]; 2507 UChar *pivotSource, *pivotTarget; 2508 2509 UErrorCode errorCode; 2510 int32_t i; 2511 2512 /* test truncated sequences */ 2513 errorCode=U_ZERO_ERROR; 2514 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2515 2516 memcpy(utf8, charUTF8, charUTF8Length); 2517 2518 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2519 /* truncated sequence? */ 2520 int32_t length=strlen(badUTF8[i]); 2521 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) { 2522 continue; 2523 } 2524 2525 /* assemble a string with the test character and the truncated sequence */ 2526 memcpy(utf8+charUTF8Length, badUTF8[i], length); 2527 utf8Length=charUTF8Length+length; 2528 2529 /* convert and check the invalidChars */ 2530 source=utf8; 2531 target=output; 2532 pivotSource=pivotTarget=pivotBuffer; 2533 errorCode=U_ZERO_ERROR; 2534 ucnv_convertEx(cnv, utf8Cnv, 2535 &target, output+sizeof(output), 2536 &source, utf8+utf8Length, 2537 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2538 TRUE, TRUE, /* reset & flush */ 2539 &errorCode); 2540 outputLength=(int32_t)(target-output); 2541 (void)outputLength; /* Suppress set but not used warning. */ 2542 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) { 2543 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i); 2544 continue; 2545 } 2546 2547 errorCode=U_ZERO_ERROR; 2548 invalidLength=(int8_t)sizeof(invalidChars); 2549 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode); 2550 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) { 2551 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i); 2552 } 2553 } 2554} 2555 2556static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2557 char charUTF8[4], int32_t charUTF8Length, 2558 char char0[8], int32_t char0Length, 2559 char char1[8], int32_t char1Length) { 2560 char utf8[600], expect[600]; 2561 int32_t utf8Length, expectLength; 2562 2563 char testName[32]; 2564 2565 UErrorCode errorCode; 2566 int32_t i; 2567 2568 errorCode=U_ZERO_ERROR; 2569 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode); 2570 2571 /* 2572 * assemble an input string with the test character between each 2573 * bad sequence, 2574 * and an expected string with repeated test character output 2575 */ 2576 memcpy(utf8, charUTF8, charUTF8Length); 2577 utf8Length=charUTF8Length; 2578 2579 memcpy(expect, char0, char0Length); 2580 expectLength=char0Length; 2581 2582 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2583 int32_t length=strlen(badUTF8[i]); 2584 memcpy(utf8+utf8Length, badUTF8[i], length); 2585 utf8Length+=length; 2586 2587 memcpy(utf8+utf8Length, charUTF8, charUTF8Length); 2588 utf8Length+=charUTF8Length; 2589 2590 memcpy(expect+expectLength, char1, char1Length); 2591 expectLength+=char1Length; 2592 } 2593 2594 /* expect that each bad UTF-8 sequence is detected and skipped */ 2595 strcpy(testName, "from bad UTF-8 to "); 2596 strcat(testName, converterName); 2597 2598 convertExMultiStreaming(utf8Cnv, cnv, 2599 utf8, utf8Length, 2600 expect, expectLength, 2601 testName, 2602 U_ZERO_ERROR); 2603} 2604 2605/* Test illegal UTF-8 input. */ 2606static void TestConvertExFromUTF8() { 2607 static const char *const converterNames[]={ 2608#if !UCONFIG_NO_LEGACY_CONVERSION 2609 "windows-1252", 2610 "shift-jis", 2611#endif 2612 "us-ascii", 2613 "iso-8859-1", 2614 "utf-8" 2615 }; 2616 2617 UConverter *utf8Cnv, *cnv; 2618 UErrorCode errorCode; 2619 int32_t i; 2620 2621 /* fromUnicode versions of some character, from initial state and later */ 2622 char charUTF8[4], char0[8], char1[8]; 2623 int32_t charUTF8Length, char0Length, char1Length; 2624 2625 errorCode=U_ZERO_ERROR; 2626 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2627 if(U_FAILURE(errorCode)) { 2628 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2629 return; 2630 } 2631 2632 for(i=0; i<LENGTHOF(converterNames); ++i) { 2633 errorCode=U_ZERO_ERROR; 2634 cnv=ucnv_open(converterNames[i], &errorCode); 2635 if(U_FAILURE(errorCode)) { 2636 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode)); 2637 continue; 2638 } 2639 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) { 2640 continue; 2641 } 2642 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2643 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2644 ucnv_close(cnv); 2645 } 2646 ucnv_close(utf8Cnv); 2647} 2648 2649static void TestConvertExFromUTF8_C5F0() { 2650 static const char *const converterNames[]={ 2651#if !UCONFIG_NO_LEGACY_CONVERSION 2652 "windows-1251", 2653 "shift-jis", 2654#endif 2655 "us-ascii", 2656 "iso-8859-1", 2657 "utf-8" 2658 }; 2659 2660 UConverter *utf8Cnv, *cnv; 2661 UErrorCode errorCode; 2662 int32_t i; 2663 2664 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 }; 2665 /* Expect "��" (2x U+FFFD as decimal NCRs) */ 2666 static const char twoNCRs[16]={ 2667 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B, 2668 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B 2669 }; 2670 static const char twoFFFD[6]={ 2671 (char)0xef, (char)0xbf, (char)0xbd, 2672 (char)0xef, (char)0xbf, (char)0xbd 2673 }; 2674 const char *expected; 2675 int32_t expectedLength; 2676 char dest[20]; /* longer than longest expectedLength */ 2677 2678 const char *src; 2679 char *target; 2680 2681 UChar pivotBuffer[128]; 2682 UChar *pivotSource, *pivotTarget; 2683 2684 errorCode=U_ZERO_ERROR; 2685 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2686 if(U_FAILURE(errorCode)) { 2687 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2688 return; 2689 } 2690 2691 for(i=0; i<LENGTHOF(converterNames); ++i) { 2692 errorCode=U_ZERO_ERROR; 2693 cnv=ucnv_open(converterNames[i], &errorCode); 2694 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 2695 NULL, NULL, &errorCode); 2696 if(U_FAILURE(errorCode)) { 2697 log_data_err("unable to open %s converter - %s\n", 2698 converterNames[i], u_errorName(errorCode)); 2699 continue; 2700 } 2701 src=bad_utf8; 2702 target=dest; 2703 uprv_memset(dest, 9, sizeof(dest)); 2704 if(i==LENGTHOF(converterNames)-1) { 2705 /* conversion to UTF-8 yields two U+FFFD directly */ 2706 expected=twoFFFD; 2707 expectedLength=6; 2708 } else { 2709 /* conversion to a non-Unicode charset yields two NCRs */ 2710 expected=twoNCRs; 2711 expectedLength=16; 2712 } 2713 pivotBuffer[0]=0; 2714 pivotBuffer[1]=1; 2715 pivotBuffer[2]=2; 2716 pivotSource=pivotTarget=pivotBuffer; 2717 ucnv_convertEx( 2718 cnv, utf8Cnv, 2719 &target, dest+expectedLength, 2720 &src, bad_utf8+sizeof(bad_utf8), 2721 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2722 TRUE, TRUE, &errorCode); 2723 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 || 2724 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) || 2725 dest[expectedLength]!=9 2726 ) { 2727 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]); 2728 } 2729 ucnv_close(cnv); 2730 } 2731 ucnv_close(utf8Cnv); 2732} 2733 2734static void 2735TestConvertAlgorithmic() { 2736#if !UCONFIG_NO_LEGACY_CONVERSION 2737 static const uint8_t 2738 utf8[]={ 2739 /* 4e00 30a1 ff61 0410 */ 2740 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2741 }, 2742 shiftJIS[]={ 2743 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2744 }, 2745 /*errorTarget[]={*/ 2746 /* 2747 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2748 * SUB, SUB, 0x40, SUB, SUB, 0x40 2749 */ 2750 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ 2751 /*},*/ 2752 utf16[]={ 2753 0xfe, 0xff /* BOM only, no text */ 2754 }, 2755 utf32[]={ 2756 0xff, 0xfe, 0, 0 /* BOM only, no text */ 2757 }; 2758 2759 char target[100], utf8NUL[100], shiftJISNUL[100]; 2760 2761 UConverter *cnv; 2762 UErrorCode errorCode; 2763 2764 int32_t length; 2765 2766 errorCode=U_ZERO_ERROR; 2767 cnv=ucnv_open("Shift-JIS", &errorCode); 2768 if(U_FAILURE(errorCode)) { 2769 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2770 ucnv_close(cnv); 2771 return; 2772 } 2773 2774 memcpy(utf8NUL, utf8, sizeof(utf8)); 2775 utf8NUL[sizeof(utf8)]=0; 2776 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS)); 2777 shiftJISNUL[sizeof(shiftJIS)]=0; 2778 2779 /* 2780 * The to/from algorithmic convenience functions share a common implementation, 2781 * so we need not test all permutations of them. 2782 */ 2783 2784 /* length in, not terminated out */ 2785 errorCode=U_ZERO_ERROR; 2786 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode); 2787 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2788 length!=sizeof(shiftJIS) || 2789 memcmp(target, shiftJIS, length)!=0 2790 ) { 2791 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n", 2792 u_errorName(errorCode), length, sizeof(shiftJIS)); 2793 } 2794 2795 /* terminated in and out */ 2796 memset(target, 0x55, sizeof(target)); 2797 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2798 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode); 2799 if( errorCode!=U_ZERO_ERROR || 2800 length!=sizeof(utf8) || 2801 memcmp(target, utf8, length)!=0 2802 ) { 2803 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n", 2804 u_errorName(errorCode), length, sizeof(shiftJIS)); 2805 } 2806 2807 /* empty string, some target buffer */ 2808 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2809 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode); 2810 if( errorCode!=U_ZERO_ERROR || 2811 length!=0 2812 ) { 2813 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n", 2814 u_errorName(errorCode), length); 2815 } 2816 2817 /* pseudo-empty string, no target buffer */ 2818 errorCode=U_ZERO_ERROR; 2819 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2820 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2821 length!=0 2822 ) { 2823 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2824 u_errorName(errorCode), length); 2825 } 2826 2827 errorCode=U_ZERO_ERROR; 2828 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); 2829 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2830 length!=0 2831 ) { 2832 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2833 u_errorName(errorCode), length); 2834 } 2835 2836 /* bad arguments */ 2837 errorCode=U_MESSAGE_PARSE_ERROR; 2838 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2839 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2840 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2841 } 2842 2843 /* source==NULL */ 2844 errorCode=U_ZERO_ERROR; 2845 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode); 2846 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2847 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode)); 2848 } 2849 2850 /* illegal alg. type */ 2851 errorCode=U_ZERO_ERROR; 2852 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode); 2853 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2854 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode)); 2855 } 2856ucnv_close(cnv); 2857#endif 2858} 2859 2860#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 2861static void TestLMBCSMaxChar(void) { 2862 static const struct { 2863 int8_t maxSize; 2864 const char *name; 2865 } converter[] = { 2866 /* some non-LMBCS converters - perfect test setup here */ 2867 { 1, "US-ASCII"}, 2868 { 1, "ISO-8859-1"}, 2869 2870 { 2, "UTF-16"}, 2871 { 2, "UTF-16BE"}, 2872 { 3, "UTF-8"}, 2873 { 3, "CESU-8"}, 2874 { 3, "SCSU"}, 2875 { 4, "UTF-32"}, 2876 { 4, "UTF-7"}, 2877 { 4, "IMAP-mailbox-name"}, 2878 { 4, "BOCU-1"}, 2879 2880 { 1, "windows-1256"}, 2881 { 2, "Shift-JIS"}, 2882 { 2, "ibm-16684"}, 2883 { 3, "ibm-930"}, 2884 { 3, "ibm-1390"}, 2885 { 4, "*test3"}, 2886 { 16,"*test4"}, 2887 2888 { 4, "ISCII"}, 2889 { 4, "HZ"}, 2890 2891 { 3, "ISO-2022"}, 2892 { 3, "ISO-2022-KR"}, 2893 { 6, "ISO-2022-JP"}, 2894 { 8, "ISO-2022-CN"}, 2895 2896 /* LMBCS */ 2897 { 3, "LMBCS-1"}, 2898 { 3, "LMBCS-2"}, 2899 { 3, "LMBCS-3"}, 2900 { 3, "LMBCS-4"}, 2901 { 3, "LMBCS-5"}, 2902 { 3, "LMBCS-6"}, 2903 { 3, "LMBCS-8"}, 2904 { 3, "LMBCS-11"}, 2905 { 3, "LMBCS-16"}, 2906 { 3, "LMBCS-17"}, 2907 { 3, "LMBCS-18"}, 2908 { 3, "LMBCS-19"} 2909 }; 2910 int32_t idx; 2911 2912 for (idx = 0; idx < LENGTHOF(converter); idx++) { 2913 UErrorCode status = U_ZERO_ERROR; 2914 UConverter *cnv = cnv_open(converter[idx].name, &status); 2915 if (U_FAILURE(status)) { 2916 continue; 2917 } 2918 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) { 2919 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n", 2920 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv)); 2921 } 2922 ucnv_close(cnv); 2923 } 2924 2925 /* mostly test that the macro compiles */ 2926 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) { 2927 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); 2928 } 2929} 2930#endif 2931 2932static void TestJ1968(void) { 2933 UErrorCode err = U_ZERO_ERROR; 2934 UConverter *cnv; 2935 char myConvName[] = "My really really really really really really really really really really really" 2936 " really really really really really really really really really really really" 2937 " really really really really really really really really long converter name"; 2938 UChar myConvNameU[sizeof(myConvName)]; 2939 2940 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName)); 2941 2942 err = U_ZERO_ERROR; 2943 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0; 2944 cnv = ucnv_openU(myConvNameU, &err); 2945 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2946 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2947 } 2948 2949 err = U_ZERO_ERROR; 2950 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2951 cnv = ucnv_openU(myConvNameU, &err); 2952 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2953 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2954 } 2955 2956 err = U_ZERO_ERROR; 2957 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 2958 cnv = ucnv_openU(myConvNameU, &err); 2959 if (cnv || err != U_FILE_ACCESS_ERROR) { 2960 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2961 } 2962 2963 2964 2965 2966 err = U_ZERO_ERROR; 2967 cnv = ucnv_open(myConvName, &err); 2968 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2969 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2970 } 2971 2972 err = U_ZERO_ERROR; 2973 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ','; 2974 cnv = ucnv_open(myConvName, &err); 2975 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2976 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2977 } 2978 2979 err = U_ZERO_ERROR; 2980 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2981 cnv = ucnv_open(myConvName, &err); 2982 if (cnv || err != U_FILE_ACCESS_ERROR) { 2983 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2984 } 2985 2986 err = U_ZERO_ERROR; 2987 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2988 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7); 2989 cnv = ucnv_open(myConvName, &err); 2990 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2991 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2992 } 2993 2994 /* The comma isn't really a part of the converter name. */ 2995 err = U_ZERO_ERROR; 2996 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2997 cnv = ucnv_open(myConvName, &err); 2998 if (cnv || err != U_FILE_ACCESS_ERROR) { 2999 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3000 } 3001 3002 err = U_ZERO_ERROR; 3003 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' '; 3004 cnv = ucnv_open(myConvName, &err); 3005 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3006 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3007 } 3008 3009 err = U_ZERO_ERROR; 3010 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 3011 cnv = ucnv_open(myConvName, &err); 3012 if (cnv || err != U_FILE_ACCESS_ERROR) { 3013 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3014 } 3015 3016} 3017 3018#if !UCONFIG_NO_LEGACY_CONVERSION 3019static void 3020testSwap(const char *name, UBool swap) { 3021 /* 3022 * Test Unicode text. 3023 * Contains characters that are the highest for some of the 3024 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the 3025 * tables copies the entire tables. 3026 */ 3027 static const UChar text[]={ 3028 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a 3029 }; 3030 3031 UChar uNormal[32], uSwapped[32]; 3032 char normal[32], swapped[32]; 3033 const UChar *pcu; 3034 UChar *pu; 3035 char *pc; 3036 int32_t i, normalLength, swappedLength; 3037 UChar u; 3038 char c; 3039 3040 const char *swappedName; 3041 UConverter *cnv, *swapCnv; 3042 UErrorCode errorCode; 3043 3044 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */ 3045 3046 /* open both the normal and the LF/NL-swapping converters */ 3047 strcpy(swapped, name); 3048 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING); 3049 3050 errorCode=U_ZERO_ERROR; 3051 swapCnv=ucnv_open(swapped, &errorCode); 3052 cnv=ucnv_open(name, &errorCode); 3053 if(U_FAILURE(errorCode)) { 3054 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode)); 3055 goto cleanup; 3056 } 3057 3058 /* the name must contain the swap option if and only if we expect the converter to swap */ 3059 swappedName=ucnv_getName(swapCnv, &errorCode); 3060 if(U_FAILURE(errorCode)) { 3061 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode)); 3062 goto cleanup; 3063 } 3064 3065 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING); 3066 if(swap != (pc!=NULL)) { 3067 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap); 3068 goto cleanup; 3069 } 3070 3071 /* convert to EBCDIC */ 3072 pcu=text; 3073 pc=normal; 3074 ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3075 normalLength=(int32_t)(pc-normal); 3076 3077 pcu=text; 3078 pc=swapped; 3079 ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3080 swappedLength=(int32_t)(pc-swapped); 3081 3082 if(U_FAILURE(errorCode)) { 3083 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode)); 3084 goto cleanup; 3085 } 3086 3087 /* compare EBCDIC output */ 3088 if(normalLength!=swappedLength) { 3089 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3090 goto cleanup; 3091 } 3092 for(i=0; i<normalLength; ++i) { 3093 /* swap EBCDIC LF/NL for comparison */ 3094 c=normal[i]; 3095 if(swap) { 3096 if(c==0x15) { 3097 c=0x25; 3098 } else if(c==0x25) { 3099 c=0x15; 3100 } 3101 } 3102 3103 if(c!=swapped[i]) { 3104 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]); 3105 goto cleanup; 3106 } 3107 } 3108 3109 /* convert back to Unicode (may not roundtrip) */ 3110 pc=normal; 3111 pu=uNormal; 3112 ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); 3113 normalLength=(int32_t)(pu-uNormal); 3114 3115 pc=normal; 3116 pu=uSwapped; 3117 ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); 3118 swappedLength=(int32_t)(pu-uSwapped); 3119 3120 if(U_FAILURE(errorCode)) { 3121 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode)); 3122 goto cleanup; 3123 } 3124 3125 /* compare EBCDIC output */ 3126 if(normalLength!=swappedLength) { 3127 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3128 goto cleanup; 3129 } 3130 for(i=0; i<normalLength; ++i) { 3131 /* swap EBCDIC LF/NL for comparison */ 3132 u=uNormal[i]; 3133 if(swap) { 3134 if(u==0xa) { 3135 u=0x85; 3136 } else if(u==0x85) { 3137 u=0xa; 3138 } 3139 } 3140 3141 if(u!=uSwapped[i]) { 3142 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]); 3143 goto cleanup; 3144 } 3145 } 3146 3147 /* clean up */ 3148cleanup: 3149 ucnv_close(cnv); 3150 ucnv_close(swapCnv); 3151} 3152 3153static void 3154TestEBCDICSwapLFNL() { 3155 static const struct { 3156 const char *name; 3157 UBool swap; 3158 } tests[]={ 3159 { "ibm-37", TRUE }, 3160 { "ibm-1047", TRUE }, 3161 { "ibm-1140", TRUE }, 3162 { "ibm-930", TRUE }, 3163 { "iso-8859-3", FALSE } 3164 }; 3165 3166 int i; 3167 3168 for(i=0; i<LENGTHOF(tests); ++i) { 3169 testSwap(tests[i].name, tests[i].swap); 3170 } 3171} 3172#else 3173static void 3174TestEBCDICSwapLFNL() { 3175 /* test nothing... */ 3176} 3177#endif 3178 3179static const UVersionInfo ICU_34 = {3,4,0,0}; 3180 3181static void TestFromUCountPending(){ 3182#if !UCONFIG_NO_LEGACY_CONVERSION 3183 UErrorCode status = U_ZERO_ERROR; 3184/* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */ 3185 static const struct { 3186 UChar input[6]; 3187 int32_t len; 3188 int32_t exp; 3189 }fromUnicodeTests[] = { 3190 /*m:n conversion*/ 3191 {{0xdbc4},1,1}, 3192 {{ 0xdbc4, 0xde34, 0xd84d},3,1}, 3193 {{ 0xdbc4, 0xde34, 0xd900},3,3}, 3194 }; 3195 int i; 3196 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3197 if(U_FAILURE(status)){ 3198 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3199 return; 3200 } 3201 for(i=0; i<LENGTHOF(fromUnicodeTests); ++i) { 3202 char tgt[10]; 3203 char* target = tgt; 3204 char* targetLimit = target + 10; 3205 const UChar* source = fromUnicodeTests[i].input; 3206 const UChar* sourceLimit = source + fromUnicodeTests[i].len; 3207 int32_t len = 0; 3208 ucnv_reset(cnv); 3209 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3210 len = ucnv_fromUCountPending(cnv, &status); 3211 if(U_FAILURE(status)){ 3212 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3213 status = U_ZERO_ERROR; 3214 continue; 3215 } 3216 if(len != fromUnicodeTests[i].exp){ 3217 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n"); 3218 } 3219 } 3220 status = U_ZERO_ERROR; 3221 { 3222 /* 3223 * The converter has to read the tail before it knows that 3224 * only head alone matches. 3225 * At the end, the output for head will overflow the target, 3226 * middle will be pending, and tail will not have been consumed. 3227 */ 3228 /* 3229 \U00101234 -> x (<U101234> \x07 |0) 3230 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0) 3231 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0) 3232 \U00060007 -> unassigned 3233 */ 3234 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ 3235 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ 3236 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ 3237 char tgt[10]; 3238 char* target = tgt; 3239 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ 3240 const UChar* source = head; 3241 const UChar* sourceLimit = source + u_strlen(head); 3242 int32_t len = 0; 3243 ucnv_reset(cnv); 3244 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3245 len = ucnv_fromUCountPending(cnv, &status); 3246 if(U_FAILURE(status)){ 3247 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3248 status = U_ZERO_ERROR; 3249 } 3250 if(len!=4){ 3251 log_err("ucnv_fromUInputHeld did not return correct length for head\n"); 3252 } 3253 source = middle; 3254 sourceLimit = source + u_strlen(middle); 3255 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3256 len = ucnv_fromUCountPending(cnv, &status); 3257 if(U_FAILURE(status)){ 3258 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3259 status = U_ZERO_ERROR; 3260 } 3261 if(len!=5){ 3262 log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); 3263 } 3264 source = tail; 3265 sourceLimit = source + u_strlen(tail); 3266 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3267 if(status != U_BUFFER_OVERFLOW_ERROR){ 3268 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3269 } 3270 status = U_ZERO_ERROR; 3271 len = ucnv_fromUCountPending(cnv, &status); 3272 /* middle[1] is pending, tail has not been consumed */ 3273 if(U_FAILURE(status)){ 3274 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); 3275 } 3276 if(len!=1){ 3277 log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); 3278 } 3279 } 3280 ucnv_close(cnv); 3281#endif 3282} 3283 3284static void 3285TestToUCountPending(){ 3286#if !UCONFIG_NO_LEGACY_CONVERSION 3287 UErrorCode status = U_ZERO_ERROR; 3288 static const struct { 3289 char input[6]; 3290 int32_t len; 3291 int32_t exp; 3292 }toUnicodeTests[] = { 3293 /*m:n conversion*/ 3294 {{0x05, 0x01, 0x02},3,3}, 3295 {{0x01, 0x02},2,2}, 3296 {{0x07, 0x00, 0x01, 0x02},4,4}, 3297 }; 3298 3299 int i; 3300 UConverterToUCallback *oldToUAction= NULL; 3301 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3302 if(U_FAILURE(status)){ 3303 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3304 return; 3305 } 3306 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3307 for(i=0; i<LENGTHOF(toUnicodeTests); ++i) { 3308 UChar tgt[20]; 3309 UChar* target = tgt; 3310 UChar* targetLimit = target + 20; 3311 const char* source = toUnicodeTests[i].input; 3312 const char* sourceLimit = source + toUnicodeTests[i].len; 3313 int32_t len = 0; 3314 ucnv_reset(cnv); 3315 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3316 len = ucnv_toUCountPending(cnv,&status); 3317 if(U_FAILURE(status)){ 3318 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3319 status = U_ZERO_ERROR; 3320 continue; 3321 } 3322 if(len != toUnicodeTests[i].exp){ 3323 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n"); 3324 } 3325 } 3326 status = U_ZERO_ERROR; 3327 ucnv_close(cnv); 3328 3329 { 3330 /* 3331 * The converter has to read the tail before it knows that 3332 * only head alone matches. 3333 * At the end, the output for head will overflow the target, 3334 * mid will be pending, and tail will not have been consumed. 3335 */ 3336 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00}; 3337 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 }; 3338 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 }; 3339 /* 3340 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0) 3341 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0) 3342 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3) 3343 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") 3344 */ 3345 UChar tgt[10]; 3346 UChar* target = tgt; 3347 UChar* targetLimit = target + 1; /* expect overflow from converting */ 3348 const char* source = head; 3349 const char* sourceLimit = source + strlen(head); 3350 int32_t len = 0; 3351 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); 3352 if(U_FAILURE(status)){ 3353 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3354 return; 3355 } 3356 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3357 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3358 len = ucnv_toUCountPending(cnv,&status); 3359 if(U_FAILURE(status)){ 3360 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3361 } 3362 if(len != 4){ 3363 log_err("Did not get the expected len for head.\n"); 3364 } 3365 source=mid; 3366 sourceLimit = source+strlen(mid); 3367 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3368 len = ucnv_toUCountPending(cnv,&status); 3369 if(U_FAILURE(status)){ 3370 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3371 } 3372 if(len != 8){ 3373 log_err("Did not get the expected len for mid.\n"); 3374 } 3375 3376 source=tail; 3377 sourceLimit = source+strlen(tail); 3378 targetLimit = target; 3379 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3380 if(status != U_BUFFER_OVERFLOW_ERROR){ 3381 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3382 } 3383 status = U_ZERO_ERROR; 3384 len = ucnv_toUCountPending(cnv,&status); 3385 /* mid[4] is pending, tail has not been consumed */ 3386 if(U_FAILURE(status)){ 3387 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status)); 3388 } 3389 if(len != 4){ 3390 log_err("Did not get the expected len for tail.\n"); 3391 } 3392 ucnv_close(cnv); 3393 } 3394#endif 3395} 3396 3397static void TestOneDefaultNameChange(const char *name, const char *expected) { 3398 UErrorCode status = U_ZERO_ERROR; 3399 UConverter *cnv; 3400 ucnv_setDefaultName(name); 3401 if(strcmp(ucnv_getDefaultName(), expected)==0) 3402 log_verbose("setDefaultName of %s works.\n", name); 3403 else 3404 log_err("setDefaultName of %s failed\n", name); 3405 cnv=ucnv_open(NULL, &status); 3406 if (U_FAILURE(status) || cnv == NULL) { 3407 log_err("opening the default converter of %s failed\n", name); 3408 return; 3409 } 3410 if(strcmp(ucnv_getName(cnv, &status), expected)==0) 3411 log_verbose("ucnv_getName of %s works.\n", name); 3412 else 3413 log_err("ucnv_getName of %s failed\n", name); 3414 ucnv_close(cnv); 3415} 3416 3417static void TestDefaultName(void) { 3418 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ 3419 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; 3420 strcpy(defaultName, ucnv_getDefaultName()); 3421 3422 log_verbose("getDefaultName returned %s\n", defaultName); 3423 3424 /*change the default name by setting it */ 3425 TestOneDefaultNameChange("UTF-8", "UTF-8"); 3426#if U_CHARSET_IS_UTF8 3427 TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); 3428 TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); 3429 TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); 3430#else 3431# if !UCONFIG_NO_LEGACY_CONVERSION 3432 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); 3433 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); 3434# endif 3435 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); 3436#endif 3437 3438 /*set the default name back*/ 3439 ucnv_setDefaultName(defaultName); 3440} 3441 3442/* Test that ucnv_compareNames() matches names according to spec. ----------- */ 3443 3444static int 3445sign(int n) { 3446 if(n==0) { 3447 return 0; 3448 } else if(n<0) { 3449 return -1; 3450 } else /* n>0 */ { 3451 return 1; 3452 } 3453} 3454 3455static void 3456compareNames(const char **names) { 3457 const char *relation, *name1, *name2; 3458 int rel, result; 3459 3460 relation=*names++; 3461 if(*relation=='=') { 3462 rel = 0; 3463 } else if(*relation=='<') { 3464 rel = -1; 3465 } else { 3466 rel = 1; 3467 } 3468 3469 name1=*names++; 3470 if(name1==NULL) { 3471 return; 3472 } 3473 while((name2=*names++)!=NULL) { 3474 result=ucnv_compareNames(name1, name2); 3475 if(sign(result)!=rel) { 3476 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); 3477 } 3478 name1=name2; 3479 } 3480} 3481 3482static void 3483TestCompareNames() { 3484 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; 3485 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; 3486 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; 3487 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; 3488 3489 compareNames(equalUTF8); 3490 compareNames(equalIBM); 3491 compareNames(lessMac); 3492 compareNames(lessUTF080); 3493} 3494 3495static void 3496TestSubstString() { 3497 static const UChar surrogate[1]={ 0xd900 }; 3498 char buffer[16]; 3499 3500 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3501 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3502 UConverter *cnv; 3503 UErrorCode errorCode; 3504 int32_t length; 3505 int8_t len8; 3506 3507 /* UTF-16/32: test that the BOM is output before the sub character */ 3508 errorCode=U_ZERO_ERROR; 3509 cnv=ucnv_open("UTF-16", &errorCode); 3510 if(U_FAILURE(errorCode)) { 3511 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); 3512 return; 3513 } 3514 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3515 ucnv_close(cnv); 3516 if(U_FAILURE(errorCode) || 3517 length!=4 || 3518 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3519 ) { 3520 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); 3521 } 3522 3523 errorCode=U_ZERO_ERROR; 3524 cnv=ucnv_open("UTF-32", &errorCode); 3525 if(U_FAILURE(errorCode)) { 3526 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); 3527 return; 3528 } 3529 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3530 ucnv_close(cnv); 3531 if(U_FAILURE(errorCode) || 3532 length!=8 || 3533 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3534 ) { 3535 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); 3536 } 3537 3538 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ 3539 errorCode=U_ZERO_ERROR; 3540 cnv=ucnv_open("ISO-8859-1", &errorCode); 3541 if(U_FAILURE(errorCode)) { 3542 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); 3543 return; 3544 } 3545 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3546 if(U_FAILURE(errorCode)) { 3547 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); 3548 } else { 3549 len8 = sizeof(buffer); 3550 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3551 /* Stateless converter, we expect the string converted to charset bytes. */ 3552 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { 3553 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); 3554 } 3555 } 3556 ucnv_close(cnv); 3557 3558#if !UCONFIG_NO_LEGACY_CONVERSION 3559 errorCode=U_ZERO_ERROR; 3560 cnv=ucnv_open("HZ", &errorCode); 3561 if(U_FAILURE(errorCode)) { 3562 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); 3563 return; 3564 } 3565 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3566 if(U_FAILURE(errorCode)) { 3567 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); 3568 } else { 3569 len8 = sizeof(buffer); 3570 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3571 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ 3572 if(U_FAILURE(errorCode) || len8!=0) { 3573 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); 3574 } 3575 } 3576 ucnv_close(cnv); 3577#endif 3578 /* 3579 * Further testing of ucnv_setSubstString() is done via intltest convert. 3580 * We do not test edge cases of illegal arguments and similar because the 3581 * function implementation uses all of its parameters in calls to other 3582 * functions with UErrorCode parameters. 3583 */ 3584} 3585 3586static void 3587InvalidArguments() { 3588 UConverter *cnv; 3589 UErrorCode errorCode; 3590 char charBuffer[2] = {1, 1}; 3591 char ucharAsCharBuffer[2] = {2, 2}; 3592 char *charsPtr = charBuffer; 3593 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; 3594 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); 3595 3596 errorCode=U_ZERO_ERROR; 3597 cnv=ucnv_open("UTF-8", &errorCode); 3598 if(U_FAILURE(errorCode)) { 3599 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); 3600 return; 3601 } 3602 3603 errorCode=U_ZERO_ERROR; 3604 /* This one should fail because an incomplete UChar is being passed in */ 3605 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); 3606 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3607 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3608 } 3609 3610 errorCode=U_ZERO_ERROR; 3611 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3612 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); 3613 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3614 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3615 } 3616 3617 errorCode=U_ZERO_ERROR; 3618 /* This one should fail because an incomplete UChar is being passed in */ 3619 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3620 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3621 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3622 } 3623 3624 errorCode=U_ZERO_ERROR; 3625 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3626 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3627 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3628 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3629 } 3630 3631 if (charBuffer[0] != 1 || charBuffer[1] != 1 3632 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) 3633 { 3634 log_err("Data was incorrectly written to buffers\n"); 3635 } 3636 3637 ucnv_close(cnv); 3638} 3639 3640static void TestGetName() { 3641 static const char *const names[] = { 3642 "Unicode", "UTF-16", 3643 "UnicodeBigUnmarked", "UTF-16BE", 3644 "UnicodeBig", "UTF-16BE,version=1", 3645 "UnicodeLittleUnmarked", "UTF-16LE", 3646 "UnicodeLittle", "UTF-16LE,version=1", 3647 "x-UTF-16LE-BOM", "UTF-16LE,version=1" 3648 }; 3649 int32_t i; 3650 for(i = 0; i < LENGTHOF(names); i += 2) { 3651 UErrorCode errorCode = U_ZERO_ERROR; 3652 UConverter *cnv = ucnv_open(names[i], &errorCode); 3653 if(U_SUCCESS(errorCode)) { 3654 const char *name = ucnv_getName(cnv, &errorCode); 3655 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { 3656 log_err("ucnv_getName(%s) = %s != %s -- %s\n", 3657 names[i], name, names[i+1], u_errorName(errorCode)); 3658 } 3659 ucnv_close(cnv); 3660 } 3661 } 3662} 3663 3664static void TestUTFBOM() { 3665 static const UChar a16[] = { 0x61 }; 3666 static const char *const names[] = { 3667 "UTF-16", 3668 "UTF-16,version=1", 3669 "UTF-16BE", 3670 "UnicodeBig", 3671 "UTF-16LE", 3672 "UnicodeLittle" 3673 }; 3674 static const uint8_t expected[][5] = { 3675#if U_IS_BIG_ENDIAN 3676 { 4, 0xfe, 0xff, 0, 0x61 }, 3677 { 4, 0xfe, 0xff, 0, 0x61 }, 3678#else 3679 { 4, 0xff, 0xfe, 0x61, 0 }, 3680 { 4, 0xff, 0xfe, 0x61, 0 }, 3681#endif 3682 3683 { 2, 0, 0x61 }, 3684 { 4, 0xfe, 0xff, 0, 0x61 }, 3685 3686 { 2, 0x61, 0 }, 3687 { 4, 0xff, 0xfe, 0x61, 0 } 3688 }; 3689 3690 char bytes[10]; 3691 int32_t i; 3692 3693 for(i = 0; i < LENGTHOF(names); ++i) { 3694 UErrorCode errorCode = U_ZERO_ERROR; 3695 UConverter *cnv = ucnv_open(names[i], &errorCode); 3696 int32_t length = 0; 3697 const uint8_t *exp = expected[i]; 3698 if (U_FAILURE(errorCode)) { 3699 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); 3700 continue; 3701 } 3702 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); 3703 3704 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { 3705 log_err("unexpected %s BOM writing behavior -- %s\n", 3706 names[i], u_errorName(errorCode)); 3707 } 3708 ucnv_close(cnv); 3709 } 3710} 3711