1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/***************************************************************************** 7* 8* File ccapitst.c 9* 10* Modification History: 11* Name Description 12* Madhu Katragadda Ported for C API 13****************************************************************************** 14*/ 15#include <stdio.h> 16#include <stdlib.h> 17#include <string.h> 18#include <ctype.h> 19#include "unicode/uloc.h" 20#include "unicode/ucnv.h" 21#include "unicode/ucnv_err.h" 22#include "unicode/putil.h" 23#include "unicode/uset.h" 24#include "unicode/ustring.h" 25#include "ucnv_bld.h" /* for sizeof(UConverter) */ 26#include "cmemory.h" /* for UAlignedMemory */ 27#include "cintltst.h" 28#include "ccapitst.h" 29#include "cstring.h" 30 31#define NUM_CODEPAGE 1 32#define MAX_FILE_LEN 1024*20 33#define UCS_FILE_NAME_SIZE 512 34 35/*returns an action other than the one provided*/ 36#if !UCONFIG_NO_LEGACY_CONVERSION 37static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); 38static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); 39#endif 40 41static UConverter * 42cnv_open(const char *name, UErrorCode *pErrorCode) { 43 if(name!=NULL && name[0]=='*') { 44 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode); 45 } else { 46 return ucnv_open(name, pErrorCode); 47 } 48} 49 50 51static void ListNames(void); 52static void TestFlushCache(void); 53static void TestDuplicateAlias(void); 54static void TestCCSID(void); 55static void TestJ932(void); 56static void TestJ1968(void); 57#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 58static void TestLMBCSMaxChar(void); 59#endif 60 61#if !UCONFIG_NO_LEGACY_CONVERSION 62static void TestConvertSafeCloneCallback(void); 63#endif 64 65static void TestEBCDICSwapLFNL(void); 66static void TestConvertEx(void); 67static void TestConvertExFromUTF8(void); 68static void TestConvertExFromUTF8_C5F0(void); 69static void TestConvertAlgorithmic(void); 70 void TestDefaultConverterError(void); /* defined in cctest.c */ 71 void TestDefaultConverterSet(void); /* defined in cctest.c */ 72static void TestToUCountPending(void); 73static void TestFromUCountPending(void); 74static void TestDefaultName(void); 75static void TestCompareNames(void); 76static void TestSubstString(void); 77static void InvalidArguments(void); 78static void TestGetName(void); 79static void TestUTFBOM(void); 80 81void addTestConvert(TestNode** root); 82 83void addTestConvert(TestNode** root) 84{ 85 addTest(root, &ListNames, "tsconv/ccapitst/ListNames"); 86 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert"); 87 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); 88 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); 89 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); 90 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); 91#if !UCONFIG_NO_LEGACY_CONVERSION 92 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); 93#endif 94 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); 95 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); 96 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); 97#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 98 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); 99#endif 100 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); 101 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); 102 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); 103 addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); 104 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); 105 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); 106 addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet"); 107#if !UCONFIG_NO_FILE_IO 108 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); 109 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); 110#endif 111 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); 112 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); 113 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); 114 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); 115 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); 116 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); 117} 118 119static void ListNames(void) { 120 UErrorCode err = U_ZERO_ERROR; 121 int32_t testLong1 = 0; 122 const char* available_conv; 123 UEnumeration *allNamesEnum = NULL; 124 int32_t allNamesCount = 0; 125 uint16_t count; 126 127 log_verbose("Testing ucnv_openAllNames()..."); 128 allNamesEnum = ucnv_openAllNames(&err); 129 if(U_FAILURE(err)) { 130 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); 131 } 132 else { 133 const char *string = NULL; 134 int32_t len = 0; 135 int32_t count1 = 0; 136 int32_t count2 = 0; 137 allNamesCount = uenum_count(allNamesEnum, &err); 138 while ((string = uenum_next(allNamesEnum, &len, &err))) { 139 count1++; 140 log_verbose("read \"%s\", length %i\n", string, len); 141 } 142 if (U_FAILURE(err)) { 143 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err)); 144 err = U_ZERO_ERROR; 145 } 146 uenum_reset(allNamesEnum, &err); 147 while ((string = uenum_next(allNamesEnum, &len, &err))) { 148 count2++; 149 ucnv_close(ucnv_open(string, &err)); 150 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable"); 151 err = U_ZERO_ERROR; 152 } 153 if (count1 != count2) { 154 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n"); 155 } 156 } 157 uenum_close(allNamesEnum); 158 err = U_ZERO_ERROR; 159 160 /*Tests ucnv_getAvailableName(), getAvialableCount()*/ 161 162 log_verbose("Testing ucnv_countAvailable()..."); 163 164 testLong1=ucnv_countAvailable(); 165 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount); 166 167 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */ 168 169 available_conv = ucnv_getAvailableName(testLong1); 170 /*test ucnv_getAvailableName with err condition*/ 171 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 "); 172 available_conv = ucnv_getAvailableName(-1); 173 if(available_conv != NULL){ 174 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n"); 175 } 176 177 /* Test ucnv_countAliases() etc. */ 178 count = ucnv_countAliases("utf-8", &err); 179 if(U_FAILURE(err)) { 180 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); 181 } else if(count <= 0) { 182 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); 183 } else { 184 /* try to get the aliases individually */ 185 const char *alias; 186 alias = ucnv_getAlias("utf-8", 0, &err); 187 if(U_FAILURE(err)) { 188 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err)); 189 } else if(strcmp("UTF-8", alias) != 0) { 190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias); 191 } else { 192 uint16_t aliasNum; 193 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 194 alias = ucnv_getAlias("utf-8", aliasNum, &err); 195 if(U_FAILURE(err)) { 196 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 197 } else if(strlen(alias) > 20) { 198 /* sanity check */ 199 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias); 200 } else { 201 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias); 202 } 203 } 204 if(U_SUCCESS(err)) { 205 /* try to fill an array with all aliases */ 206 const char **aliases; 207 aliases=(const char **)malloc(count * sizeof(const char *)); 208 if(aliases != 0) { 209 ucnv_getAliases("utf-8", aliases, &err); 210 if(U_FAILURE(err)) { 211 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err)); 212 } else { 213 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 214 /* compare the pointers with the ones returned individually */ 215 alias = ucnv_getAlias("utf-8", aliasNum, &err); 216 if(U_FAILURE(err)) { 217 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 218 } else if(aliases[aliasNum] != alias) { 219 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum); 220 } 221 } 222 } 223 free((char **)aliases); 224 } 225 } 226 } 227 } 228} 229 230 231static void TestConvert() 232{ 233#if !UCONFIG_NO_LEGACY_CONVERSION 234 char myptr[4]; 235 char save[4]; 236 int32_t testLong1 = 0; 237 uint16_t rest = 0; 238 int32_t len = 0; 239 int32_t x = 0; 240 FILE* ucs_file_in = NULL; 241 UChar BOM = 0x0000; 242 UChar myUChar = 0x0000; 243 char* mytarget; /* [MAX_FILE_LEN] */ 244 char* mytarget_1; 245 char* mytarget_use; 246 UChar* consumedUni = NULL; 247 char* consumed = NULL; 248 char* output_cp_buffer; /* [MAX_FILE_LEN] */ 249 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */ 250 UChar* ucs_file_buffer_use; 251 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ 252 UChar* my_ucs_file_buffer_1; 253 int8_t ii = 0; 254 uint16_t codepage_index = 0; 255 int32_t cp = 0; 256 UErrorCode err = U_ZERO_ERROR; 257 char ucs_file_name[UCS_FILE_NAME_SIZE]; 258 UConverterFromUCallback MIA1, MIA1_2; 259 UConverterToUCallback MIA2, MIA2_2; 260 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2; 261 UConverter* someConverters[5]; 262 UConverter* myConverter = 0; 263 UChar* displayname = 0; 264 265 const char* locale; 266 267 UChar* uchar1 = 0; 268 UChar* uchar2 = 0; 269 UChar* uchar3 = 0; 270 int32_t targetcapacity2; 271 int32_t targetcapacity; 272 int32_t targetsize; 273 int32_t disnamelen; 274 275 const UChar* tmp_ucs_buf; 276 const UChar* tmp_consumedUni=NULL; 277 const char* tmp_mytarget_use; 278 const char* tmp_consumed; 279 280 /****************************************************************** 281 Checking Unicode -> ksc 282 ******************************************************************/ 283 284 const char* CodePagesToTest[NUM_CODEPAGE] = 285 { 286 "ibm-949_P110-1999" 287 288 289 }; 290 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] = 291 { 292 949 293 }; 294 295 296 const int8_t CodePagesMinChars[NUM_CODEPAGE] = 297 { 298 1 299 300 }; 301 302 const int8_t CodePagesMaxChars[NUM_CODEPAGE] = 303 { 304 2 305 306 }; 307 308 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] = 309 { 310 0xAFFE 311 }; 312 313 const char* CodePagesTestFiles[NUM_CODEPAGE] = 314 { 315 "uni-text.bin" 316 }; 317 318 319 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] = 320 { 321 UCNV_IBM 322 323 }; 324 325 const char* CodePagesLocale[NUM_CODEPAGE] = 326 { 327 "ko_KR" 328 }; 329 330 UConverterFromUCallback oldFromUAction = NULL; 331 UConverterToUCallback oldToUAction = NULL; 332 const void* oldFromUContext = NULL; 333 const void* oldToUContext = NULL; 334 335 /* Allocate memory */ 336 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0])); 337 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0])); 338 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0])); 339 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0])); 340 341 ucs_file_buffer_use = ucs_file_buffer; 342 mytarget_1=mytarget; 343 mytarget_use = mytarget; 344 my_ucs_file_buffer_1=my_ucs_file_buffer; 345 346 /* flush the converter cache to get a consistent state before the flushing is tested */ 347 ucnv_flushCache(); 348 349 /*Testing ucnv_openU()*/ 350 { 351 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/ 352 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */ 353 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */ 354 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"}; 355 UChar illegalName[100]; 356 UConverter *converter=NULL; 357 err=U_ZERO_ERROR; 358 converter=ucnv_openU(converterName, &err); 359 if(U_FAILURE(err)){ 360 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err)); 361 } 362 ucnv_close(converter); 363 err=U_ZERO_ERROR; 364 converter=ucnv_openU(NULL, &err); 365 if(U_FAILURE(err)){ 366 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err)); 367 } 368 ucnv_close(converter); 369 /*testing with error value*/ 370 err=U_ILLEGAL_ARGUMENT_ERROR; 371 converter=ucnv_openU(converterName, &err); 372 if(!(converter == NULL)){ 373 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n"); 374 } 375 ucnv_close(converter); 376 err=U_ZERO_ERROR; 377 u_uastrcpy(illegalName, ""); 378 u_uastrcpy(illegalName, illegalNameChars); 379 ucnv_openU(illegalName, &err); 380 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){ 381 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n"); 382 } 383 384 err=U_ZERO_ERROR; 385 ucnv_openU(firstSortedName, &err); 386 if(err!=U_FILE_ACCESS_ERROR){ 387 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n"); 388 } 389 390 err=U_ZERO_ERROR; 391 ucnv_openU(lastSortedName, &err); 392 if(err!=U_FILE_ACCESS_ERROR){ 393 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n"); 394 } 395 396 err=U_ZERO_ERROR; 397 } 398 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n"); 399 { 400 UConverter *cnv=NULL; 401 err=U_ZERO_ERROR; 402 cnv=ucnv_open("ibm-949,Madhu", &err); 403 if(U_FAILURE(err)){ 404 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err)); 405 } 406 ucnv_close(cnv); 407 408 } 409 /*Testing ucnv_convert()*/ 410 { 411 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0; 412 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; 413 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; 414 char *target=0; 415 sourceLimit=sizeof(source)/sizeof(source[0]); 416 err=U_ZERO_ERROR; 417 targetLimit=0; 418 419 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err); 420 if(err == U_BUFFER_OVERFLOW_ERROR){ 421 err=U_ZERO_ERROR; 422 targetLimit=targetCapacity+1; 423 target=(char*)malloc(sizeof(char) * targetLimit); 424 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 425 } 426 if(U_FAILURE(err)){ 427 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err)); 428 } 429 else { 430 for(i=0; i<targetCapacity; i++){ 431 if(target[i] != expectedTarget[i]){ 432 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]); 433 } 434 } 435 436 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err); 437 if(U_FAILURE(err) || i!=7){ 438 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n", 439 u_errorName(err), i); 440 } 441 442 /*Test error conditions*/ 443 err=U_ZERO_ERROR; 444 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err); 445 if(i !=0){ 446 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n"); 447 } 448 449 err=U_ILLEGAL_ARGUMENT_ERROR; 450 sourceLimit=sizeof(source)/sizeof(source[0]); 451 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 452 if(i !=0 ){ 453 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); 454 } 455 456 err=U_ZERO_ERROR; 457 sourceLimit=sizeof(source)/sizeof(source[0]); 458 targetLimit=0; 459 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 460 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ 461 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n"); 462 } 463 err=U_ZERO_ERROR; 464 free(target); 465 } 466 } 467 468 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/ 469 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n"); 470 err=U_ILLEGAL_ARGUMENT_ERROR; 471 if(ucnv_open(NULL, &err) != NULL){ 472 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 473 } 474 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){ 475 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 476 } 477 err=U_ZERO_ERROR; 478 479 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */ 480 log_verbose("\n---Testing ucnv_open default...\n"); 481 someConverters[0] = ucnv_open(NULL,&err); 482 someConverters[1] = ucnv_open(NULL,&err); 483 someConverters[2] = ucnv_open("utf8", &err); 484 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err); 485 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */ 486 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));} 487 488 /* Testing ucnv_getName()*/ 489 /*default code page */ 490 ucnv_getName(someConverters[0], &err); 491 if(U_FAILURE(err)) { 492 log_data_err("getName[0] failed\n"); 493 } else { 494 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err)); 495 } 496 ucnv_getName(someConverters[1], &err); 497 if(U_FAILURE(err)) { 498 log_data_err("getName[1] failed\n"); 499 } else { 500 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); 501 } 502 503 ucnv_close(someConverters[0]); 504 ucnv_close(someConverters[1]); 505 ucnv_close(someConverters[2]); 506 ucnv_close(someConverters[3]); 507 508 509 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index) 510 { 511 int32_t i = 0; 512 513 err = U_ZERO_ERROR; 514#ifdef U_TOPSRCDIR 515 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING); 516#else 517 strcpy(ucs_file_name, loadTestData(&err)); 518 519 if(U_FAILURE(err)){ 520 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err)); 521 return; 522 } 523 524 { 525 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); 526 527 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ 528 *(index+1)=0; 529 } 530 } 531 532 strcat(ucs_file_name,".."U_FILE_SEP_STRING); 533#endif 534 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]); 535 536 ucs_file_in = fopen(ucs_file_name,"rb"); 537 if (!ucs_file_in) 538 { 539 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); 540 return; 541 } 542 543 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/ 544 545 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */ 546 /* ucnv_flushCache(); */ 547 myConverter =ucnv_open( "ibm-949", &err); 548 if (!myConverter || U_FAILURE(err)) 549 { 550 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); 551 fclose(ucs_file_in); 552 break; 553 } 554 555 /*testing for ucnv_getName() */ 556 log_verbose("Testing ucnv_getName()...\n"); 557 ucnv_getName(myConverter, &err); 558 if(U_FAILURE(err)) 559 log_err("Error in getName\n"); 560 else 561 { 562 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); 563 } 564 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) 565 log_err("getName failed\n"); 566 else 567 log_verbose("getName ok\n"); 568 /*Test getName with error condition*/ 569 { 570 const char* name=0; 571 err=U_ILLEGAL_ARGUMENT_ERROR; 572 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR"); 573 name=ucnv_getName(myConverter, &err); 574 if(name != NULL){ 575 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail"); 576 } 577 err=U_ZERO_ERROR; 578 } 579 580 581 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/ 582 583 log_verbose("Testing ucnv_getMaxCharSize()...\n"); 584 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index]) 585 log_verbose("Max byte per character OK\n"); 586 else 587 log_err("Max byte per character failed\n"); 588 589 log_verbose("\n---Testing ucnv_getMinCharSize()...\n"); 590 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index]) 591 log_verbose("Min byte per character OK\n"); 592 else 593 log_err("Min byte per character failed\n"); 594 595 596 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/ 597 log_verbose("\n---Testing ucnv_getSubstChars...\n"); 598 ii=4; 599 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 600 if (ii <= 0) { 601 log_err("ucnv_getSubstChars returned a negative number %d\n", ii); 602 } 603 604 for(x=0;x<ii;x++) 605 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]); 606 if (rest==CodePagesSubstitutionChars[codepage_index]) 607 log_verbose("Substitution character ok\n"); 608 else 609 log_err("Substitution character failed.\n"); 610 611 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n"); 612 ucnv_setSubstChars(myConverter, myptr, ii, &err); 613 if (U_FAILURE(err)) 614 { 615 log_err("FAILURE! %s\n", myErrorName(err)); 616 } 617 ucnv_getSubstChars(myConverter,save, &ii, &err); 618 if (U_FAILURE(err)) 619 { 620 log_err("FAILURE! %s\n", myErrorName(err)); 621 } 622 623 if (strncmp(save, myptr, ii)) 624 log_err("Saved substitution character failed\n"); 625 else 626 log_verbose("Saved substitution character ok\n"); 627 628 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/ 629 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n"); 630 ii=1; 631 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 632 if(err != U_INDEX_OUTOFBOUNDS_ERROR){ 633 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err)); 634 } 635 err=U_ZERO_ERROR; 636 ii=4; 637 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 638 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n"); 639 ucnv_setSubstChars(myConverter, myptr, 0, &err); 640 if(err != U_ILLEGAL_ARGUMENT_ERROR){ 641 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err)); 642 } 643 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n"); 644 strcpy(myptr, "abc"); 645 ucnv_setSubstChars(myConverter, myptr, ii, &err); 646 err=U_ZERO_ERROR; 647 ucnv_getSubstChars(myConverter, save, &ii, &err); 648 if(strncmp(save, myptr, ii) == 0){ 649 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n"); 650 } 651 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n"); 652 err=U_ZERO_ERROR; 653 strcpy(myptr, "abc"); 654 ucnv_setSubstChars(myConverter, myptr, ii, &err); 655 err=U_ILLEGAL_ARGUMENT_ERROR; 656 ucnv_getSubstChars(myConverter, save, &ii, &err); 657 if(strncmp(save, myptr, ii) == 0){ 658 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n"); 659 } 660 err=U_ZERO_ERROR; 661 /*------*/ 662 663#ifdef U_ENABLE_GENERIC_ISO_2022 664 /*resetState ucnv_reset()*/ 665 log_verbose("\n---Testing ucnv_reset()..\n"); 666 ucnv_reset(myConverter); 667 { 668 UChar32 c; 669 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80}; 670 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 671 UConverter *cnv=ucnv_open("ISO_2022", &err); 672 if(U_FAILURE(err)) { 673 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 674 } 675 c=ucnv_getNextUChar(cnv, &source, limit, &err); 676 if((U_FAILURE(err) || c != (UChar32)0x0031)) { 677 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err)); 678 } 679 ucnv_reset(cnv); 680 ucnv_close(cnv); 681 682 } 683#endif 684 685 /*getDisplayName*/ 686 log_verbose("\n---Testing ucnv_getDisplayName()...\n"); 687 locale=CodePagesLocale[codepage_index]; 688 len=0; 689 displayname=NULL; 690 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err); 691 if(err==U_BUFFER_OVERFLOW_ERROR) { 692 err=U_ZERO_ERROR; 693 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar)); 694 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); 695 if(U_FAILURE(err)) { 696 log_err("getDisplayName failed. The error is %s\n", myErrorName(err)); 697 } 698 else { 699 log_verbose(" getDisplayName o.k.\n"); 700 } 701 free(displayname); 702 displayname=NULL; 703 } 704 else { 705 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err)); 706 } 707 /*test ucnv_getDiaplayName with error condition*/ 708 err= U_ILLEGAL_ARGUMENT_ERROR; 709 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err); 710 if( len !=0 ){ 711 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n"); 712 } 713 /*test ucnv_getDiaplayName with error condition*/ 714 err=U_ZERO_ERROR; 715 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err); 716 if( len !=0 || U_SUCCESS(err)){ 717 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n"); 718 } 719 err=U_ZERO_ERROR; 720 721 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/ 722 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context); 723 724 log_verbose("\n---Testing ucnv_setFromUCallBack...\n"); 725 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 726 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context) 727 { 728 log_err("FAILURE! %s\n", myErrorName(err)); 729 } 730 731 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 732 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM) 733 log_err("get From UCallBack failed\n"); 734 else 735 log_verbose("get From UCallBack ok\n"); 736 737 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n"); 738 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err); 739 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM) 740 { 741 log_err("FAILURE! %s\n", myErrorName(err)); 742 } 743 744 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 745 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context) 746 log_err("get From UCallBack action failed\n"); 747 else 748 log_verbose("get From UCallBack action ok\n"); 749 750 /*testing ucnv_setToUCallBack with error conditions*/ 751 err=U_ILLEGAL_ARGUMENT_ERROR; 752 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n"); 753 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 754 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 755 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){ 756 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 757 } 758 err=U_ZERO_ERROR; 759 760 761 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/ 762 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context); 763 764 log_verbose("\n---Testing setTo UCallBack...\n"); 765 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err); 766 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context) 767 { 768 log_err("FAILURE! %s\n", myErrorName(err)); 769 } 770 771 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 772 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM) 773 log_err("To UCallBack failed\n"); 774 else 775 log_verbose("To UCallBack ok\n"); 776 777 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n"); 778 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err); 779 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM) 780 { log_err("FAILURE! %s\n", myErrorName(err)); } 781 782 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 783 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context) 784 log_err("To UCallBack failed\n"); 785 else 786 log_verbose("To UCallBack ok\n"); 787 788 /*testing ucnv_setToUCallBack with error conditions*/ 789 err=U_ILLEGAL_ARGUMENT_ERROR; 790 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n"); 791 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err); 792 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 793 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){ 794 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 795 } 796 err=U_ZERO_ERROR; 797 798 799 /*getcodepageid testing ucnv_getCCSID() */ 800 log_verbose("\n----Testing getCCSID....\n"); 801 cp = ucnv_getCCSID(myConverter,&err); 802 if (U_FAILURE(err)) 803 { 804 log_err("FAILURE!..... %s\n", myErrorName(err)); 805 } 806 if (cp != CodePageNumberToTest[codepage_index]) 807 log_err("Codepage number test failed\n"); 808 else 809 log_verbose("Codepage number test OK\n"); 810 811 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/ 812 err=U_ILLEGAL_ARGUMENT_ERROR; 813 if( ucnv_getCCSID(myConverter,&err) != -1){ 814 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n"); 815 } 816 err=U_ZERO_ERROR; 817 818 /*getCodepagePlatform testing ucnv_getPlatform()*/ 819 log_verbose("\n---Testing getCodepagePlatform ..\n"); 820 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err)) 821 log_err("Platform codepage test failed\n"); 822 else 823 log_verbose("Platform codepage test ok\n"); 824 825 if (U_FAILURE(err)) 826 { 827 log_err("FAILURE! %s\n", myErrorName(err)); 828 } 829 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/ 830 err= U_ILLEGAL_ARGUMENT_ERROR; 831 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){ 832 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n"); 833 } 834 err=U_ZERO_ERROR; 835 836 837 /*Reads the BOM*/ 838 { 839 // Note: gcc produces a compile warning if the return value from fread() is ignored. 840 size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in); 841 (void)numRead; 842 } 843 if (BOM!=0xFEFF && BOM!=0xFFFE) 844 { 845 log_err("File Missing BOM...Bailing!\n"); 846 fclose(ucs_file_in); 847 break; 848 } 849 850 851 /*Reads in the file*/ 852 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) 853 { 854 myUChar = ucs_file_buffer[i-1]; 855 856 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/ 857 } 858 859 myUChar = ucs_file_buffer[i-1]; 860 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/ 861 862 863 /*testing ucnv_fromUChars() and ucnv_toUChars() */ 864 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/ 865 866 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1)); 867 u_uastrcpy(uchar1,""); 868 u_strncpy(uchar1,ucs_file_buffer,i); 869 uchar1[i] = 0; 870 871 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1)); 872 u_uastrcpy(uchar3,""); 873 u_strncpy(uchar3,ucs_file_buffer,i); 874 uchar3[i] = 0; 875 876 /*Calls the Conversion Routine */ 877 testLong1 = MAX_FILE_LEN; 878 log_verbose("\n---Testing ucnv_fromUChars()\n"); 879 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 880 if (U_FAILURE(err)) 881 { 882 log_err("\nFAILURE...%s\n", myErrorName(err)); 883 } 884 else 885 log_verbose(" ucnv_fromUChars() o.k.\n"); 886 887 /*test the conversion routine */ 888 log_verbose("\n---Testing ucnv_toUChars()\n"); 889 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */ 890 targetcapacity2=0; 891 targetsize = ucnv_toUChars(myConverter, 892 NULL, 893 targetcapacity2, 894 output_cp_buffer, 895 strlen(output_cp_buffer), 896 &err); 897 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ 898 899 if(err==U_BUFFER_OVERFLOW_ERROR) 900 { 901 err=U_ZERO_ERROR; 902 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar)); 903 targetsize = ucnv_toUChars(myConverter, 904 uchar2, 905 targetsize+1, 906 output_cp_buffer, 907 strlen(output_cp_buffer), 908 &err); 909 910 if(U_FAILURE(err)) 911 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err)); 912 else 913 log_verbose(" ucnv_toUChars() o.k.\n"); 914 915 if(u_strcmp(uchar1,uchar2)!=0) 916 log_err("equality test failed with conversion routine\n"); 917 } 918 else 919 { 920 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n"); 921 } 922 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/ 923 err=U_ILLEGAL_ARGUMENT_ERROR; 924 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n"); 925 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 926 if (targetcapacity !=0) { 927 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 928 } 929 err=U_ZERO_ERROR; 930 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n"); 931 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err); 932 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) { 933 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n"); 934 } 935 err=U_ZERO_ERROR; 936 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n"); 937 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err); 938 if (targetcapacity !=0) { 939 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n"); 940 } 941 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n"); 942 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err); 943 if (err != U_BUFFER_OVERFLOW_ERROR) { 944 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); 945 } 946 /*toUChars with error conditions*/ 947 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err); 948 if(targetsize != 0){ 949 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 950 } 951 err=U_ZERO_ERROR; 952 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err); 953 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ 954 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); 955 } 956 err=U_ZERO_ERROR; 957 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err); 958 if (targetsize !=0) { 959 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); 960 } 961 targetcapacity2=0; 962 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err); 963 if (err != U_STRING_NOT_TERMINATED_WARNING) { 964 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", 965 u_errorName(err)); 966 } 967 err=U_ZERO_ERROR; 968 /*-----*/ 969 970 971 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ 972 /*Clean up re-usable vars*/ 973 log_verbose("Testing ucnv_fromUnicode().....\n"); 974 tmp_ucs_buf=ucs_file_buffer_use; 975 ucnv_fromUnicode(myConverter, &mytarget_1, 976 mytarget + MAX_FILE_LEN, 977 &tmp_ucs_buf, 978 ucs_file_buffer_use+i, 979 NULL, 980 TRUE, 981 &err); 982 consumedUni = (UChar*)tmp_consumedUni; 983 (void)consumedUni; /* Suppress set but not used warning. */ 984 985 if (U_FAILURE(err)) 986 { 987 log_err("FAILURE! %s\n", myErrorName(err)); 988 } 989 else 990 log_verbose("ucnv_fromUnicode() o.k.\n"); 991 992 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */ 993 log_verbose("Testing ucnv_toUnicode().....\n"); 994 tmp_mytarget_use=mytarget_use; 995 tmp_consumed = consumed; 996 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1, 997 my_ucs_file_buffer + MAX_FILE_LEN, 998 &tmp_mytarget_use, 999 mytarget_use + (mytarget_1 - mytarget), 1000 NULL, 1001 FALSE, 1002 &err); 1003 consumed = (char*)tmp_consumed; 1004 if (U_FAILURE(err)) 1005 { 1006 log_err("FAILURE! %s\n", myErrorName(err)); 1007 } 1008 else 1009 log_verbose("ucnv_toUnicode() o.k.\n"); 1010 1011 1012 log_verbose("\n---Testing RoundTrip ...\n"); 1013 1014 1015 u_strncpy(uchar3, my_ucs_file_buffer,i); 1016 uchar3[i] = 0; 1017 1018 if(u_strcmp(uchar1,uchar3)==0) 1019 log_verbose("Equality test o.k.\n"); 1020 else 1021 log_err("Equality test failed\n"); 1022 1023 /*sanity compare */ 1024 if(uchar2 == NULL) 1025 { 1026 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__); 1027 } 1028 else 1029 { 1030 if(u_strcmp(uchar2, uchar3)==0) 1031 log_verbose("Equality test o.k.\n"); 1032 else 1033 log_err("Equality test failed\n"); 1034 } 1035 1036 fclose(ucs_file_in); 1037 ucnv_close(myConverter); 1038 if (uchar1 != 0) free(uchar1); 1039 if (uchar2 != 0) free(uchar2); 1040 if (uchar3 != 0) free(uchar3); 1041 } 1042 1043 free((void*)mytarget); 1044 free((void*)output_cp_buffer); 1045 free((void*)ucs_file_buffer); 1046 free((void*)my_ucs_file_buffer); 1047#endif 1048} 1049 1050#if !UCONFIG_NO_LEGACY_CONVERSION 1051static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) 1052{ 1053 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; 1054} 1055 1056static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) 1057{ 1058 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; 1059} 1060#endif 1061 1062static void TestFlushCache(void) { 1063#if !UCONFIG_NO_LEGACY_CONVERSION 1064 UErrorCode err = U_ZERO_ERROR; 1065 UConverter* someConverters[5]; 1066 int flushCount = 0; 1067 1068 /* flush the converter cache to get a consistent state before the flushing is tested */ 1069 ucnv_flushCache(); 1070 1071 /*Testing ucnv_open()*/ 1072 /* Note: These converters have been chosen because they do NOT 1073 encode the Latin characters (U+0041, ...), and therefore are 1074 highly unlikely to be chosen as system default codepages */ 1075 1076 someConverters[0] = ucnv_open("ibm-1047", &err); 1077 if (U_FAILURE(err)) { 1078 log_data_err("FAILURE! %s\n", myErrorName(err)); 1079 } 1080 1081 someConverters[1] = ucnv_open("ibm-1047", &err); 1082 if (U_FAILURE(err)) { 1083 log_data_err("FAILURE! %s\n", myErrorName(err)); 1084 } 1085 1086 someConverters[2] = ucnv_open("ibm-1047", &err); 1087 if (U_FAILURE(err)) { 1088 log_data_err("FAILURE! %s\n", myErrorName(err)); 1089 } 1090 1091 someConverters[3] = ucnv_open("gb18030", &err); 1092 if (U_FAILURE(err)) { 1093 log_data_err("FAILURE! %s\n", myErrorName(err)); 1094 } 1095 1096 someConverters[4] = ucnv_open("ibm-954", &err); 1097 if (U_FAILURE(err)) { 1098 log_data_err("FAILURE! %s\n", myErrorName(err)); 1099 } 1100 1101 1102 /* Testing ucnv_flushCache() */ 1103 log_verbose("\n---Testing ucnv_flushCache...\n"); 1104 if ((flushCount=ucnv_flushCache())==0) 1105 log_verbose("Flush cache ok\n"); 1106 else 1107 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1108 1109 /*testing ucnv_close() and ucnv_flushCache() */ 1110 ucnv_close(someConverters[0]); 1111 ucnv_close(someConverters[1]); 1112 1113 if ((flushCount=ucnv_flushCache())==0) 1114 log_verbose("Flush cache ok\n"); 1115 else 1116 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1117 1118 ucnv_close(someConverters[2]); 1119 ucnv_close(someConverters[3]); 1120 1121 if ((flushCount=ucnv_flushCache())==2) 1122 log_verbose("Flush cache ok\n"); /*because first, second and third are same */ 1123 else 1124 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n", 1125 __LINE__, 1126 flushCount); 1127 1128 ucnv_close(someConverters[4]); 1129 if ( (flushCount=ucnv_flushCache())==1) 1130 log_verbose("Flush cache ok\n"); 1131 else 1132 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); 1133#endif 1134} 1135 1136/** 1137 * Test the converter alias API, specifically the fuzzy matching of 1138 * alias names and the alias table integrity. Make sure each 1139 * converter has at least one alias (itself), and that its listed 1140 * aliases map back to itself. Check some hard-coded UTF-8 and 1141 * ISO_2022 aliases to make sure they work. 1142 */ 1143static void TestAlias() { 1144 int32_t i, ncnv; 1145 UErrorCode status = U_ZERO_ERROR; 1146 1147 /* Predetermined aliases that we expect to map back to ISO_2022 1148 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */ 1149 const char* ISO_2022_NAMES[] = 1150 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", 1151 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; 1152 int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES); 1153 const char *UTF8_NAMES[] = 1154 { "UTF-8", "utf-8", "utf8", "ibm-1208", 1155 "utf_8", "ibm1208", "cp1208" }; 1156 int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES); 1157 1158 struct { 1159 const char *name; 1160 const char *alias; 1161 } CONVERTERS_NAMES[] = { 1162 { "UTF-32BE", "UTF32_BigEndian" }, 1163 { "UTF-32LE", "UTF32_LittleEndian" }, 1164 { "UTF-32", "ISO-10646-UCS-4" }, 1165 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, 1166 { "UTF-32", "ucs-4" } 1167 }; 1168 int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES); 1169 1170 /* When there are bugs in gencnval or in ucnv_io, converters can 1171 appear to have no aliases. */ 1172 ncnv = ucnv_countAvailable(); 1173 log_verbose("%d converters\n", ncnv); 1174 for (i=0; i<ncnv; ++i) { 1175 const char *name = ucnv_getAvailableName(i); 1176 const char *alias0; 1177 uint16_t na = ucnv_countAliases(name, &status); 1178 uint16_t j; 1179 UConverter *cnv; 1180 1181 if (na == 0) { 1182 log_err("FAIL: Converter \"%s\" (i=%d)" 1183 " has no aliases; expect at least one\n", 1184 name, i); 1185 continue; 1186 } 1187 cnv = ucnv_open(name, &status); 1188 if (U_FAILURE(status)) { 1189 log_data_err("FAIL: Converter \"%s\" (i=%d)" 1190 " can't be opened.\n", 1191 name, i); 1192 } 1193 else { 1194 if (strcmp(ucnv_getName(cnv, &status), name) != 0 1195 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { 1196 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " 1197 "They should be the same\n", 1198 name, ucnv_getName(cnv, &status)); 1199 } 1200 } 1201 ucnv_close(cnv); 1202 1203 status = U_ZERO_ERROR; 1204 alias0 = ucnv_getAlias(name, 0, &status); 1205 for (j=1; j<na; ++j) { 1206 const char *alias; 1207 /* Make sure each alias maps back to the the same list of 1208 aliases. Assume that if alias 0 is the same, the whole 1209 list is the same (this should always be true). */ 1210 const char *mapBack; 1211 1212 status = U_ZERO_ERROR; 1213 alias = ucnv_getAlias(name, j, &status); 1214 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1215 log_err("FAIL: Converter \"%s\"is ambiguous\n", name); 1216 } 1217 1218 if (alias == NULL) { 1219 log_err("FAIL: Converter \"%s\" -> " 1220 "alias[%d]=NULL\n", 1221 name, j); 1222 continue; 1223 } 1224 1225 mapBack = ucnv_getAlias(alias, 0, &status); 1226 1227 if (mapBack == NULL) { 1228 log_err("FAIL: Converter \"%s\" -> " 1229 "alias[%d]=\"%s\" -> " 1230 "alias[0]=NULL, exp. \"%s\"\n", 1231 name, j, alias, alias0); 1232 continue; 1233 } 1234 1235 if (0 != strcmp(alias0, mapBack)) { 1236 int32_t idx; 1237 UBool foundAlias = FALSE; 1238 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1239 /* Make sure that we only get this mismapping when there is 1240 an ambiguous alias, and the other converter has this alias too. */ 1241 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) { 1242 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) { 1243 foundAlias = TRUE; 1244 break; 1245 } 1246 } 1247 } 1248 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */ 1249 1250 if (!foundAlias) { 1251 log_err("FAIL: Converter \"%s\" -> " 1252 "alias[%d]=\"%s\" -> " 1253 "alias[0]=\"%s\", exp. \"%s\"\n", 1254 name, j, alias, mapBack, alias0); 1255 } 1256 } 1257 } 1258 } 1259 1260 1261 /* Check a list of predetermined aliases that we expect to map 1262 * back to ISO_2022 and UTF-8. */ 1263 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) { 1264 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status); 1265 if(!mapBack) { 1266 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]); 1267 continue; 1268 } 1269 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) { 1270 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n", 1271 ISO_2022_NAMES[i], mapBack); 1272 } 1273 } 1274 1275 1276 for (i=1; i<UTF8_NAMES_LENGTH; ++i) { 1277 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status); 1278 if(!mapBack) { 1279 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]); 1280 continue; 1281 } 1282 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) { 1283 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n", 1284 UTF8_NAMES[i], mapBack); 1285 } 1286 } 1287 1288 /* 1289 * Check a list of predetermined aliases that we expect to map 1290 * back to predermined converter names. 1291 */ 1292 1293 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { 1294 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); 1295 if(!mapBack) { 1296 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); 1297 continue; 1298 } 1299 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { 1300 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n", 1301 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name); 1302 } 1303 } 1304 1305} 1306 1307static void TestDuplicateAlias(void) { 1308 const char *alias; 1309 UErrorCode status = U_ZERO_ERROR; 1310 1311 status = U_ZERO_ERROR; 1312 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); 1313 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1314 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); 1315 } 1316 status = U_ZERO_ERROR; 1317 alias = ucnv_getStandardName("ibm-943", "IANA", &status); 1318 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1319 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias); 1320 } 1321 status = U_ZERO_ERROR; 1322 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status); 1323 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) { 1324 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias); 1325 } 1326} 1327 1328 1329/* Test safe clone callback */ 1330 1331static uint32_t TSCC_nextSerial() 1332{ 1333 static uint32_t n = 1; 1334 1335 return (n++); 1336} 1337 1338typedef struct 1339{ 1340 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */ 1341 uint32_t serial; /* minted from nextSerial, above */ 1342 UBool wasClosed; /* close happened on the object */ 1343} TSCCContext; 1344 1345static TSCCContext *TSCC_clone(TSCCContext *ctx) 1346{ 1347 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext)); 1348 1349 newCtx->serial = TSCC_nextSerial(); 1350 newCtx->wasClosed = 0; 1351 newCtx->magic = 0xC0FFEE; 1352 1353 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial); 1354 1355 return newCtx; 1356} 1357 1358#if !UCONFIG_NO_LEGACY_CONVERSION 1359static void TSCC_fromU(const void *context, 1360 UConverterFromUnicodeArgs *fromUArgs, 1361 const UChar* codeUnits, 1362 int32_t length, 1363 UChar32 codePoint, 1364 UConverterCallbackReason reason, 1365 UErrorCode * err) 1366{ 1367 TSCCContext *ctx = (TSCCContext*)context; 1368 UConverterFromUCallback junkFrom; 1369 1370 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter); 1371 1372 if(ctx->magic != 0xC0FFEE) { 1373 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1374 return; 1375 } 1376 1377 if(reason == UCNV_CLONE) { 1378 UErrorCode subErr = U_ZERO_ERROR; 1379 TSCCContext *newCtx; 1380 TSCCContext *junkCtx; 1381 TSCCContext **pjunkCtx = &junkCtx; 1382 1383 /* "recreate" it */ 1384 log_verbose("TSCC_fromU: cloning..\n"); 1385 newCtx = TSCC_clone(ctx); 1386 1387 if(newCtx == NULL) { 1388 log_err("TSCC_fromU: internal clone failed on %p\n", ctx); 1389 } 1390 1391 /* now, SET it */ 1392 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1393 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1394 1395 if(U_FAILURE(subErr)) { 1396 *err = subErr; 1397 } 1398 } 1399 1400 if(reason == UCNV_CLOSE) { 1401 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial); 1402 ctx->wasClosed = TRUE; 1403 } 1404} 1405 1406static void TSCC_toU(const void *context, 1407 UConverterToUnicodeArgs *toUArgs, 1408 const char* codeUnits, 1409 int32_t length, 1410 UConverterCallbackReason reason, 1411 UErrorCode * err) 1412{ 1413 TSCCContext *ctx = (TSCCContext*)context; 1414 UConverterToUCallback junkFrom; 1415 1416 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter); 1417 1418 if(ctx->magic != 0xC0FFEE) { 1419 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1420 return; 1421 } 1422 1423 if(reason == UCNV_CLONE) { 1424 UErrorCode subErr = U_ZERO_ERROR; 1425 TSCCContext *newCtx; 1426 TSCCContext *junkCtx; 1427 TSCCContext **pjunkCtx = &junkCtx; 1428 1429 /* "recreate" it */ 1430 log_verbose("TSCC_toU: cloning..\n"); 1431 newCtx = TSCC_clone(ctx); 1432 1433 if(newCtx == NULL) { 1434 log_err("TSCC_toU: internal clone failed on %p\n", ctx); 1435 } 1436 1437 /* now, SET it */ 1438 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1439 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1440 1441 if(U_FAILURE(subErr)) { 1442 *err = subErr; 1443 } 1444 } 1445 1446 if(reason == UCNV_CLOSE) { 1447 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial); 1448 ctx->wasClosed = TRUE; 1449 } 1450} 1451 1452static void TSCC_init(TSCCContext *q) 1453{ 1454 q->magic = 0xC0FFEE; 1455 q->serial = TSCC_nextSerial(); 1456 q->wasClosed = 0; 1457} 1458 1459static void TSCC_print_log(TSCCContext *q, const char *name) 1460{ 1461 if(q==NULL) { 1462 log_verbose("TSCContext: %s is NULL!!\n", name); 1463 } else { 1464 if(q->magic != 0xC0FFEE) { 1465 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n", 1466 q,q->serial, q->magic); 1467 } 1468 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n", 1469 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open"); 1470 } 1471} 1472 1473static void TestConvertSafeCloneCallback() 1474{ 1475 UErrorCode err = U_ZERO_ERROR; 1476 TSCCContext from1, to1; 1477 TSCCContext *from2, *from3, *to2, *to3; 1478 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; 1479 char hunk[8192]; 1480 int32_t hunkSize = 8192; 1481 UConverterFromUCallback junkFrom; 1482 UConverterToUCallback junkTo; 1483 UConverter *conv1, *conv2 = NULL; 1484 1485 conv1 = ucnv_open("iso-8859-3", &err); 1486 1487 if(U_FAILURE(err)) { 1488 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); 1489 return; 1490 } 1491 1492 log_verbose("Opened conv1=%p\n", conv1); 1493 1494 TSCC_init(&from1); 1495 TSCC_init(&to1); 1496 1497 TSCC_print_log(&from1, "from1"); 1498 TSCC_print_log(&to1, "to1"); 1499 1500 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err); 1501 log_verbose("Set from1 on conv1\n"); 1502 TSCC_print_log(&from1, "from1"); 1503 1504 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err); 1505 log_verbose("Set to1 on conv1\n"); 1506 TSCC_print_log(&to1, "to1"); 1507 1508 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err); 1509 if(U_FAILURE(err)) { 1510 log_err("safeClone failed: %s\n", u_errorName(err)); 1511 return; 1512 } 1513 log_verbose("Cloned to conv2=%p.\n", conv2); 1514 1515/********** from *********************/ 1516 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); 1517 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); 1518 1519 TSCC_print_log(from2, "from2"); 1520 TSCC_print_log(from3, "from3(==from1)"); 1521 1522 if(from2 == NULL) { 1523 log_err("FAIL! from2 is null \n"); 1524 return; 1525 } 1526 1527 if(from3 == NULL) { 1528 log_err("FAIL! from3 is null \n"); 1529 return; 1530 } 1531 1532 if(from3 != (&from1) ) { 1533 log_err("FAIL! conv1's FROM context changed!\n"); 1534 } 1535 1536 if(from2 == (&from1) ) { 1537 log_err("FAIL! conv1's FROM context is the same as conv2's!\n"); 1538 } 1539 1540 if(from1.wasClosed) { 1541 log_err("FAIL! from1 is closed \n"); 1542 } 1543 1544 if(from2->wasClosed) { 1545 log_err("FAIL! from2 was closed\n"); 1546 } 1547 1548/********** to *********************/ 1549 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); 1550 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); 1551 1552 TSCC_print_log(to2, "to2"); 1553 TSCC_print_log(to3, "to3(==to1)"); 1554 1555 if(to2 == NULL) { 1556 log_err("FAIL! to2 is null \n"); 1557 return; 1558 } 1559 1560 if(to3 == NULL) { 1561 log_err("FAIL! to3 is null \n"); 1562 return; 1563 } 1564 1565 if(to3 != (&to1) ) { 1566 log_err("FAIL! conv1's TO context changed!\n"); 1567 } 1568 1569 if(to2 == (&to1) ) { 1570 log_err("FAIL! conv1's TO context is the same as conv2's!\n"); 1571 } 1572 1573 if(to1.wasClosed) { 1574 log_err("FAIL! to1 is closed \n"); 1575 } 1576 1577 if(to2->wasClosed) { 1578 log_err("FAIL! to2 was closed\n"); 1579 } 1580 1581/*************************************/ 1582 1583 ucnv_close(conv1); 1584 log_verbose("ucnv_closed (conv1)\n"); 1585 TSCC_print_log(&from1, "from1"); 1586 TSCC_print_log(from2, "from2"); 1587 TSCC_print_log(&to1, "to1"); 1588 TSCC_print_log(to2, "to2"); 1589 1590 if(from1.wasClosed == FALSE) { 1591 log_err("FAIL! from1 is NOT closed \n"); 1592 } 1593 1594 if(from2->wasClosed) { 1595 log_err("FAIL! from2 was closed\n"); 1596 } 1597 1598 if(to1.wasClosed == FALSE) { 1599 log_err("FAIL! to1 is NOT closed \n"); 1600 } 1601 1602 if(to2->wasClosed) { 1603 log_err("FAIL! to2 was closed\n"); 1604 } 1605 1606 ucnv_close(conv2); 1607 log_verbose("ucnv_closed (conv2)\n"); 1608 1609 TSCC_print_log(&from1, "from1"); 1610 TSCC_print_log(from2, "from2"); 1611 1612 if(from1.wasClosed == FALSE) { 1613 log_err("FAIL! from1 is NOT closed \n"); 1614 } 1615 1616 if(from2->wasClosed == FALSE) { 1617 log_err("FAIL! from2 was NOT closed\n"); 1618 } 1619 1620 TSCC_print_log(&to1, "to1"); 1621 TSCC_print_log(to2, "to2"); 1622 1623 if(to1.wasClosed == FALSE) { 1624 log_err("FAIL! to1 is NOT closed \n"); 1625 } 1626 1627 if(to2->wasClosed == FALSE) { 1628 log_err("FAIL! to2 was NOT closed\n"); 1629 } 1630 1631 if(to2 != (&to1)) { 1632 free(to2); /* to1 is stack based */ 1633 } 1634 if(from2 != (&from1)) { 1635 free(from2); /* from1 is stack based */ 1636 } 1637} 1638#endif 1639 1640static UBool 1641containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { 1642 while(length>0) { 1643 if(*p!=b) { 1644 return TRUE; 1645 } 1646 ++p; 1647 --length; 1648 } 1649 return FALSE; 1650} 1651 1652static void TestConvertSafeClone() 1653{ 1654 /* one 'regular' & all the 'private stateful' converters */ 1655 static const char *const names[] = { 1656#if !UCONFIG_NO_LEGACY_CONVERSION 1657 "ibm-1047", 1658 "ISO_2022,locale=zh,version=1", 1659#endif 1660 "SCSU", 1661#if !UCONFIG_NO_LEGACY_CONVERSION 1662 "HZ", 1663 "lmbcs", 1664 "ISCII,version=0", 1665 "ISO_2022,locale=kr,version=1", 1666 "ISO_2022,locale=jp,version=2", 1667#endif 1668 "BOCU-1", 1669 "UTF-7", 1670#if !UCONFIG_NO_LEGACY_CONVERSION 1671 "IMAP-mailbox-name", 1672 "ibm-1047-s390" 1673#else 1674 "IMAP=mailbox-name" 1675#endif 1676 }; 1677 1678 /* store the actual sizes of each converter */ 1679 int32_t actualSizes[UPRV_LENGTHOF(names)]; 1680 1681 static const int32_t bufferSizes[] = { 1682 U_CNV_SAFECLONE_BUFFERSIZE, 1683 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ 1684 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ 1685 }; 1686 1687 char charBuffer[21]; /* Leave at an odd number for alignment testing */ 1688 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; 1689 int32_t bufferSize, maxBufferSize; 1690 const char *maxName; 1691 UConverter * cnv, *cnv2; 1692 UErrorCode err; 1693 1694 char *pCharBuffer; 1695 const char *pConstCharBuffer; 1696 const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer); 1697 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ 1698 UChar uniCharBuffer[20]; 1699 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; 1700 const char *pCharSource = charSourceBuffer; 1701 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); 1702 UChar *pUCharTarget = uniCharBuffer; 1703 UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); 1704 const UChar * pUniBuffer; 1705 const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); 1706 int32_t idx, j; 1707 1708 err = U_ZERO_ERROR; 1709 cnv = ucnv_open(names[0], &err); 1710 if(U_SUCCESS(err)) { 1711 /* Check the various error & informational states: */ 1712 1713 /* Null status - just returns NULL */ 1714 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1715 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL)) 1716 { 1717 log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); 1718 } 1719 /* error status - should return 0 & keep error the same */ 1720 err = U_MEMORY_ALLOCATION_ERROR; 1721 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) 1722 { 1723 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); 1724 } 1725 err = U_ZERO_ERROR; 1726 1727 /* Null buffer size pointer is ok */ 1728 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err)) 1729 { 1730 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); 1731 } 1732 ucnv_close(cnv2); 1733 err = U_ZERO_ERROR; 1734 1735 /* buffer size pointer is 0 - fill in pbufferSize with a size */ 1736 bufferSize = 0; 1737 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) 1738 { 1739 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); 1740 } 1741 /* Verify our define is large enough */ 1742 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) 1743 { 1744 log_err("FAIL: Pre-calculated buffer size is too small\n"); 1745 } 1746 /* Verify we can use this run-time calculated size */ 1747 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) 1748 { 1749 log_err("FAIL: Converter can't be cloned with run-time size\n"); 1750 } 1751 if (cnv2) { 1752 ucnv_close(cnv2); 1753 } 1754 1755 /* size one byte too small - should allocate & let us know */ 1756 --bufferSize; 1757 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1758 { 1759 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); 1760 } 1761 if (cnv2) { 1762 ucnv_close(cnv2); 1763 } 1764 1765 err = U_ZERO_ERROR; 1766 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1767 1768 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ 1769 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1770 { 1771 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); 1772 } 1773 if (cnv2) { 1774 ucnv_close(cnv2); 1775 } 1776 1777 err = U_ZERO_ERROR; 1778 1779 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ 1780 if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1781 { 1782 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); 1783 } 1784 1785 ucnv_close(cnv); 1786 } 1787 1788 maxBufferSize = 0; 1789 maxName = ""; 1790 1791 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ 1792 1793 for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) { 1794 for (idx = 0; idx < UPRV_LENGTHOF(names); idx++) 1795 { 1796 err = U_ZERO_ERROR; 1797 cnv = ucnv_open(names[idx], &err); 1798 if(U_FAILURE(err)) { 1799 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err)); 1800 continue; 1801 } 1802 1803 if(j == 0) { 1804 /* preflight to get maxBufferSize */ 1805 actualSizes[idx] = 0; 1806 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err); 1807 if(actualSizes[idx] > maxBufferSize) { 1808 maxBufferSize = actualSizes[idx]; 1809 maxName = names[idx]; 1810 } 1811 } 1812 1813 memset(buffer, 0xaa, sizeof(buffer)); 1814 1815 bufferSize = bufferSizes[j]; 1816 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); 1817 1818 /* close the original immediately to make sure that the clone works by itself */ 1819 ucnv_close(cnv); 1820 1821 if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && 1822 err == U_SAFECLONE_ALLOCATED_WARNING 1823 ) { 1824 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]); 1825 } 1826 1827 /* check if the clone function overwrote any bytes that it is not supposed to touch */ 1828 if(bufferSize <= bufferSizes[j]) { 1829 /* used the stack buffer */ 1830 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || 1831 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) 1832 ) { 1833 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", 1834 names[idx], bufferSize, bufferSizes[j]); 1835 } 1836 } else { 1837 /* heap-allocated the clone */ 1838 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { 1839 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", 1840 names[idx], bufferSize, bufferSizes[j]); 1841 } 1842 } 1843 1844 pCharBuffer = charBuffer; 1845 pUniBuffer = uniBuffer; 1846 1847 ucnv_fromUnicode(cnv2, 1848 &pCharBuffer, 1849 charBufferLimit, 1850 &pUniBuffer, 1851 uniBufferLimit, 1852 NULL, 1853 TRUE, 1854 &err); 1855 if(U_FAILURE(err)){ 1856 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); 1857 } 1858 ucnv_toUnicode(cnv2, 1859 &pUCharTarget, 1860 pUCharTargetLimit, 1861 &pCharSource, 1862 pCharSourceLimit, 1863 NULL, 1864 TRUE, 1865 &err 1866 ); 1867 1868 if(U_FAILURE(err)){ 1869 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); 1870 } 1871 1872 pConstCharBuffer = charBuffer; 1873 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) 1874 { 1875 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); 1876 } 1877 ucnv_close(cnv2); 1878 } 1879 } 1880 1881 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1882 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1883 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { 1884 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1885 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1886 } 1887} 1888 1889static void TestCCSID() { 1890#if !UCONFIG_NO_LEGACY_CONVERSION 1891 UConverter *cnv; 1892 UErrorCode errorCode; 1893 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; 1894 int32_t i, ccsid; 1895 1896 for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) { 1897 ccsid=ccsids[i]; 1898 1899 errorCode=U_ZERO_ERROR; 1900 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode); 1901 if(U_FAILURE(errorCode)) { 1902 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode)); 1903 continue; 1904 } 1905 1906 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) { 1907 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode)); 1908 } 1909 1910 /* skip gb18030(ccsid 1392) */ 1911 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) { 1912 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode)); 1913 } 1914 1915 ucnv_close(cnv); 1916 } 1917#endif 1918} 1919 1920/* jitterbug 932: ucnv_convert() bugs --------------------------------------- */ 1921 1922/* CHUNK_SIZE defined in common\ucnv.c: */ 1923#define CHUNK_SIZE 1024 1924 1925static void bug1(void); 1926static void bug2(void); 1927static void bug3(void); 1928 1929static void 1930TestJ932(void) 1931{ 1932 bug1(); /* Unicode intermediate buffer straddle bug */ 1933 bug2(); /* pre-flighting size incorrect caused by simple overflow */ 1934 bug3(); /* pre-flighting size incorrect caused by expansion overflow */ 1935} 1936 1937/* 1938 * jitterbug 932: test chunking boundary conditions in 1939 1940 int32_t ucnv_convert(const char *toConverterName, 1941 const char *fromConverterName, 1942 char *target, 1943 int32_t targetSize, 1944 const char *source, 1945 int32_t sourceSize, 1946 UErrorCode * err) 1947 1948 * See discussions on the icu mailing list in 1949 * 2001-April with the subject "converter 'flush' question". 1950 * 1951 * Bug report and test code provided by Edward J. Batutis. 1952 */ 1953static void bug1() 1954{ 1955#if !UCONFIG_NO_LEGACY_CONVERSION 1956 char char_in[CHUNK_SIZE+32]; 1957 char char_out[CHUNK_SIZE*2]; 1958 1959 /* GB 18030 equivalent of U+10000 is 90308130 */ 1960 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 }; 1961 1962 UErrorCode err = U_ZERO_ERROR; 1963 int32_t i, test_seq_len = sizeof(test_seq); 1964 1965 /* 1966 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward 1967 * until the straddle bug appears. I didn't want to hard-code everything so this test could 1968 * be expanded - however this is the only type of straddle bug I can think of at the moment - 1969 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no 1970 * other Unicode sequences cause a bug since combining sequences are not supported by the 1971 * converters. 1972 */ 1973 1974 for (i = test_seq_len; i >= 0; i--) { 1975 /* put character sequence into input buffer */ 1976 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */ 1977 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len); 1978 1979 /* do the conversion */ 1980 ucnv_convert("us-ascii", /* out */ 1981 "gb18030", /* in */ 1982 char_out, 1983 sizeof(char_out), 1984 char_in, 1985 sizeof(char_in), 1986 &err); 1987 1988 /* bug1: */ 1989 if (err == U_TRUNCATED_CHAR_FOUND) { 1990 /* this happens when surrogate pair straddles the intermediate buffer in 1991 * T_UConverter_fromCodepageToCodepage */ 1992 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n"); 1993 } 1994 } 1995#endif 1996} 1997 1998/* bug2: pre-flighting loop bug: simple overflow causes bug */ 1999static void bug2() 2000{ 2001 /* US-ASCII "1234567890" */ 2002 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 }; 2003#if !UCONFIG_ONLY_HTML_CONVERSION 2004 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 }; 2005 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, 2006 0x00, 0x00, 0x00, 0x31, 2007 0x00, 0x00, 0x00, 0x32, 2008 0x00, 0x00, 0x00, 0x33, 2009 0x00, 0x00, 0x00, 0x34, 2010 0x00, 0x00, 0x00, 0x35, 2011 0x00, 0x00, 0x00, 0x36, 2012 0x00, 0x00, 0x00, 0x37, 2013 0x00, 0x00, 0x00, 0x38, 2014 0x00, 0x00, (char)0xf0, 0x00}; 2015#endif 2016 2017 static char target[5]; 2018 2019 UErrorCode err = U_ZERO_ERROR; 2020 int32_t size; 2021 2022 /* do the conversion */ 2023 size = ucnv_convert("iso-8859-1", /* out */ 2024 "us-ascii", /* in */ 2025 target, 2026 sizeof(target), 2027 source, 2028 sizeof(source), 2029 &err); 2030 2031 if ( size != 10 ) { 2032 /* bug2: size is 5, should be 10 */ 2033 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size); 2034 } 2035 2036#if !UCONFIG_ONLY_HTML_CONVERSION 2037 err = U_ZERO_ERROR; 2038 /* do the conversion */ 2039 size = ucnv_convert("UTF-32BE", /* out */ 2040 "UTF-8", /* in */ 2041 target, 2042 sizeof(target), 2043 sourceUTF8, 2044 sizeof(sourceUTF8), 2045 &err); 2046 2047 if ( size != 32 ) { 2048 /* bug2: size is 5, should be 32 */ 2049 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size); 2050 } 2051 2052 err = U_ZERO_ERROR; 2053 /* do the conversion */ 2054 size = ucnv_convert("UTF-8", /* out */ 2055 "UTF-32BE", /* in */ 2056 target, 2057 sizeof(target), 2058 sourceUTF32, 2059 sizeof(sourceUTF32), 2060 &err); 2061 2062 if ( size != 12 ) { 2063 /* bug2: size is 5, should be 12 */ 2064 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); 2065 } 2066#endif 2067} 2068 2069/* 2070 * bug3: when the characters expand going from source to target codepage 2071 * you get bug3 in addition to bug2 2072 */ 2073static void bug3() 2074{ 2075#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 2076 char char_in[CHUNK_SIZE*4]; 2077 char target[5]; 2078 UErrorCode err = U_ZERO_ERROR; 2079 int32_t size; 2080 2081 /* 2082 * first get the buggy size from bug2 then 2083 * compare it to buggy size with an expansion 2084 */ 2085 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ 2086 2087 /* do the conversion */ 2088 size = ucnv_convert("lmbcs", /* out */ 2089 "us-ascii", /* in */ 2090 target, 2091 sizeof(target), 2092 char_in, 2093 sizeof(char_in), 2094 &err); 2095 2096 if ( size != sizeof(char_in) ) { 2097 /* 2098 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer 2099 * in the converter?), should be CHUNK_SIZE*4 2100 * 2101 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize... 2102 */ 2103 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size); 2104 } 2105 2106 /* 2107 * now do the conversion with expansion 2108 * ascii 0x08 expands to 0x0F 0x28 in lmbcs 2109 */ 2110 memset(char_in, 8, sizeof(char_in)); 2111 err = U_ZERO_ERROR; 2112 2113 /* do the conversion */ 2114 size = ucnv_convert("lmbcs", /* out */ 2115 "us-ascii", /* in */ 2116 target, 2117 sizeof(target), 2118 char_in, 2119 sizeof(char_in), 2120 &err); 2121 2122 /* expect 2X expansion */ 2123 if ( size != sizeof(char_in) * 2 ) { 2124 /* 2125 * bug3: 2126 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05: 2127 */ 2128 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); 2129 } 2130#endif 2131} 2132 2133static void 2134convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, 2135 const char *src, int32_t srcLength, 2136 const char *expectTarget, int32_t expectTargetLength, 2137 int32_t chunkSize, 2138 const char *testName, 2139 UErrorCode expectCode) { 2140 UChar pivotBuffer[CHUNK_SIZE]; 2141 UChar *pivotSource, *pivotTarget; 2142 const UChar *pivotLimit; 2143 2144 char targetBuffer[CHUNK_SIZE]; 2145 char *target; 2146 const char *srcLimit, *finalSrcLimit, *targetLimit; 2147 2148 int32_t targetLength; 2149 2150 UBool flush; 2151 2152 UErrorCode errorCode; 2153 2154 /* setup */ 2155 if(chunkSize>CHUNK_SIZE) { 2156 chunkSize=CHUNK_SIZE; 2157 } 2158 2159 pivotSource=pivotTarget=pivotBuffer; 2160 pivotLimit=pivotBuffer+chunkSize; 2161 2162 finalSrcLimit=src+srcLength; 2163 target=targetBuffer; 2164 targetLimit=targetBuffer+chunkSize; 2165 2166 ucnv_resetToUnicode(srcCnv); 2167 ucnv_resetFromUnicode(targetCnv); 2168 2169 errorCode=U_ZERO_ERROR; 2170 flush=FALSE; 2171 2172 /* convert, streaming-style (both converters and pivot keep state) */ 2173 for(;;) { 2174 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */ 2175 if(src+chunkSize<=finalSrcLimit) { 2176 srcLimit=src+chunkSize; 2177 } else { 2178 srcLimit=finalSrcLimit; 2179 } 2180 ucnv_convertEx(targetCnv, srcCnv, 2181 &target, targetLimit, 2182 &src, srcLimit, 2183 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, 2184 FALSE, flush, &errorCode); 2185 targetLength=(int32_t)(target-targetBuffer); 2186 if(target>targetLimit) { 2187 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", 2188 testName, chunkSize, target, targetLimit); 2189 break; /* TODO: major problem! */ 2190 } 2191 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { 2192 /* continue converting another chunk */ 2193 errorCode=U_ZERO_ERROR; 2194 if(targetLength+chunkSize<=sizeof(targetBuffer)) { 2195 targetLimit=target+chunkSize; 2196 } else { 2197 targetLimit=targetBuffer+sizeof(targetBuffer); 2198 } 2199 } else if(U_FAILURE(errorCode)) { 2200 /* failure */ 2201 break; 2202 } else if(flush) { 2203 /* all done */ 2204 break; 2205 } else if(src==finalSrcLimit && pivotSource==pivotTarget) { 2206 /* all consumed, now flush without input (separate from conversion for testing) */ 2207 flush=TRUE; 2208 } 2209 } 2210 2211 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) { 2212 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n", 2213 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode)); 2214 } else if(targetLength!=expectTargetLength) { 2215 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n", 2216 testName, chunkSize, targetLength, expectTargetLength); 2217 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) { 2218 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n", 2219 testName, chunkSize); 2220 } 2221} 2222 2223static void 2224convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, 2225 const char *src, int32_t srcLength, 2226 const char *expectTarget, int32_t expectTargetLength, 2227 const char *testName, 2228 UErrorCode expectCode) { 2229 convertExStreaming(srcCnv, targetCnv, 2230 src, srcLength, 2231 expectTarget, expectTargetLength, 2232 1, testName, expectCode); 2233 convertExStreaming(srcCnv, targetCnv, 2234 src, srcLength, 2235 expectTarget, expectTargetLength, 2236 3, testName, expectCode); 2237 convertExStreaming(srcCnv, targetCnv, 2238 src, srcLength, 2239 expectTarget, expectTargetLength, 2240 7, testName, expectCode); 2241} 2242 2243static void TestConvertEx() { 2244#if !UCONFIG_NO_LEGACY_CONVERSION 2245 static const uint8_t 2246 utf8[]={ 2247 /* 4e00 30a1 ff61 0410 */ 2248 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2249 }, 2250 shiftJIS[]={ 2251 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2252 }, 2253 errorTarget[]={ 2254 /* 2255 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2256 * SUB, SUB, 0x40, SUB, SUB, 0x40 2257 */ 2258 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40 2259 }; 2260 2261 char srcBuffer[100], targetBuffer[100]; 2262 2263 const char *src; 2264 char *target; 2265 2266 UChar pivotBuffer[100]; 2267 UChar *pivotSource, *pivotTarget; 2268 2269 UConverter *cnv1, *cnv2; 2270 UErrorCode errorCode; 2271 2272 errorCode=U_ZERO_ERROR; 2273 cnv1=ucnv_open("UTF-8", &errorCode); 2274 if(U_FAILURE(errorCode)) { 2275 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode)); 2276 return; 2277 } 2278 2279 cnv2=ucnv_open("Shift-JIS", &errorCode); 2280 if(U_FAILURE(errorCode)) { 2281 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2282 ucnv_close(cnv1); 2283 return; 2284 } 2285 2286 /* test ucnv_convertEx() with streaming conversion style */ 2287 convertExMultiStreaming(cnv1, cnv2, 2288 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS), 2289 "UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2290 2291 convertExMultiStreaming(cnv2, cnv1, 2292 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), 2293 "Shift-JIS -> UTF-8", U_ZERO_ERROR); 2294 2295 /* U_ZERO_ERROR because by default the SUB callbacks are set */ 2296 convertExMultiStreaming(cnv1, cnv2, 2297 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget), 2298 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2299 2300 /* test some simple conversions */ 2301 2302 /* NUL-terminated source and target */ 2303 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2304 memcpy(srcBuffer, utf8, sizeof(utf8)); 2305 srcBuffer[sizeof(utf8)]=0; 2306 src=srcBuffer; 2307 target=targetBuffer; 2308 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2309 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2310 if( errorCode!=U_ZERO_ERROR || 2311 target-targetBuffer!=sizeof(shiftJIS) || 2312 *target!=0 || 2313 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2314 ) { 2315 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n", 2316 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2317 } 2318 2319 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */ 2320 errorCode=U_AMBIGUOUS_ALIAS_WARNING; 2321 memset(targetBuffer, 0xff, sizeof(targetBuffer)); 2322 src=srcBuffer; 2323 target=targetBuffer; 2324 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL, 2325 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2326 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2327 target-targetBuffer!=sizeof(shiftJIS) || 2328 *target!=(char)0xff || 2329 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2330 ) { 2331 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n", 2332 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2333 } 2334 2335 /* bad arguments */ 2336 errorCode=U_MESSAGE_PARSE_ERROR; 2337 src=srcBuffer; 2338 target=targetBuffer; 2339 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2340 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2341 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2342 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2343 } 2344 2345 /* pivotLimit==pivotStart */ 2346 errorCode=U_ZERO_ERROR; 2347 pivotSource=pivotTarget=pivotBuffer; 2348 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2349 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode); 2350 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2351 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode)); 2352 } 2353 2354 /* *pivotSource==NULL */ 2355 errorCode=U_ZERO_ERROR; 2356 pivotSource=NULL; 2357 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2358 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2359 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2360 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode)); 2361 } 2362 2363 /* *source==NULL */ 2364 errorCode=U_ZERO_ERROR; 2365 src=NULL; 2366 pivotSource=pivotBuffer; 2367 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2368 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2369 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2370 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); 2371 } 2372 2373 /* streaming conversion without a pivot buffer */ 2374 errorCode=U_ZERO_ERROR; 2375 src=srcBuffer; 2376 pivotSource=pivotBuffer; 2377 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2378 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); 2379 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2380 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); 2381 } 2382 2383 ucnv_close(cnv1); 2384 ucnv_close(cnv2); 2385#endif 2386} 2387 2388/* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */ 2389static const char *const badUTF8[]={ 2390 /* trail byte */ 2391 "\x80", 2392 2393 /* truncated multi-byte sequences */ 2394 "\xd0", 2395 "\xe0", 2396 "\xe1", 2397 "\xed", 2398 "\xee", 2399 "\xf0", 2400 "\xf1", 2401 "\xf4", 2402 "\xf8", 2403 "\xfc", 2404 2405 "\xe0\x80", 2406 "\xe0\xa0", 2407 "\xe1\x80", 2408 "\xed\x80", 2409 "\xed\xa0", 2410 "\xee\x80", 2411 "\xf0\x80", 2412 "\xf0\x90", 2413 "\xf1\x80", 2414 "\xf4\x80", 2415 "\xf4\x90", 2416 "\xf8\x80", 2417 "\xfc\x80", 2418 2419 "\xf0\x80\x80", 2420 "\xf0\x90\x80", 2421 "\xf1\x80\x80", 2422 "\xf4\x80\x80", 2423 "\xf4\x90\x80", 2424 "\xf8\x80\x80", 2425 "\xfc\x80\x80", 2426 2427 "\xf8\x80\x80\x80", 2428 "\xfc\x80\x80\x80", 2429 2430 "\xfc\x80\x80\x80\x80", 2431 2432 /* complete sequences but non-shortest forms or out of range etc. */ 2433 "\xc0\x80", 2434 "\xe0\x80\x80", 2435 "\xed\xa0\x80", 2436 "\xf0\x80\x80\x80", 2437 "\xf4\x90\x80\x80", 2438 "\xf8\x80\x80\x80\x80", 2439 "\xfc\x80\x80\x80\x80\x80", 2440 "\xfe", 2441 "\xff" 2442}; 2443 2444#define ARG_CHAR_ARR_SIZE 8 2445 2446/* get some character that can be converted and convert it */ 2447static UBool getTestChar(UConverter *cnv, const char *converterName, 2448 char charUTF8[4], int32_t *pCharUTF8Length, 2449 char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length, 2450 char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) { 2451 UChar utf16[U16_MAX_LENGTH]; 2452 int32_t utf16Length; 2453 2454 const UChar *utf16Source; 2455 char *target; 2456 2457 USet *set; 2458 UChar32 c; 2459 UErrorCode errorCode; 2460 2461 errorCode=U_ZERO_ERROR; 2462 set=uset_open(1, 0); 2463 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2464 c=uset_charAt(set, uset_size(set)/2); 2465 uset_close(set); 2466 2467 utf16Length=0; 2468 U16_APPEND_UNSAFE(utf16, utf16Length, c); 2469 *pCharUTF8Length=0; 2470 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c); 2471 2472 utf16Source=utf16; 2473 target=char0; 2474 ucnv_fromUnicode(cnv, 2475 &target, char0+ARG_CHAR_ARR_SIZE, 2476 &utf16Source, utf16+utf16Length, 2477 NULL, FALSE, &errorCode); 2478 *pChar0Length=(int32_t)(target-char0); 2479 2480 utf16Source=utf16; 2481 target=char1; 2482 ucnv_fromUnicode(cnv, 2483 &target, char1+ARG_CHAR_ARR_SIZE, 2484 &utf16Source, utf16+utf16Length, 2485 NULL, FALSE, &errorCode); 2486 *pChar1Length=(int32_t)(target-char1); 2487 2488 if(U_FAILURE(errorCode)) { 2489 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); 2490 return FALSE; 2491 } 2492 return TRUE; 2493} 2494 2495static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2496 char charUTF8[4], int32_t charUTF8Length, 2497 char char0[8], int32_t char0Length, 2498 char char1[8], int32_t char1Length) { 2499 char utf8[16]; 2500 int32_t utf8Length; 2501 2502 char output[16]; 2503 int32_t outputLength; 2504 2505 char invalidChars[8]; 2506 int8_t invalidLength; 2507 2508 const char *source; 2509 char *target; 2510 2511 UChar pivotBuffer[8]; 2512 UChar *pivotSource, *pivotTarget; 2513 2514 UErrorCode errorCode; 2515 int32_t i; 2516 2517 /* test truncated sequences */ 2518 errorCode=U_ZERO_ERROR; 2519 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2520 2521 memcpy(utf8, charUTF8, charUTF8Length); 2522 2523 for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) { 2524 /* truncated sequence? */ 2525 int32_t length=strlen(badUTF8[i]); 2526 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) { 2527 continue; 2528 } 2529 2530 /* assemble a string with the test character and the truncated sequence */ 2531 memcpy(utf8+charUTF8Length, badUTF8[i], length); 2532 utf8Length=charUTF8Length+length; 2533 2534 /* convert and check the invalidChars */ 2535 source=utf8; 2536 target=output; 2537 pivotSource=pivotTarget=pivotBuffer; 2538 errorCode=U_ZERO_ERROR; 2539 ucnv_convertEx(cnv, utf8Cnv, 2540 &target, output+sizeof(output), 2541 &source, utf8+utf8Length, 2542 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer), 2543 TRUE, TRUE, /* reset & flush */ 2544 &errorCode); 2545 outputLength=(int32_t)(target-output); 2546 (void)outputLength; /* Suppress set but not used warning. */ 2547 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) { 2548 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i); 2549 continue; 2550 } 2551 2552 errorCode=U_ZERO_ERROR; 2553 invalidLength=(int8_t)sizeof(invalidChars); 2554 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode); 2555 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) { 2556 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i); 2557 } 2558 } 2559} 2560 2561static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2562 char charUTF8[4], int32_t charUTF8Length, 2563 char char0[8], int32_t char0Length, 2564 char char1[8], int32_t char1Length) { 2565 char utf8[600], expect[600]; 2566 int32_t utf8Length, expectLength; 2567 2568 char testName[32]; 2569 2570 UErrorCode errorCode; 2571 int32_t i; 2572 2573 errorCode=U_ZERO_ERROR; 2574 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode); 2575 2576 /* 2577 * assemble an input string with the test character between each 2578 * bad sequence, 2579 * and an expected string with repeated test character output 2580 */ 2581 memcpy(utf8, charUTF8, charUTF8Length); 2582 utf8Length=charUTF8Length; 2583 2584 memcpy(expect, char0, char0Length); 2585 expectLength=char0Length; 2586 2587 for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) { 2588 int32_t length=strlen(badUTF8[i]); 2589 memcpy(utf8+utf8Length, badUTF8[i], length); 2590 utf8Length+=length; 2591 2592 memcpy(utf8+utf8Length, charUTF8, charUTF8Length); 2593 utf8Length+=charUTF8Length; 2594 2595 memcpy(expect+expectLength, char1, char1Length); 2596 expectLength+=char1Length; 2597 } 2598 2599 /* expect that each bad UTF-8 sequence is detected and skipped */ 2600 strcpy(testName, "from bad UTF-8 to "); 2601 strcat(testName, converterName); 2602 2603 convertExMultiStreaming(utf8Cnv, cnv, 2604 utf8, utf8Length, 2605 expect, expectLength, 2606 testName, 2607 U_ZERO_ERROR); 2608} 2609 2610/* Test illegal UTF-8 input. */ 2611static void TestConvertExFromUTF8() { 2612 static const char *const converterNames[]={ 2613#if !UCONFIG_NO_LEGACY_CONVERSION 2614 "windows-1252", 2615 "shift-jis", 2616#endif 2617 "us-ascii", 2618 "iso-8859-1", 2619 "utf-8" 2620 }; 2621 2622 UConverter *utf8Cnv, *cnv; 2623 UErrorCode errorCode; 2624 int32_t i; 2625 2626 /* fromUnicode versions of some character, from initial state and later */ 2627 char charUTF8[4], char0[8], char1[8]; 2628 int32_t charUTF8Length, char0Length, char1Length; 2629 2630 errorCode=U_ZERO_ERROR; 2631 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2632 if(U_FAILURE(errorCode)) { 2633 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2634 return; 2635 } 2636 2637 for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) { 2638 errorCode=U_ZERO_ERROR; 2639 cnv=ucnv_open(converterNames[i], &errorCode); 2640 if(U_FAILURE(errorCode)) { 2641 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode)); 2642 continue; 2643 } 2644 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) { 2645 continue; 2646 } 2647 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2648 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2649 ucnv_close(cnv); 2650 } 2651 ucnv_close(utf8Cnv); 2652} 2653 2654static void TestConvertExFromUTF8_C5F0() { 2655 static const char *const converterNames[]={ 2656#if !UCONFIG_NO_LEGACY_CONVERSION 2657 "windows-1251", 2658 "shift-jis", 2659#endif 2660 "us-ascii", 2661 "iso-8859-1", 2662 "utf-8" 2663 }; 2664 2665 UConverter *utf8Cnv, *cnv; 2666 UErrorCode errorCode; 2667 int32_t i; 2668 2669 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 }; 2670 /* Expect "��" (2x U+FFFD as decimal NCRs) */ 2671 static const char twoNCRs[16]={ 2672 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B, 2673 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B 2674 }; 2675 static const char twoFFFD[6]={ 2676 (char)0xef, (char)0xbf, (char)0xbd, 2677 (char)0xef, (char)0xbf, (char)0xbd 2678 }; 2679 const char *expected; 2680 int32_t expectedLength; 2681 char dest[20]; /* longer than longest expectedLength */ 2682 2683 const char *src; 2684 char *target; 2685 2686 UChar pivotBuffer[128]; 2687 UChar *pivotSource, *pivotTarget; 2688 2689 errorCode=U_ZERO_ERROR; 2690 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2691 if(U_FAILURE(errorCode)) { 2692 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2693 return; 2694 } 2695 2696 for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) { 2697 errorCode=U_ZERO_ERROR; 2698 cnv=ucnv_open(converterNames[i], &errorCode); 2699 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 2700 NULL, NULL, &errorCode); 2701 if(U_FAILURE(errorCode)) { 2702 log_data_err("unable to open %s converter - %s\n", 2703 converterNames[i], u_errorName(errorCode)); 2704 continue; 2705 } 2706 src=bad_utf8; 2707 target=dest; 2708 uprv_memset(dest, 9, sizeof(dest)); 2709 if(i==UPRV_LENGTHOF(converterNames)-1) { 2710 /* conversion to UTF-8 yields two U+FFFD directly */ 2711 expected=twoFFFD; 2712 expectedLength=6; 2713 } else { 2714 /* conversion to a non-Unicode charset yields two NCRs */ 2715 expected=twoNCRs; 2716 expectedLength=16; 2717 } 2718 pivotBuffer[0]=0; 2719 pivotBuffer[1]=1; 2720 pivotBuffer[2]=2; 2721 pivotSource=pivotTarget=pivotBuffer; 2722 ucnv_convertEx( 2723 cnv, utf8Cnv, 2724 &target, dest+expectedLength, 2725 &src, bad_utf8+sizeof(bad_utf8), 2726 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer), 2727 TRUE, TRUE, &errorCode); 2728 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 || 2729 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) || 2730 dest[expectedLength]!=9 2731 ) { 2732 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]); 2733 } 2734 ucnv_close(cnv); 2735 } 2736 ucnv_close(utf8Cnv); 2737} 2738 2739static void 2740TestConvertAlgorithmic() { 2741#if !UCONFIG_NO_LEGACY_CONVERSION 2742 static const uint8_t 2743 utf8[]={ 2744 /* 4e00 30a1 ff61 0410 */ 2745 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2746 }, 2747 shiftJIS[]={ 2748 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2749 }, 2750 /*errorTarget[]={*/ 2751 /* 2752 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2753 * SUB, SUB, 0x40, SUB, SUB, 0x40 2754 */ 2755 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ 2756 /*},*/ 2757 utf16[]={ 2758 0xfe, 0xff /* BOM only, no text */ 2759 }; 2760#if !UCONFIG_ONLY_HTML_CONVERSION 2761 static const uint8_t utf32[]={ 2762 0xff, 0xfe, 0, 0 /* BOM only, no text */ 2763 }; 2764#endif 2765 2766 char target[100], utf8NUL[100], shiftJISNUL[100]; 2767 2768 UConverter *cnv; 2769 UErrorCode errorCode; 2770 2771 int32_t length; 2772 2773 errorCode=U_ZERO_ERROR; 2774 cnv=ucnv_open("Shift-JIS", &errorCode); 2775 if(U_FAILURE(errorCode)) { 2776 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2777 ucnv_close(cnv); 2778 return; 2779 } 2780 2781 memcpy(utf8NUL, utf8, sizeof(utf8)); 2782 utf8NUL[sizeof(utf8)]=0; 2783 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS)); 2784 shiftJISNUL[sizeof(shiftJIS)]=0; 2785 2786 /* 2787 * The to/from algorithmic convenience functions share a common implementation, 2788 * so we need not test all permutations of them. 2789 */ 2790 2791 /* length in, not terminated out */ 2792 errorCode=U_ZERO_ERROR; 2793 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode); 2794 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2795 length!=sizeof(shiftJIS) || 2796 memcmp(target, shiftJIS, length)!=0 2797 ) { 2798 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n", 2799 u_errorName(errorCode), length, sizeof(shiftJIS)); 2800 } 2801 2802 /* terminated in and out */ 2803 memset(target, 0x55, sizeof(target)); 2804 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2805 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode); 2806 if( errorCode!=U_ZERO_ERROR || 2807 length!=sizeof(utf8) || 2808 memcmp(target, utf8, length)!=0 2809 ) { 2810 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n", 2811 u_errorName(errorCode), length, sizeof(shiftJIS)); 2812 } 2813 2814 /* empty string, some target buffer */ 2815 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2816 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode); 2817 if( errorCode!=U_ZERO_ERROR || 2818 length!=0 2819 ) { 2820 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n", 2821 u_errorName(errorCode), length); 2822 } 2823 2824 /* pseudo-empty string, no target buffer */ 2825 errorCode=U_ZERO_ERROR; 2826 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2827 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2828 length!=0 2829 ) { 2830 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2831 u_errorName(errorCode), length); 2832 } 2833 2834#if !UCONFIG_ONLY_HTML_CONVERSION 2835 errorCode=U_ZERO_ERROR; 2836 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); 2837 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2838 length!=0 2839 ) { 2840 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2841 u_errorName(errorCode), length); 2842 } 2843#endif 2844 2845 /* bad arguments */ 2846 errorCode=U_MESSAGE_PARSE_ERROR; 2847 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2848 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2849 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2850 } 2851 2852 /* source==NULL */ 2853 errorCode=U_ZERO_ERROR; 2854 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode); 2855 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2856 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode)); 2857 } 2858 2859 /* illegal alg. type */ 2860 errorCode=U_ZERO_ERROR; 2861 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode); 2862 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2863 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode)); 2864 } 2865ucnv_close(cnv); 2866#endif 2867} 2868 2869#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 2870static void TestLMBCSMaxChar(void) { 2871 static const struct { 2872 int8_t maxSize; 2873 const char *name; 2874 } converter[] = { 2875 /* some non-LMBCS converters - perfect test setup here */ 2876 { 1, "US-ASCII"}, 2877 { 1, "ISO-8859-1"}, 2878 2879 { 2, "UTF-16"}, 2880 { 2, "UTF-16BE"}, 2881 { 3, "UTF-8"}, 2882 { 3, "CESU-8"}, 2883 { 3, "SCSU"}, 2884 { 4, "UTF-32"}, 2885 { 4, "UTF-7"}, 2886 { 4, "IMAP-mailbox-name"}, 2887 { 4, "BOCU-1"}, 2888 2889 { 1, "windows-1256"}, 2890 { 2, "Shift-JIS"}, 2891 { 2, "ibm-16684"}, 2892 { 3, "ibm-930"}, 2893 { 3, "ibm-1390"}, 2894 { 4, "*test3"}, 2895 { 16,"*test4"}, 2896 2897 { 4, "ISCII"}, 2898 { 4, "HZ"}, 2899 2900 { 3, "ISO-2022"}, 2901 { 3, "ISO-2022-KR"}, 2902 { 6, "ISO-2022-JP"}, 2903 { 8, "ISO-2022-CN"}, 2904 2905 /* LMBCS */ 2906 { 3, "LMBCS-1"}, 2907 { 3, "LMBCS-2"}, 2908 { 3, "LMBCS-3"}, 2909 { 3, "LMBCS-4"}, 2910 { 3, "LMBCS-5"}, 2911 { 3, "LMBCS-6"}, 2912 { 3, "LMBCS-8"}, 2913 { 3, "LMBCS-11"}, 2914 { 3, "LMBCS-16"}, 2915 { 3, "LMBCS-17"}, 2916 { 3, "LMBCS-18"}, 2917 { 3, "LMBCS-19"} 2918 }; 2919 int32_t idx; 2920 2921 for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) { 2922 UErrorCode status = U_ZERO_ERROR; 2923 UConverter *cnv = cnv_open(converter[idx].name, &status); 2924 if (U_FAILURE(status)) { 2925 continue; 2926 } 2927 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) { 2928 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n", 2929 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv)); 2930 } 2931 ucnv_close(cnv); 2932 } 2933 2934 /* mostly test that the macro compiles */ 2935 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) { 2936 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); 2937 } 2938} 2939#endif 2940 2941static void TestJ1968(void) { 2942 UErrorCode err = U_ZERO_ERROR; 2943 UConverter *cnv; 2944 char myConvName[] = "My really really really really really really really really really really really" 2945 " really really really really really really really really really really really" 2946 " really really really really really really really really long converter name"; 2947 UChar myConvNameU[sizeof(myConvName)]; 2948 2949 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName)); 2950 2951 err = U_ZERO_ERROR; 2952 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0; 2953 cnv = ucnv_openU(myConvNameU, &err); 2954 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2955 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2956 } 2957 2958 err = U_ZERO_ERROR; 2959 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2960 cnv = ucnv_openU(myConvNameU, &err); 2961 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2962 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2963 } 2964 2965 err = U_ZERO_ERROR; 2966 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 2967 cnv = ucnv_openU(myConvNameU, &err); 2968 if (cnv || err != U_FILE_ACCESS_ERROR) { 2969 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2970 } 2971 2972 2973 2974 2975 err = U_ZERO_ERROR; 2976 cnv = ucnv_open(myConvName, &err); 2977 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2978 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2979 } 2980 2981 err = U_ZERO_ERROR; 2982 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ','; 2983 cnv = ucnv_open(myConvName, &err); 2984 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2985 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2986 } 2987 2988 err = U_ZERO_ERROR; 2989 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2990 cnv = ucnv_open(myConvName, &err); 2991 if (cnv || err != U_FILE_ACCESS_ERROR) { 2992 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2993 } 2994 2995 err = U_ZERO_ERROR; 2996 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2997 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7); 2998 cnv = ucnv_open(myConvName, &err); 2999 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3000 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3001 } 3002 3003 /* The comma isn't really a part of the converter name. */ 3004 err = U_ZERO_ERROR; 3005 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 3006 cnv = ucnv_open(myConvName, &err); 3007 if (cnv || err != U_FILE_ACCESS_ERROR) { 3008 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3009 } 3010 3011 err = U_ZERO_ERROR; 3012 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' '; 3013 cnv = ucnv_open(myConvName, &err); 3014 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3015 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3016 } 3017 3018 err = U_ZERO_ERROR; 3019 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 3020 cnv = ucnv_open(myConvName, &err); 3021 if (cnv || err != U_FILE_ACCESS_ERROR) { 3022 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3023 } 3024 3025} 3026 3027#if !UCONFIG_NO_LEGACY_CONVERSION 3028static void 3029testSwap(const char *name, UBool swap) { 3030 /* 3031 * Test Unicode text. 3032 * Contains characters that are the highest for some of the 3033 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the 3034 * tables copies the entire tables. 3035 */ 3036 static const UChar text[]={ 3037 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a 3038 }; 3039 3040 UChar uNormal[32], uSwapped[32]; 3041 char normal[32], swapped[32]; 3042 const UChar *pcu; 3043 UChar *pu; 3044 char *pc; 3045 int32_t i, normalLength, swappedLength; 3046 UChar u; 3047 char c; 3048 3049 const char *swappedName; 3050 UConverter *cnv, *swapCnv; 3051 UErrorCode errorCode; 3052 3053 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */ 3054 3055 /* open both the normal and the LF/NL-swapping converters */ 3056 strcpy(swapped, name); 3057 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING); 3058 3059 errorCode=U_ZERO_ERROR; 3060 swapCnv=ucnv_open(swapped, &errorCode); 3061 cnv=ucnv_open(name, &errorCode); 3062 if(U_FAILURE(errorCode)) { 3063 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode)); 3064 goto cleanup; 3065 } 3066 3067 /* the name must contain the swap option if and only if we expect the converter to swap */ 3068 swappedName=ucnv_getName(swapCnv, &errorCode); 3069 if(U_FAILURE(errorCode)) { 3070 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode)); 3071 goto cleanup; 3072 } 3073 3074 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING); 3075 if(swap != (pc!=NULL)) { 3076 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap); 3077 goto cleanup; 3078 } 3079 3080 /* convert to EBCDIC */ 3081 pcu=text; 3082 pc=normal; 3083 ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); 3084 normalLength=(int32_t)(pc-normal); 3085 3086 pcu=text; 3087 pc=swapped; 3088 ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); 3089 swappedLength=(int32_t)(pc-swapped); 3090 3091 if(U_FAILURE(errorCode)) { 3092 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode)); 3093 goto cleanup; 3094 } 3095 3096 /* compare EBCDIC output */ 3097 if(normalLength!=swappedLength) { 3098 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3099 goto cleanup; 3100 } 3101 for(i=0; i<normalLength; ++i) { 3102 /* swap EBCDIC LF/NL for comparison */ 3103 c=normal[i]; 3104 if(swap) { 3105 if(c==0x15) { 3106 c=0x25; 3107 } else if(c==0x25) { 3108 c=0x15; 3109 } 3110 } 3111 3112 if(c!=swapped[i]) { 3113 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]); 3114 goto cleanup; 3115 } 3116 } 3117 3118 /* convert back to Unicode (may not roundtrip) */ 3119 pc=normal; 3120 pu=uNormal; 3121 ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); 3122 normalLength=(int32_t)(pu-uNormal); 3123 3124 pc=normal; 3125 pu=uSwapped; 3126 ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); 3127 swappedLength=(int32_t)(pu-uSwapped); 3128 3129 if(U_FAILURE(errorCode)) { 3130 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode)); 3131 goto cleanup; 3132 } 3133 3134 /* compare EBCDIC output */ 3135 if(normalLength!=swappedLength) { 3136 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3137 goto cleanup; 3138 } 3139 for(i=0; i<normalLength; ++i) { 3140 /* swap EBCDIC LF/NL for comparison */ 3141 u=uNormal[i]; 3142 if(swap) { 3143 if(u==0xa) { 3144 u=0x85; 3145 } else if(u==0x85) { 3146 u=0xa; 3147 } 3148 } 3149 3150 if(u!=uSwapped[i]) { 3151 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]); 3152 goto cleanup; 3153 } 3154 } 3155 3156 /* clean up */ 3157cleanup: 3158 ucnv_close(cnv); 3159 ucnv_close(swapCnv); 3160} 3161 3162static void 3163TestEBCDICSwapLFNL() { 3164 static const struct { 3165 const char *name; 3166 UBool swap; 3167 } tests[]={ 3168 { "ibm-37", TRUE }, 3169 { "ibm-1047", TRUE }, 3170 { "ibm-1140", TRUE }, 3171 { "ibm-930", TRUE }, 3172 { "iso-8859-3", FALSE } 3173 }; 3174 3175 int i; 3176 3177 for(i=0; i<UPRV_LENGTHOF(tests); ++i) { 3178 testSwap(tests[i].name, tests[i].swap); 3179 } 3180} 3181#else 3182static void 3183TestEBCDICSwapLFNL() { 3184 /* test nothing... */ 3185} 3186#endif 3187 3188static void TestFromUCountPending(){ 3189#if !UCONFIG_NO_LEGACY_CONVERSION 3190 UErrorCode status = U_ZERO_ERROR; 3191/* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */ 3192 static const struct { 3193 UChar input[6]; 3194 int32_t len; 3195 int32_t exp; 3196 }fromUnicodeTests[] = { 3197 /*m:n conversion*/ 3198 {{0xdbc4},1,1}, 3199 {{ 0xdbc4, 0xde34, 0xd84d},3,1}, 3200 {{ 0xdbc4, 0xde34, 0xd900},3,3}, 3201 }; 3202 int i; 3203 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3204 if(U_FAILURE(status)){ 3205 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3206 return; 3207 } 3208 for(i=0; i<UPRV_LENGTHOF(fromUnicodeTests); ++i) { 3209 char tgt[10]; 3210 char* target = tgt; 3211 char* targetLimit = target + 10; 3212 const UChar* source = fromUnicodeTests[i].input; 3213 const UChar* sourceLimit = source + fromUnicodeTests[i].len; 3214 int32_t len = 0; 3215 ucnv_reset(cnv); 3216 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3217 len = ucnv_fromUCountPending(cnv, &status); 3218 if(U_FAILURE(status)){ 3219 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3220 status = U_ZERO_ERROR; 3221 continue; 3222 } 3223 if(len != fromUnicodeTests[i].exp){ 3224 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n"); 3225 } 3226 } 3227 status = U_ZERO_ERROR; 3228 { 3229 /* 3230 * The converter has to read the tail before it knows that 3231 * only head alone matches. 3232 * At the end, the output for head will overflow the target, 3233 * middle will be pending, and tail will not have been consumed. 3234 */ 3235 /* 3236 \U00101234 -> x (<U101234> \x07 |0) 3237 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0) 3238 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0) 3239 \U00060007 -> unassigned 3240 */ 3241 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ 3242 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ 3243 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ 3244 char tgt[10]; 3245 char* target = tgt; 3246 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ 3247 const UChar* source = head; 3248 const UChar* sourceLimit = source + u_strlen(head); 3249 int32_t len = 0; 3250 ucnv_reset(cnv); 3251 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3252 len = ucnv_fromUCountPending(cnv, &status); 3253 if(U_FAILURE(status)){ 3254 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3255 status = U_ZERO_ERROR; 3256 } 3257 if(len!=4){ 3258 log_err("ucnv_fromUInputHeld did not return correct length for head\n"); 3259 } 3260 source = middle; 3261 sourceLimit = source + u_strlen(middle); 3262 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3263 len = ucnv_fromUCountPending(cnv, &status); 3264 if(U_FAILURE(status)){ 3265 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3266 status = U_ZERO_ERROR; 3267 } 3268 if(len!=5){ 3269 log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); 3270 } 3271 source = tail; 3272 sourceLimit = source + u_strlen(tail); 3273 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3274 if(status != U_BUFFER_OVERFLOW_ERROR){ 3275 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3276 } 3277 status = U_ZERO_ERROR; 3278 len = ucnv_fromUCountPending(cnv, &status); 3279 /* middle[1] is pending, tail has not been consumed */ 3280 if(U_FAILURE(status)){ 3281 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); 3282 } 3283 if(len!=1){ 3284 log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); 3285 } 3286 } 3287 ucnv_close(cnv); 3288#endif 3289} 3290 3291static void 3292TestToUCountPending(){ 3293#if !UCONFIG_NO_LEGACY_CONVERSION 3294 UErrorCode status = U_ZERO_ERROR; 3295 static const struct { 3296 char input[6]; 3297 int32_t len; 3298 int32_t exp; 3299 }toUnicodeTests[] = { 3300 /*m:n conversion*/ 3301 {{0x05, 0x01, 0x02},3,3}, 3302 {{0x01, 0x02},2,2}, 3303 {{0x07, 0x00, 0x01, 0x02},4,4}, 3304 }; 3305 3306 int i; 3307 UConverterToUCallback *oldToUAction= NULL; 3308 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3309 if(U_FAILURE(status)){ 3310 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3311 return; 3312 } 3313 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3314 for(i=0; i<UPRV_LENGTHOF(toUnicodeTests); ++i) { 3315 UChar tgt[20]; 3316 UChar* target = tgt; 3317 UChar* targetLimit = target + 20; 3318 const char* source = toUnicodeTests[i].input; 3319 const char* sourceLimit = source + toUnicodeTests[i].len; 3320 int32_t len = 0; 3321 ucnv_reset(cnv); 3322 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3323 len = ucnv_toUCountPending(cnv,&status); 3324 if(U_FAILURE(status)){ 3325 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3326 status = U_ZERO_ERROR; 3327 continue; 3328 } 3329 if(len != toUnicodeTests[i].exp){ 3330 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n"); 3331 } 3332 } 3333 status = U_ZERO_ERROR; 3334 ucnv_close(cnv); 3335 3336 { 3337 /* 3338 * The converter has to read the tail before it knows that 3339 * only head alone matches. 3340 * At the end, the output for head will overflow the target, 3341 * mid will be pending, and tail will not have been consumed. 3342 */ 3343 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00}; 3344 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 }; 3345 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 }; 3346 /* 3347 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0) 3348 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0) 3349 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3) 3350 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") 3351 */ 3352 UChar tgt[10]; 3353 UChar* target = tgt; 3354 UChar* targetLimit = target + 1; /* expect overflow from converting */ 3355 const char* source = head; 3356 const char* sourceLimit = source + strlen(head); 3357 int32_t len = 0; 3358 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); 3359 if(U_FAILURE(status)){ 3360 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3361 return; 3362 } 3363 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3364 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3365 len = ucnv_toUCountPending(cnv,&status); 3366 if(U_FAILURE(status)){ 3367 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3368 } 3369 if(len != 4){ 3370 log_err("Did not get the expected len for head.\n"); 3371 } 3372 source=mid; 3373 sourceLimit = source+strlen(mid); 3374 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3375 len = ucnv_toUCountPending(cnv,&status); 3376 if(U_FAILURE(status)){ 3377 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3378 } 3379 if(len != 8){ 3380 log_err("Did not get the expected len for mid.\n"); 3381 } 3382 3383 source=tail; 3384 sourceLimit = source+strlen(tail); 3385 targetLimit = target; 3386 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3387 if(status != U_BUFFER_OVERFLOW_ERROR){ 3388 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3389 } 3390 status = U_ZERO_ERROR; 3391 len = ucnv_toUCountPending(cnv,&status); 3392 /* mid[4] is pending, tail has not been consumed */ 3393 if(U_FAILURE(status)){ 3394 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status)); 3395 } 3396 if(len != 4){ 3397 log_err("Did not get the expected len for tail.\n"); 3398 } 3399 ucnv_close(cnv); 3400 } 3401#endif 3402} 3403 3404static void TestOneDefaultNameChange(const char *name, const char *expected) { 3405 UErrorCode status = U_ZERO_ERROR; 3406 UConverter *cnv; 3407 ucnv_setDefaultName(name); 3408 if(strcmp(ucnv_getDefaultName(), expected)==0) 3409 log_verbose("setDefaultName of %s works.\n", name); 3410 else 3411 log_err("setDefaultName of %s failed\n", name); 3412 cnv=ucnv_open(NULL, &status); 3413 if (U_FAILURE(status) || cnv == NULL) { 3414 log_err("opening the default converter of %s failed\n", name); 3415 return; 3416 } 3417 if(strcmp(ucnv_getName(cnv, &status), expected)==0) 3418 log_verbose("ucnv_getName of %s works.\n", name); 3419 else 3420 log_err("ucnv_getName of %s failed\n", name); 3421 ucnv_close(cnv); 3422} 3423 3424static void TestDefaultName(void) { 3425 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ 3426 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; 3427 strcpy(defaultName, ucnv_getDefaultName()); 3428 3429 log_verbose("getDefaultName returned %s\n", defaultName); 3430 3431 /*change the default name by setting it */ 3432 TestOneDefaultNameChange("UTF-8", "UTF-8"); 3433#if U_CHARSET_IS_UTF8 3434 TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); 3435 TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); 3436 TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); 3437#else 3438# if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 3439 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); 3440 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); 3441# endif 3442 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); 3443#endif 3444 3445 /*set the default name back*/ 3446 ucnv_setDefaultName(defaultName); 3447} 3448 3449/* Test that ucnv_compareNames() matches names according to spec. ----------- */ 3450 3451static int 3452sign(int n) { 3453 if(n==0) { 3454 return 0; 3455 } else if(n<0) { 3456 return -1; 3457 } else /* n>0 */ { 3458 return 1; 3459 } 3460} 3461 3462static void 3463compareNames(const char **names) { 3464 const char *relation, *name1, *name2; 3465 int rel, result; 3466 3467 relation=*names++; 3468 if(*relation=='=') { 3469 rel = 0; 3470 } else if(*relation=='<') { 3471 rel = -1; 3472 } else { 3473 rel = 1; 3474 } 3475 3476 name1=*names++; 3477 if(name1==NULL) { 3478 return; 3479 } 3480 while((name2=*names++)!=NULL) { 3481 result=ucnv_compareNames(name1, name2); 3482 if(sign(result)!=rel) { 3483 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); 3484 } 3485 name1=name2; 3486 } 3487} 3488 3489static void 3490TestCompareNames() { 3491 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; 3492 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; 3493 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; 3494 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; 3495 3496 compareNames(equalUTF8); 3497 compareNames(equalIBM); 3498 compareNames(lessMac); 3499 compareNames(lessUTF080); 3500} 3501 3502static void 3503TestSubstString() { 3504 static const UChar surrogate[1]={ 0xd900 }; 3505 char buffer[16]; 3506 3507 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3508 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3509 UConverter *cnv; 3510 UErrorCode errorCode; 3511 int32_t length; 3512 int8_t len8; 3513 3514 /* UTF-16/32: test that the BOM is output before the sub character */ 3515 errorCode=U_ZERO_ERROR; 3516 cnv=ucnv_open("UTF-16", &errorCode); 3517 if(U_FAILURE(errorCode)) { 3518 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); 3519 return; 3520 } 3521 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3522 ucnv_close(cnv); 3523 if(U_FAILURE(errorCode) || 3524 length!=4 || 3525 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3526 ) { 3527 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); 3528 } 3529 3530 errorCode=U_ZERO_ERROR; 3531 cnv=ucnv_open("UTF-32", &errorCode); 3532 if(U_FAILURE(errorCode)) { 3533 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); 3534 return; 3535 } 3536 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3537 ucnv_close(cnv); 3538 if(U_FAILURE(errorCode) || 3539 length!=8 || 3540 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3541 ) { 3542 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); 3543 } 3544 3545 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ 3546 errorCode=U_ZERO_ERROR; 3547 cnv=ucnv_open("ISO-8859-1", &errorCode); 3548 if(U_FAILURE(errorCode)) { 3549 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); 3550 return; 3551 } 3552 ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode); 3553 if(U_FAILURE(errorCode)) { 3554 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); 3555 } else { 3556 len8 = sizeof(buffer); 3557 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3558 /* Stateless converter, we expect the string converted to charset bytes. */ 3559 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { 3560 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); 3561 } 3562 } 3563 ucnv_close(cnv); 3564 3565#if !UCONFIG_NO_LEGACY_CONVERSION 3566 errorCode=U_ZERO_ERROR; 3567 cnv=ucnv_open("HZ", &errorCode); 3568 if(U_FAILURE(errorCode)) { 3569 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); 3570 return; 3571 } 3572 ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode); 3573 if(U_FAILURE(errorCode)) { 3574 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); 3575 } else { 3576 len8 = sizeof(buffer); 3577 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3578 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ 3579 if(U_FAILURE(errorCode) || len8!=0) { 3580 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); 3581 } 3582 } 3583 ucnv_close(cnv); 3584#endif 3585 /* 3586 * Further testing of ucnv_setSubstString() is done via intltest convert. 3587 * We do not test edge cases of illegal arguments and similar because the 3588 * function implementation uses all of its parameters in calls to other 3589 * functions with UErrorCode parameters. 3590 */ 3591} 3592 3593static void 3594InvalidArguments() { 3595 UConverter *cnv; 3596 UErrorCode errorCode; 3597 char charBuffer[2] = {1, 1}; 3598 char ucharAsCharBuffer[2] = {2, 2}; 3599 char *charsPtr = charBuffer; 3600 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; 3601 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); 3602 3603 errorCode=U_ZERO_ERROR; 3604 cnv=ucnv_open("UTF-8", &errorCode); 3605 if(U_FAILURE(errorCode)) { 3606 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); 3607 return; 3608 } 3609 3610 errorCode=U_ZERO_ERROR; 3611 /* This one should fail because an incomplete UChar is being passed in */ 3612 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); 3613 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3614 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3615 } 3616 3617 errorCode=U_ZERO_ERROR; 3618 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3619 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); 3620 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3621 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3622 } 3623 3624 errorCode=U_ZERO_ERROR; 3625 /* This one should fail because an incomplete UChar is being passed in */ 3626 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3627 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3628 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3629 } 3630 3631 errorCode=U_ZERO_ERROR; 3632 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3633 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3634 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3635 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3636 } 3637 3638 if (charBuffer[0] != 1 || charBuffer[1] != 1 3639 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) 3640 { 3641 log_err("Data was incorrectly written to buffers\n"); 3642 } 3643 3644 ucnv_close(cnv); 3645} 3646 3647static void TestGetName() { 3648 static const char *const names[] = { 3649 "Unicode", "UTF-16", 3650 "UnicodeBigUnmarked", "UTF-16BE", 3651 "UnicodeBig", "UTF-16BE,version=1", 3652 "UnicodeLittleUnmarked", "UTF-16LE", 3653 "UnicodeLittle", "UTF-16LE,version=1", 3654 "x-UTF-16LE-BOM", "UTF-16LE,version=1" 3655 }; 3656 int32_t i; 3657 for(i = 0; i < UPRV_LENGTHOF(names); i += 2) { 3658 UErrorCode errorCode = U_ZERO_ERROR; 3659 UConverter *cnv = ucnv_open(names[i], &errorCode); 3660 if(U_SUCCESS(errorCode)) { 3661 const char *name = ucnv_getName(cnv, &errorCode); 3662 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { 3663 log_err("ucnv_getName(%s) = %s != %s -- %s\n", 3664 names[i], name, names[i+1], u_errorName(errorCode)); 3665 } 3666 ucnv_close(cnv); 3667 } 3668 } 3669} 3670 3671static void TestUTFBOM() { 3672 static const UChar a16[] = { 0x61 }; 3673 static const char *const names[] = { 3674 "UTF-16", 3675 "UTF-16,version=1", 3676 "UTF-16BE", 3677 "UnicodeBig", 3678 "UTF-16LE", 3679 "UnicodeLittle" 3680 }; 3681 static const uint8_t expected[][5] = { 3682#if U_IS_BIG_ENDIAN 3683 { 4, 0xfe, 0xff, 0, 0x61 }, 3684 { 4, 0xfe, 0xff, 0, 0x61 }, 3685#else 3686 { 4, 0xff, 0xfe, 0x61, 0 }, 3687 { 4, 0xff, 0xfe, 0x61, 0 }, 3688#endif 3689 3690 { 2, 0, 0x61 }, 3691 { 4, 0xfe, 0xff, 0, 0x61 }, 3692 3693 { 2, 0x61, 0 }, 3694 { 4, 0xff, 0xfe, 0x61, 0 } 3695 }; 3696 3697 char bytes[10]; 3698 int32_t i; 3699 3700 for(i = 0; i < UPRV_LENGTHOF(names); ++i) { 3701 UErrorCode errorCode = U_ZERO_ERROR; 3702 UConverter *cnv = ucnv_open(names[i], &errorCode); 3703 int32_t length = 0; 3704 const uint8_t *exp = expected[i]; 3705 if (U_FAILURE(errorCode)) { 3706 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); 3707 continue; 3708 } 3709 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); 3710 3711 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { 3712 log_err("unexpected %s BOM writing behavior -- %s\n", 3713 names[i], u_errorName(errorCode)); 3714 } 3715 ucnv_close(cnv); 3716 } 3717} 3718