1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/******************************************************************************* 7* 8* File CCONVTST.C 9* 10* Modification History: 11* Name Description 12* Steven R. Loomis 7/8/1999 Adding input buffer test 13******************************************************************************** 14*/ 15#include <stdio.h> 16#include "cstring.h" 17#include "unicode/uloc.h" 18#include "unicode/ucnv.h" 19#include "unicode/ucnv_err.h" 20#include "unicode/ucnv_cb.h" 21#include "cintltst.h" 22#include "unicode/utypes.h" 23#include "unicode/ustring.h" 24#include "unicode/ucol.h" 25#include "unicode/utf16.h" 26#include "cmemory.h" 27#include "nucnvtst.h" 28 29#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 30 31static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 32static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 33#if !UCONFIG_NO_COLLATION 34static void TestJitterbug981(void); 35#endif 36#if !UCONFIG_NO_LEGACY_CONVERSION 37static void TestJitterbug1293(void); 38#endif 39static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 40static void TestConverterTypesAndStarters(void); 41static void TestAmbiguous(void); 42static void TestSignatureDetection(void); 43static void TestUTF7(void); 44static void TestIMAP(void); 45static void TestUTF8(void); 46static void TestCESU8(void); 47static void TestUTF16(void); 48static void TestUTF16BE(void); 49static void TestUTF16LE(void); 50static void TestUTF32(void); 51static void TestUTF32BE(void); 52static void TestUTF32LE(void); 53static void TestLATIN1(void); 54 55#if !UCONFIG_NO_LEGACY_CONVERSION 
56static void TestSBCS(void); 57static void TestDBCS(void); 58static void TestMBCS(void); 59#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 60static void TestICCRunout(void); 61#endif 62 63#ifdef U_ENABLE_GENERIC_ISO_2022 64static void TestISO_2022(void); 65#endif 66 67static void TestISO_2022_JP(void); 68static void TestISO_2022_JP_1(void); 69static void TestISO_2022_JP_2(void); 70static void TestISO_2022_KR(void); 71static void TestISO_2022_KR_1(void); 72static void TestISO_2022_CN(void); 73#if 0 74 /* 75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 76 */ 77static void TestISO_2022_CN_EXT(void); 78#endif 79static void TestJIS(void); 80static void TestHZ(void); 81#endif 82 83static void TestSCSU(void); 84 85#if !UCONFIG_NO_LEGACY_CONVERSION 86static void TestEBCDIC_STATEFUL(void); 87static void TestGB18030(void); 88static void TestLMBCS(void); 89static void TestJitterbug255(void); 90static void TestEBCDICUS4XML(void); 91#if 0 92 /* 93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 94 */ 95static void TestJitterbug915(void); 96#endif 97static void TestISCII(void); 98 99static void TestCoverageMBCS(void); 100static void TestJitterbug2346(void); 101static void TestJitterbug2411(void); 102static void TestJB5275(void); 103static void TestJB5275_1(void); 104static void TestJitterbug6175(void); 105 106static void TestIsFixedWidth(void); 107#endif 108 109static void TestInBufSizes(void); 110 111static void TestRoundTrippingAllUTF(void); 112static void TestConv(const uint16_t in[], 113 int len, 114 const char* conv, 115 const char* lang, 116 char byteArr[], 117 int byteArrLen); 118 119/* open a converter, using test data if it begins with '@' */ 120static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 121 122 123#define NEW_MAX_BUFFER 999 124 125static int32_t gInBufferSize = NEW_MAX_BUFFER; 126static int32_t gOutBufferSize = NEW_MAX_BUFFER; 127static char gNuConvTestName[1024]; 128 129#define nct_min(x,y) 
((x<y) ? x : y) 130 131static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 132{ 133 if(cnv && cnv[0] == '@') { 134 return ucnv_openPackage(loadTestData(err), cnv+1, err); 135 } else { 136 return ucnv_open(cnv, err); 137 } 138} 139 140static void printSeq(const unsigned char* a, int len) 141{ 142 int i=0; 143 log_verbose("{"); 144 while (i<len) 145 log_verbose("0x%02x ", a[i++]); 146 log_verbose("}\n"); 147} 148 149static void printUSeq(const UChar* a, int len) 150{ 151 int i=0; 152 log_verbose("{U+"); 153 while (i<len) log_verbose("0x%04x ", a[i++]); 154 log_verbose("}\n"); 155} 156 157static void printSeqErr(const unsigned char* a, int len) 158{ 159 int i=0; 160 fprintf(stderr, "{"); 161 while (i<len) 162 fprintf(stderr, "0x%02x ", a[i++]); 163 fprintf(stderr, "}\n"); 164} 165 166static void printUSeqErr(const UChar* a, int len) 167{ 168 int i=0; 169 fprintf(stderr, "{U+"); 170 while (i<len) 171 fprintf(stderr, "0x%04x ", a[i++]); 172 fprintf(stderr,"}\n"); 173} 174 175static void 176TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 177{ 178 const char* s0; 179 const char* s=(char*)source; 180 const int32_t *r=results; 181 UErrorCode errorCode=U_ZERO_ERROR; 182 UChar32 c; 183 184 while(s<limit) { 185 s0=s; 186 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 187 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 188 break; /* no more significant input */ 189 } else if(U_FAILURE(errorCode)) { 190 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 191 break; 192 } else if( 193 /* test the expected number of input bytes only if >=0 */ 194 (*r>=0 && (int32_t)(s-s0)!=*r) || 195 c!=*(r+1) 196 ) { 197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 198 message, c, (s-s0), *(r+1), *r); 199 break; 200 } 201 r+=2; 202 } 203} 204 205static void 206TestNextUCharError(UConverter* cnv, const char* source, const char* limit, 
UErrorCode expected, const char* message) 207{ 208 const char* s=(char*)source; 209 UErrorCode errorCode=U_ZERO_ERROR; 210 uint32_t c; 211 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 212 if(errorCode != expected){ 213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 214 } 215 if(c != 0xFFFD && c != 0xffff){ 216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 217 } 218 219} 220 221static void TestInBufSizes(void) 222{ 223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 224#if 1 225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 230 TestNewConvertWithBufferSizes(1,1); 231 TestNewConvertWithBufferSizes(2,3); 232 TestNewConvertWithBufferSizes(3,2); 233#endif 234} 235 236static void TestOutBufSizes(void) 237{ 238#if 1 239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 245 246#endif 247} 248 249 250void addTestNewConvert(TestNode** root) 251{ 252#if !UCONFIG_NO_FILE_IO 253 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 254 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 255#endif 256 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 257 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 258 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); 259 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 260 addTest(root, &TestIMAP, 
"tsconv/nucnvtst/TestIMAP"); 261 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 262 263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 264 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 265 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 266 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 267 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 268 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 269 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 270 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 271 272#if !UCONFIG_NO_LEGACY_CONVERSION 273 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 274#endif 275 276 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 277 278#if !UCONFIG_NO_LEGACY_CONVERSION 279 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 280#if !UCONFIG_NO_FILE_IO 281 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 282 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); 283#endif 284 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 285 286#ifdef U_ENABLE_GENERIC_ISO_2022 287 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 288#endif 289 290 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 291 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); 292 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 293 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 294 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 295 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 296 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 297 /* 298 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 299 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 300 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 301 */ 302 
addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 303#endif 304 305 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 306 307#if !UCONFIG_NO_LEGACY_CONVERSION 308 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 309 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 310 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 311 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 312 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 313 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 314 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 315#if !UCONFIG_NO_COLLATION 316 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 317#endif 318 319 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 320#endif 321 322 323#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 324 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 325#endif 326 327 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 328 329#if !UCONFIG_NO_LEGACY_CONVERSION 330 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 331 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 332 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 333 334 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); 335#endif 336} 337 338 339/* Note that this test already makes use of statics, so it's not really 340 multithread safe. 341 This convenience function lets us make the error messages actually useful. 
342*/ 343 344static void setNuConvTestName(const char *codepage, const char *direction) 345{ 346 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 347 codepage, 348 direction, 349 (int)gInBufferSize, 350 (int)gOutBufferSize); 351} 352 353typedef enum 354{ 355 TC_OK = 0, /* test was OK */ 356 TC_MISMATCH = 1, /* Match failed - err was printed */ 357 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ 358} ETestConvertResult; 359 360/* Note: This function uses global variables and it will not do offset 361checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 362static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 363 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 364{ 365 UErrorCode status = U_ZERO_ERROR; 366 UConverter *conv = 0; 367 char junkout[NEW_MAX_BUFFER]; /* FIX */ 368 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 369 char *p; 370 const UChar *src; 371 char *end; 372 char *targ; 373 int32_t *offs; 374 int i; 375 int32_t realBufferSize; 376 char *realBufferEnd; 377 const UChar *realSourceEnd; 378 const UChar *sourceLimit; 379 UBool checkOffsets = TRUE; 380 UBool doFlush; 381 382 for(i=0;i<NEW_MAX_BUFFER;i++) 383 junkout[i] = (char)0xF0; 384 for(i=0;i<NEW_MAX_BUFFER;i++) 385 junokout[i] = 0xFF; 386 387 setNuConvTestName(codepage, "FROM"); 388 389 log_verbose("\n========= %s\n", gNuConvTestName); 390 391 conv = my_ucnv_open(codepage, &status); 392 393 if(U_FAILURE(status)) 394 { 395 log_data_err("Couldn't open converter %s\n",codepage); 396 return TC_FAIL; 397 } 398 if(useFallback){ 399 ucnv_setFallback(conv,useFallback); 400 } 401 402 log_verbose("Converter opened..\n"); 403 404 src = source; 405 targ = junkout; 406 offs = junokout; 407 408 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 409 realBufferEnd = junkout + realBufferSize; 410 realSourceEnd = source + sourceLen; 411 412 if 
( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 413 checkOffsets = FALSE; 414 415 do 416 { 417 end = nct_min(targ + gOutBufferSize, realBufferEnd); 418 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 419 420 doFlush = (UBool)(sourceLimit == realSourceEnd); 421 422 if(targ == realBufferEnd) { 423 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 424 return TC_FAIL; 425 } 426 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 427 428 429 status = U_ZERO_ERROR; 430 431 ucnv_fromUnicode (conv, 432 &targ, 433 end, 434 &src, 435 sourceLimit, 436 checkOffsets ? offs : NULL, 437 doFlush, /* flush if we're at the end of the input data */ 438 &status); 439 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 440 441 if(U_FAILURE(status)) { 442 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 443 return TC_FAIL; 444 } 445 446 log_verbose("\nConversion done [%d uchars in -> %d chars out]. 
\nResult :", 447 sourceLen, targ-junkout); 448 449 if(getTestOption(VERBOSITY_OPTION)) 450 { 451 char junk[9999]; 452 char offset_str[9999]; 453 char *ptr; 454 455 junk[0] = 0; 456 offset_str[0] = 0; 457 for(ptr = junkout;ptr<targ;ptr++) { 458 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 459 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 460 } 461 462 log_verbose(junk); 463 printSeq((const uint8_t *)expect, expectLen); 464 if ( checkOffsets ) { 465 log_verbose("\nOffsets:"); 466 log_verbose(offset_str); 467 } 468 log_verbose("\n"); 469 } 470 ucnv_close(conv); 471 472 if(expectLen != targ-junkout) { 473 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 474 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 475 fprintf(stderr, "Got:\n"); 476 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 477 fprintf(stderr, "Expected:\n"); 478 printSeqErr((const unsigned char*)expect, expectLen); 479 return TC_MISMATCH; 480 } 481 482 if (checkOffsets && (expectOffsets != 0) ) { 483 log_verbose("comparing %d offsets..\n", targ-junkout); 484 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 485 log_err("did not get the expected offsets. 
%s\n", gNuConvTestName); 486 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 487 log_err("\n"); 488 log_err("Got : "); 489 for(p=junkout;p<targ;p++) { 490 log_err("%d,", junokout[p-junkout]); 491 } 492 log_err("\n"); 493 log_err("Expected: "); 494 for(i=0; i<(targ-junkout); i++) { 495 log_err("%d,", expectOffsets[i]); 496 } 497 log_err("\n"); 498 } 499 } 500 501 log_verbose("comparing..\n"); 502 if(!memcmp(junkout, expect, expectLen)) { 503 log_verbose("Matches!\n"); 504 return TC_OK; 505 } else { 506 log_err("String does not match u->%s\n", gNuConvTestName); 507 printUSeqErr(source, sourceLen); 508 fprintf(stderr, "Got:\n"); 509 printSeqErr((const unsigned char *)junkout, expectLen); 510 fprintf(stderr, "Expected:\n"); 511 printSeqErr((const unsigned char *)expect, expectLen); 512 513 return TC_MISMATCH; 514 } 515} 516 517/* Note: This function uses global variables and it will not do offset 518checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 519static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 520 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 521{ 522 UErrorCode status = U_ZERO_ERROR; 523 UConverter *conv = 0; 524 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 525 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 526 const char *src; 527 const char *realSourceEnd; 528 const char *srcLimit; 529 UChar *p; 530 UChar *targ; 531 UChar *end; 532 int32_t *offs; 533 int i; 534 UBool checkOffsets = TRUE; 535 536 int32_t realBufferSize; 537 UChar *realBufferEnd; 538 539 540 for(i=0;i<NEW_MAX_BUFFER;i++) 541 junkout[i] = 0xFFFE; 542 543 for(i=0;i<NEW_MAX_BUFFER;i++) 544 junokout[i] = -1; 545 546 setNuConvTestName(codepage, "TO"); 547 548 log_verbose("\n========= %s\n", gNuConvTestName); 549 550 conv = my_ucnv_open(codepage, &status); 551 552 if(U_FAILURE(status)) 553 { 554 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 555 return 
TC_FAIL; 556 } 557 if(useFallback){ 558 ucnv_setFallback(conv,useFallback); 559 } 560 log_verbose("Converter opened..\n"); 561 562 src = (const char *)source; 563 targ = junkout; 564 offs = junokout; 565 566 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 567 realBufferEnd = junkout + realBufferSize; 568 realSourceEnd = src + sourcelen; 569 570 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 571 checkOffsets = FALSE; 572 573 do 574 { 575 end = nct_min( targ + gOutBufferSize, realBufferEnd); 576 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 577 578 if(targ == realBufferEnd) 579 { 580 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 581 return TC_FAIL; 582 } 583 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 584 585 /* oldTarg = targ; */ 586 587 status = U_ZERO_ERROR; 588 589 ucnv_toUnicode (conv, 590 &targ, 591 end, 592 &src, 593 srcLimit, 594 checkOffsets ? offs : NULL, 595 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 596 &status); 597 598 /* offs += (targ-oldTarg); */ 599 600 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 601 602 if(U_FAILURE(status)) 603 { 604 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 605 return TC_FAIL; 606 } 607 608 log_verbose("\nConversion done. 
%d bytes -> %d chars.\nResult :", 609 sourcelen, targ-junkout); 610 if(getTestOption(VERBOSITY_OPTION)) 611 { 612 char junk[9999]; 613 char offset_str[9999]; 614 UChar *ptr; 615 616 junk[0] = 0; 617 offset_str[0] = 0; 618 619 for(ptr = junkout;ptr<targ;ptr++) 620 { 621 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 622 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 623 } 624 625 log_verbose(junk); 626 printUSeq(expect, expectlen); 627 if ( checkOffsets ) 628 { 629 log_verbose("\nOffsets:"); 630 log_verbose(offset_str); 631 } 632 log_verbose("\n"); 633 } 634 ucnv_close(conv); 635 636 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 637 638 if (checkOffsets && (expectOffsets != 0)) 639 { 640 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 641 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 642 log_err("Got: "); 643 for(p=junkout;p<targ;p++) { 644 log_err("%d,", junokout[p-junkout]); 645 } 646 log_err("\n"); 647 log_err("Expected: "); 648 for(i=0; i<(targ-junkout); i++) { 649 log_err("%d,", expectOffsets[i]); 650 } 651 log_err("\n"); 652 log_err("output: "); 653 for(i=0; i<(targ-junkout); i++) { 654 log_err("%X,", junkout[i]); 655 } 656 log_err("\n"); 657 log_err("input: "); 658 for(i=0; i<(src-(const char *)source); i++) { 659 log_err("%X,", (unsigned char)source[i]); 660 } 661 log_err("\n"); 662 } 663 } 664 665 if(!memcmp(junkout, expect, expectlen*2)) 666 { 667 log_verbose("Matches!\n"); 668 return TC_OK; 669 } 670 else 671 { 672 log_err("String does not match. %s\n", gNuConvTestName); 673 log_verbose("String does not match. 
%s\n", gNuConvTestName); 674 printf("\nGot:"); 675 printUSeqErr(junkout, expectlen); 676 printf("\nExpected:"); 677 printUSeqErr(expect, expectlen); 678 return TC_MISMATCH; 679 } 680} 681 682 683static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 684{ 685/** test chars #1 */ 686 /* 1 2 3 1Han 2Han 3Han . */ 687 static const UChar sampleText[] = 688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 689 static const UChar sampleTextRoundTripUnmappable[] = 690 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 691 692 693 static const uint8_t expectedUTF8[] = 694 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 695 static const int32_t toUTF8Offs[] = 696 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 697 static const int32_t fmUTF8Offs[] = 698 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 699 700#ifdef U_ENABLE_GENERIC_ISO_2022 701 /* Same as UTF8, but with ^[%B preceeding */ 702 static const const uint8_t expectedISO2022[] = 703 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 704 static const int32_t toISO2022Offs[] = 705 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 706 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 707 static const int32_t fmISO2022Offs[] = 708 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 709#endif 710 711 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . 
EBCDIC_STATEFUL */ 712 static const uint8_t expectedIBM930[] = 713 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 714 static const int32_t toIBM930Offs[] = 715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 716 static const int32_t fmIBM930Offs[] = 717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 718 719 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 720 static const uint8_t expectedIBM943[] = 721 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 722 static const int32_t toIBM943Offs [] = 723 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 724 static const int32_t fmIBM943Offs[] = 725 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 726 727 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 728 static const uint8_t expectedIBM9027[] = 729 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 730 static const int32_t toIBM9027Offs [] = 731 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 732 733 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 734 static const uint8_t expectedIBM920[] = 735 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 736 static const int32_t toIBM920Offs [] = 737 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 738 739 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 740 static const uint8_t expectedISO88593[] = 741 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 742 static const int32_t toISO88593Offs[] = 743 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 744 745 /* 1 2 3 0 <?> <?> <?> . 
<?> LATIN_1*/ 746 static const uint8_t expectedLATIN1[] = 747 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 748 static const int32_t toLATIN1Offs[] = 749 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 750 751 752 /* etc */ 753 static const uint8_t expectedUTF16BE[] = 754 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 755 static const int32_t toUTF16BEOffs[]= 756 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 757 static const int32_t fmUTF16BEOffs[] = 758 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 759 760 static const uint8_t expectedUTF16LE[] = 761 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 762 static const int32_t toUTF16LEOffs[]= 763 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 764 static const int32_t fmUTF16LEOffs[] = 765 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 766 767 static const uint8_t expectedUTF32BE[] = 768 { 0x00, 0x00, 0x00, 0x31, 769 0x00, 0x00, 0x00, 0x32, 770 0x00, 0x00, 0x00, 0x33, 771 0x00, 0x00, 0x00, 0x00, 772 0x00, 0x00, 0x4e, 0x00, 773 0x00, 0x00, 0x4e, 0x8c, 774 0x00, 0x00, 0x4e, 0x09, 775 0x00, 0x00, 0x00, 0x2e, 776 0x00, 0x02, 0x00, 0x21 }; 777 static const int32_t toUTF32BEOffs[]= 778 { 0x00, 0x00, 0x00, 0x00, 779 0x01, 0x01, 0x01, 0x01, 780 0x02, 0x02, 0x02, 0x02, 781 0x03, 0x03, 0x03, 0x03, 782 0x04, 0x04, 0x04, 0x04, 783 0x05, 0x05, 0x05, 0x05, 784 0x06, 0x06, 0x06, 0x06, 785 0x07, 0x07, 0x07, 0x07, 786 0x08, 0x08, 0x08, 0x08, 787 0x08, 0x08, 0x08, 0x08 }; 788 static const int32_t fmUTF32BEOffs[] = 789 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 790 791 static const uint8_t 
expectedUTF32LE[] = 792 { 0x31, 0x00, 0x00, 0x00, 793 0x32, 0x00, 0x00, 0x00, 794 0x33, 0x00, 0x00, 0x00, 795 0x00, 0x00, 0x00, 0x00, 796 0x00, 0x4e, 0x00, 0x00, 797 0x8c, 0x4e, 0x00, 0x00, 798 0x09, 0x4e, 0x00, 0x00, 799 0x2e, 0x00, 0x00, 0x00, 800 0x21, 0x00, 0x02, 0x00 }; 801 static const int32_t toUTF32LEOffs[]= 802 { 0x00, 0x00, 0x00, 0x00, 803 0x01, 0x01, 0x01, 0x01, 804 0x02, 0x02, 0x02, 0x02, 805 0x03, 0x03, 0x03, 0x03, 806 0x04, 0x04, 0x04, 0x04, 807 0x05, 0x05, 0x05, 0x05, 808 0x06, 0x06, 0x06, 0x06, 809 0x07, 0x07, 0x07, 0x07, 810 0x08, 0x08, 0x08, 0x08, 811 0x08, 0x08, 0x08, 0x08 }; 812 static const int32_t fmUTF32LEOffs[] = 813 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 814 815 816 817 818/** Test chars #2 **/ 819 820 /* Sahha [health], slashed h's */ 821 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 822 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 823 824 /* LMBCS */ 825 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 826 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 827 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 828 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 829 /*********************************** START OF CODE finally *************/ 830 831 gInBufferSize = insize; 832 gOutBufferSize = outsize; 833 834 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 835 836 837 /*UTF-8*/ 838 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 839 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 840 841 log_verbose("Test surrogate behaviour for UTF8\n"); 842 { 843 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 844 static 
const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 845 0xf0, 0x90, 0x90, 0x81, 846 0xef, 0xbf, 0xbd 847 }; 848 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 849 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), 850 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 851 852 853 } 854 855#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 856 /*ISO-2022*/ 857 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 858 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 859#endif 860 861 /*UTF16 LE*/ 862 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 863 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 864 /*UTF16 BE*/ 865 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 866 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 867 /*UTF32 LE*/ 868 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 869 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 870 /*UTF32 BE*/ 871 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 872 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 873 874 /*LATIN_1*/ 875 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 876 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 877 878#if !UCONFIG_NO_LEGACY_CONVERSION 879 /*EBCDIC_STATEFUL*/ 880 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 881 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 882 883 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 884 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 885 886 /*MBCS*/ 887 888 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 889 expectedIBM943, sizeof(expectedIBM943), 
"ibm-943", toIBM943Offs,FALSE ); 890 /*DBCS*/ 891 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 892 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 893 /*SBCS*/ 894 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 895 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 896 /*SBCS*/ 897 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 898 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 899#endif 900 901 902/****/ 903 904 /*UTF-8*/ 905 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 906 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); 907#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 908 /*ISO-2022*/ 909 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 910 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); 911#endif 912 913 /*UTF16 LE*/ 914 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 915 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 916 /*UTF16 BE*/ 917 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 918 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); 919 /*UTF32 LE*/ 920 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 921 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); 922 /*UTF32 BE*/ 923 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 924 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); 925 926#if !UCONFIG_NO_LEGACY_CONVERSION 927 /*EBCDIC_STATEFUL*/ 928 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 929 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); 930 /*MBCS*/ 931 testConvertToU(expectedIBM943, 
sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 932 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); 933#endif 934 935 /* Try it again to make sure it still works */ 936 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 937 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 938 939#if !UCONFIG_NO_LEGACY_CONVERSION 940 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 941 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); 942 943 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), 944 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 945 946 /*LMBCS*/ 947 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), 948 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 949 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 950 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); 951#endif 952 953 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 954 { 955 /* encode directly set D and set O */ 956 static const uint8_t utf7[] = { 957 /* 958 Hi Mom -+Jjo--! 959 A+ImIDkQ. 960 +- 961 +ZeVnLIqe- 962 */ 963 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 964 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 965 0x2b, 0x2d, 966 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 967 }; 968 static const UChar unicode[] = { 969 /* 970 Hi Mom -<WHITE SMILING FACE>-! 971 A<NOT IDENTICAL TO><ALPHA>. 
972 + 973 [Japanese word "nihongo"] 974 */ 975 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 976 0x41, 0x2262, 0x0391, 0x2e, 977 0x2b, 978 0x65e5, 0x672c, 0x8a9e 979 }; 980 static const int32_t toUnicodeOffsets[] = { 981 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 982 15, 17, 19, 23, 983 24, 984 27, 29, 32 985 }; 986 static const int32_t fromUnicodeOffsets[] = { 987 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 988 11, 12, 12, 12, 13, 13, 13, 13, 14, 989 15, 15, 990 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 991 }; 992 993 /* same but escaping set O (the exclamation mark) */ 994 static const uint8_t utf7Restricted[] = { 995 /* 996 Hi Mom -+Jjo--+ACE- 997 A+ImIDkQ. 998 +- 999 +ZeVnLIqe- 1000 */ 1001 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 1002 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 1003 0x2b, 0x2d, 1004 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 1005 }; 1006 static const int32_t toUnicodeOffsetsR[] = { 1007 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 1008 19, 21, 23, 27, 1009 28, 1010 31, 33, 36 1011 }; 1012 static const int32_t fromUnicodeOffsetsR[] = { 1013 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 1014 11, 12, 12, 12, 13, 13, 13, 13, 14, 1015 15, 15, 1016 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 1017 }; 1018 1019 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 1020 1021 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); 1022 1023 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 1024 1025 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 1026 } 1027 1028 /* 1029 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, 
1030 * modified according to RFC 2060, 1031 * and supplemented with the one example in RFC 2060 itself. 1032 */ 1033 { 1034 static const uint8_t imap[] = { 1035 /* Hi Mom -&Jjo--! 1036 A&ImIDkQ-. 1037 &- 1038 &ZeVnLIqe- 1039 \ 1040 ~peter 1041 /mail 1042 /&ZeVnLIqe- 1043 /&U,BTFw- 1044 */ 1045 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1046 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1047 0x26, 0x2d, 1048 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1049 0x5c, 1050 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1051 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1052 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1053 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1054 }; 1055 static const UChar unicode[] = { 1056 /* Hi Mom -<WHITE SMILING FACE>-! 1057 A<NOT IDENTICAL TO><ALPHA>. 1058 & 1059 [Japanese word "nihongo"] 1060 \ 1061 ~peter 1062 /mail 1063 /<65e5, 672c, 8a9e> 1064 /<53f0, 5317> 1065 */ 1066 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1067 0x41, 0x2262, 0x0391, 0x2e, 1068 0x26, 1069 0x65e5, 0x672c, 0x8a9e, 1070 0x5c, 1071 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1072 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1073 0x2f, 0x65e5, 0x672c, 0x8a9e, 1074 0x2f, 0x53f0, 0x5317 1075 }; 1076 static const int32_t toUnicodeOffsets[] = { 1077 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1078 15, 17, 19, 24, 1079 25, 1080 28, 30, 33, 1081 37, 1082 38, 39, 40, 41, 42, 43, 1083 44, 45, 46, 47, 48, 1084 49, 51, 53, 56, 1085 60, 62, 64 1086 }; 1087 static const int32_t fromUnicodeOffsets[] = { 1088 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1089 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1090 15, 15, 1091 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1092 19, 1093 20, 21, 22, 23, 24, 25, 1094 26, 27, 28, 29, 30, 1095 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1096 35, 36, 36, 36, 37, 37, 37, 37, 37 1097 }; 1098 1099 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), 
"IMAP-mailbox-name", fromUnicodeOffsets,FALSE); 1100 1101 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1102 } 1103 1104 /* Test UTF-8 bad data handling*/ 1105 { 1106 static const uint8_t utf8[]={ 1107 0x61, 1108 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1109 0x00, 1110 0x62, 1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1112 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1113 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1114 0xdf, 0xbf, /* 7ff */ 1115 0xbf, /* truncated tail */ 1116 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 1117 0x02 1118 }; 1119 1120 static const uint16_t utf8Expected[]={ 1121 0x0061, 1122 0xfffd, 1123 0x0000, 1124 0x0062, 1125 0xfffd, 1126 0xfffd, 1127 0xdbff, 0xdfff, 1128 0x07ff, 1129 0xfffd, 1130 0xfffd, 1131 0x0002 1132 }; 1133 1134 static const int32_t utf8Offsets[]={ 1135 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 1136 }; 1137 testConvertToU(utf8, sizeof(utf8), 1138 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); 1139 1140 } 1141 1142 /* Test UTF-32BE bad data handling*/ 1143 { 1144 static const uint8_t utf32[]={ 1145 0x00, 0x00, 0x00, 0x61, 1146 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1147 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1148 0x00, 0x00, 0x00, 0x62, 1149 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1150 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1151 0x00, 0x00, 0x01, 0x62, 1152 0x00, 0x00, 0x02, 0x62 1153 }; 1154 static const uint16_t utf32Expected[]={ 1155 0x0061, 1156 0xfffd, /* 0x110000 out of range */ 1157 0xDBFF, /* 0x10FFFF in range */ 1158 0xDFFF, 1159 0x0062, 1160 0xfffd, /* 0xffffffff out of range */ 1161 0xfffd, /* 0x7fffffff out of range */ 1162 0x0162, 1163 0x0262 1164 }; 1165 static const int32_t utf32Offsets[]={ 1166 0, 4, 8, 8, 12, 16, 20, 24, 28 1167 }; 1168 static const uint8_t utf32ExpectedBack[]={ 1169 0x00, 0x00, 0x00, 0x61, 1170 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out 
of range */ 1171 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1172 0x00, 0x00, 0x00, 0x62, 1173 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1174 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1175 0x00, 0x00, 0x01, 0x62, 1176 0x00, 0x00, 0x02, 0x62 1177 }; 1178 static const int32_t utf32OffsetsBack[]={ 1179 0,0,0,0, 1180 1,1,1,1, 1181 2,2,2,2, 1182 4,4,4,4, 1183 5,5,5,5, 1184 6,6,6,6, 1185 7,7,7,7, 1186 8,8,8,8 1187 }; 1188 1189 testConvertToU(utf32, sizeof(utf32), 1190 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); 1191 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1192 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1193 } 1194 1195 /* Test UTF-32LE bad data handling*/ 1196 { 1197 static const uint8_t utf32[]={ 1198 0x61, 0x00, 0x00, 0x00, 1199 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1200 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1201 0x62, 0x00, 0x00, 0x00, 1202 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1203 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1204 0x62, 0x01, 0x00, 0x00, 1205 0x62, 0x02, 0x00, 0x00, 1206 }; 1207 1208 static const uint16_t utf32Expected[]={ 1209 0x0061, 1210 0xfffd, /* 0x110000 out of range */ 1211 0xDBFF, /* 0x10FFFF in range */ 1212 0xDFFF, 1213 0x0062, 1214 0xfffd, /* 0xffffffff out of range */ 1215 0xfffd, /* 0x7fffffff out of range */ 1216 0x0162, 1217 0x0262 1218 }; 1219 static const int32_t utf32Offsets[]={ 1220 0, 4, 8, 8, 12, 16, 20, 24, 28 1221 }; 1222 static const uint8_t utf32ExpectedBack[]={ 1223 0x61, 0x00, 0x00, 0x00, 1224 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1225 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1226 0x62, 0x00, 0x00, 0x00, 1227 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1228 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1229 0x62, 0x01, 0x00, 0x00, 1230 0x62, 0x02, 0x00, 0x00 1231 }; 1232 static 
const int32_t utf32OffsetsBack[]={ 1233 0,0,0,0, 1234 1,1,1,1, 1235 2,2,2,2, 1236 4,4,4,4, 1237 5,5,5,5, 1238 6,6,6,6, 1239 7,7,7,7, 1240 8,8,8,8 1241 }; 1242 testConvertToU(utf32, sizeof(utf32), 1243 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); 1244 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1245 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1246 } 1247} 1248 1249static void TestCoverageMBCS(){ 1250#if 0 1251 UErrorCode status = U_ZERO_ERROR; 1252 const char *directory = loadTestData(&status); 1253 char* tdpath = NULL; 1254 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1255 int len = strlen(directory); 1256 char* index=NULL; 1257 1258 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1259 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1260 log_verbose("Retrieved data directory %s \n",saveDirectory); 1261 uprv_strcpy(tdpath,directory); 1262 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1263 1264 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1265 *(index+1)=0; 1266 } 1267 u_setDataDirectory(tdpath); 1268 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1269#endif 1270 1271 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1272 which is test file for MBCS conversion with single-byte codepage data.*/ 1273 { 1274 1275 /* MBCS with single byte codepage data test1.ucm*/ 1276 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1277 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1278 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1279 1280 /*from Unicode*/ 1281 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1282 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1283 } 1284 1285 /*some more test to increase the code coverage in MBCS. 
Create an test converter from test3.ucm 1286 which is test file for MBCS conversion with three-byte codepage data.*/ 1287 { 1288 1289 /* MBCS with three byte codepage data test3.ucm*/ 1290 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1291 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1292 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1293 1294 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1295 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1296 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1297 1298 /*from Unicode*/ 1299 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1300 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1301 1302 /*to Unicode*/ 1303 testConvertToU(test3input, sizeof(test3input), 1304 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); 1305 1306 } 1307 1308 /*some more test to increase the code coverage in MBCS. 
Create an test converter from test4.ucm 1309 which is test file for MBCS conversion with four-byte codepage data.*/ 1310 { 1311 1312 /* MBCS with three byte codepage data test4.ucm*/ 1313 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1314 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1315 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1316 1317 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1318 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1319 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1320 1321 /*from Unicode*/ 1322 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1323 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1324 1325 /*to Unicode*/ 1326 testConvertToU(test4input, sizeof(test4input), 1327 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); 1328 1329 } 1330#if 0 1331 free(tdpath); 1332 /* restore the original data directory */ 1333 log_verbose("Setting the data directory to %s \n", saveDirectory); 1334 u_setDataDirectory(saveDirectory); 1335 free(saveDirectory); 1336#endif 1337 1338} 1339 1340static void TestConverterType(const char *convName, UConverterType convType) { 1341 UConverter* myConverter; 1342 UErrorCode err = U_ZERO_ERROR; 1343 1344 myConverter = my_ucnv_open(convName, &err); 1345 1346 if (U_FAILURE(err)) { 1347 log_data_err("Failed to create an %s converter\n", convName); 1348 return; 1349 } 1350 else 1351 { 1352 if (ucnv_getType(myConverter)!=convType) { 1353 log_err("ucnv_getType Failed for %s. 
Got enum value 0x%X\n", 1354 convName, (unsigned int)ucnv_getType(myConverter)); /* report the type the converter actually returned; convType is only the expected value */ 1355 } 1356 else { 1357 log_verbose("ucnv_getType %s ok\n", convName); 1358 } 1359 } 1360 ucnv_close(myConverter); 1361} 1362 1363static void TestConverterTypesAndStarters() 1364{ 1365#if !UCONFIG_NO_LEGACY_CONVERSION 1366 UConverter* myConverter; 1367 UErrorCode err = U_ZERO_ERROR; 1368 UBool mystarters[256]; 1369 1370/* const UBool expectedKSCstarters[256] = { 1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1377 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1378 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1379 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1385 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1386 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1388 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1389 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1390 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1391 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1392 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1393 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1394 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1395 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1397 1398 1399 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1400 1401 myConverter = ucnv_open("ksc", &err); 1402 if (U_FAILURE(err)) { 1403 log_data_err("Failed to create an ibm-ksc converter\n"); 1404 return; 1405 } 1406 else 1407 { 1408 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1409 log_err("ucnv_getType Failed for ibm-949\n"); 1410 else 1411 log_verbose("ucnv_getType ibm-949 ok\n"); 1412 1413 if(myConverter!=NULL) 1414 ucnv_getStarters(myConverter, mystarters, &err); 1415 1416 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1417 log_err("Failed ucnv_getStarters for ksc\n"); 1418 else 1419 log_verbose("ucnv_getStarters ok\n");*/ 1420 1421 } 1422 ucnv_close(myConverter); 1423 1424 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1425 TestConverterType("ibm-878", UCNV_SBCS); 1426#endif 1427 1428 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1429 1430 TestConverterType("ibm-1208", UCNV_UTF8); 1431 1432 TestConverterType("utf-8", UCNV_UTF8); 1433 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1434 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1435 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1436 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1437 1438#if !UCONFIG_NO_LEGACY_CONVERSION 1439 1440#if defined(U_ENABLE_GENERIC_ISO_2022) 1441 TestConverterType("iso-2022", UCNV_ISO_2022); 1442#endif 1443 1444 TestConverterType("hz", UCNV_HZ); 1445#endif 1446 1447 TestConverterType("scsu", UCNV_SCSU); 1448 1449#if !UCONFIG_NO_LEGACY_CONVERSION 1450 TestConverterType("x-iscii-de", UCNV_ISCII); 1451#endif 1452 1453 TestConverterType("ascii", UCNV_US_ASCII); 1454 TestConverterType("utf-7", UCNV_UTF7); 1455 
TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1456 TestConverterType("bocu-1", UCNV_BOCU1); 1457} 1458 1459static void 1460TestAmbiguousConverter(UConverter *cnv) { 1461 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1462 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1463 1464 const char *s; 1465 UChar *u; 1466 UErrorCode errorCode; 1467 UBool isAmbiguous; 1468 1469 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1470 errorCode=U_ZERO_ERROR; 1471 s=inBytes; 1472 u=outUnicode; 1473 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1474 if(U_FAILURE(errorCode)) { 1475 /* we do not care about general failures in this test; the input may just not be mappable */ 1476 return; 1477 } 1478 1479 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1480 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1481 /* There are some encodings that are partially ASCII based, 1482 like the ISO-7 and GSM series of codepages, which we ignore. 
*/ 1483 return; 1484 } 1485 1486 isAmbiguous=ucnv_isAmbiguous(cnv); 1487 1488 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1489 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1490 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1491 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1492 return; 1493 } 1494 1495 if(outUnicode[2]!=0x5c) { 1496 /* needs fixup, fix it */ 1497 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1498 if(outUnicode[2]!=0x5c) { 1499 /* the fix failed */ 1500 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1501 return; 1502 } 1503 } 1504} 1505 1506static void TestAmbiguous() 1507{ 1508 UErrorCode status = U_ZERO_ERROR; 1509 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1510 static const char target[] = { 1511 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1512 0x5c, 0x75, 0x73, 0x72, 1513 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1514 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1515 0x5c, 0x64, 0x61, 0x74, 0x61, 1516 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1517 0 1518 }; 1519 UChar asciiResult[200], sjisResult[200]; 1520 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; 1521 const char *name; 1522 1523 /* enumerate all converters */ 1524 status=U_ZERO_ERROR; 1525 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1526 cnv=ucnv_open(name, &status); 1527 if(U_SUCCESS(status)) { 1528 TestAmbiguousConverter(cnv); 1529 ucnv_close(cnv); 1530 } else { 1531 log_err("error: unable to open available converter \"%s\"\n", name); 1532 status=U_ZERO_ERROR; 1533 } 1534 } 1535 1536#if !UCONFIG_NO_LEGACY_CONVERSION 1537 sjis_cnv = ucnv_open("ibm-943", &status); 1538 if (U_FAILURE(status)) 1539 { 1540 log_data_err("Failed to create a SJIS converter\n"); 1541 return; 1542 } 1543 ascii_cnv = ucnv_open("LATIN-1", &status); 1544 if (U_FAILURE(status)) 1545 { 1546 log_data_err("Failed to create a LATIN-1 
converter\n"); 1547 ucnv_close(sjis_cnv); 1548 return; 1549 } 1550 /* convert target from SJIS to Unicode */ 1551 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1552 if (U_FAILURE(status)) 1553 { 1554 log_err("Failed to convert the SJIS string.\n"); 1555 ucnv_close(sjis_cnv); 1556 ucnv_close(ascii_cnv); 1557 return; 1558 } 1559 /* convert target from Latin-1 to Unicode */ 1560 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1561 if (U_FAILURE(status)) 1562 { 1563 log_err("Failed to convert the Latin-1 string.\n"); 1564 ucnv_close(sjis_cnv); 1565 ucnv_close(ascii_cnv); 1566 return; 1567 } 1568 if (!ucnv_isAmbiguous(sjis_cnv)) 1569 { 1570 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1571 ucnv_close(sjis_cnv); 1572 ucnv_close(ascii_cnv); 1573 return; 1574 } 1575 if (u_strcmp(sjisResult, asciiResult) == 0) 1576 { 1577 log_err("File separators for SJIS don't need to be fixed.\n"); 1578 } 1579 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1580 if (u_strcmp(sjisResult, asciiResult) != 0) 1581 { 1582 log_err("Fixing file separator for SJIS failed.\n"); 1583 } 1584 ucnv_close(sjis_cnv); 1585 ucnv_close(ascii_cnv); 1586#endif 1587} 1588 1589static void 1590TestSignatureDetection(){ 1591 /* with null terminated strings */ 1592 { 1593 static const char* data[] = { 1594 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1595 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1596 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1597 "\x0E\xFE\xFF\x00", /* SCSU */ 1598 1599 "\xFE\xFF", /* UTF-16BE */ 1600 "\xFF\xFE", /* UTF-16LE */ 1601 "\xEF\xBB\xBF", /* UTF-8 */ 1602 "\x0E\xFE\xFF", /* SCSU */ 1603 1604 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1605 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1606 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1607 "\x0E\xFE\xFF\x41", /* SCSU */ 1608 1609 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1610 "\x2B\x2F\x76\x38\x41", 
/* UTF-7 */ 1611 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1612 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1613 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1614 1615 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1616 }; 1617 static const char* expected[] = { 1618 "UTF-16BE", 1619 "UTF-16LE", 1620 "UTF-8", 1621 "SCSU", 1622 1623 "UTF-16BE", 1624 "UTF-16LE", 1625 "UTF-8", 1626 "SCSU", 1627 1628 "UTF-16BE", 1629 "UTF-16LE", 1630 "UTF-8", 1631 "SCSU", 1632 1633 "UTF-7", 1634 "UTF-7", 1635 "UTF-7", 1636 "UTF-7", 1637 "UTF-7", 1638 "UTF-EBCDIC" 1639 }; 1640 static const int32_t expectedLength[] ={ 1641 2, 1642 2, 1643 3, 1644 3, 1645 1646 2, 1647 2, 1648 3, 1649 3, 1650 1651 2, 1652 2, 1653 3, 1654 3, 1655 1656 5, 1657 4, 1658 4, 1659 4, 1660 4, 1661 4 1662 }; 1663 int i=0; 1664 UErrorCode err; 1665 int32_t signatureLength = -1; 1666 const char* source = NULL; 1667 const char* enc = NULL; 1668 for( ; i<sizeof(data)/sizeof(char*); i++){ 1669 err = U_ZERO_ERROR; 1670 source = data[i]; 1671 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1672 if(U_FAILURE(err)){ 1673 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1674 continue; 1675 } 1676 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1677 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1678 continue; 1679 } 1680 if(signatureLength != expectedLength[i]){ 1681 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. 
Got length: %i\n",source,i,expectedLength[i],signatureLength); /* arguments follow the format order: expected length first, then the detected length */ 1682 } 1683 } 1684 } 1685 { /* second pass: the same detection with an explicit source length taken from len[] instead of -1 */ 1686 static const char* data[] = { 1687 "\xFE\xFF\x00", /* UTF-16BE */ 1688 "\xFF\xFE\x00", /* UTF-16LE */ 1689 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1690 "\x0E\xFE\xFF\x00", /* SCSU */ 1691 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1692 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1693 "\xFE\xFF", /* UTF-16BE */ 1694 "\xFF\xFE", /* UTF-16LE */ 1695 "\xEF\xBB\xBF", /* UTF-8 */ 1696 "\x0E\xFE\xFF", /* SCSU */ 1697 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1698 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1699 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1700 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1701 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1702 "\x0E\xFE\xFF\x41", /* SCSU */ 1703 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1704 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1705 "\xFB\xEE\x28", /* BOCU-1 */ 1706 "\xFF\x41\x42" /* NULL */ 1707 }; 1708 static const int len[] = { 1709 3, 1710 3, 1711 4, 1712 4, 1713 4, 1714 4, 1715 2, 1716 2, 1717 3, 1718 3, 1719 4, 1720 4, 1721 4, 1722 4, 1723 4, 1724 4, 1725 5, 1726 5, 1727 3, 1728 3 1729 }; 1730 1731 static const char* expected[] = { 1732 "UTF-16BE", 1733 "UTF-16LE", 1734 "UTF-8", 1735 "SCSU", 1736 "UTF-32BE", 1737 "UTF-32LE", 1738 "UTF-16BE", 1739 "UTF-16LE", 1740 "UTF-8", 1741 "SCSU", 1742 "UTF-32BE", 1743 "UTF-32LE", 1744 "UTF-16BE", 1745 "UTF-16LE", 1746 "UTF-8", 1747 "SCSU", 1748 "UTF-32BE", 1749 "UTF-32LE", 1750 "BOCU-1", 1751 NULL 1752 }; 1753 static const int32_t expectedLength[] ={ 1754 2, 1755 2, 1756 3, 1757 3, 1758 4, 1759 4, 1760 2, 1761 2, 1762 3, 1763 3, 1764 4, 1765 4, 1766 2, 1767 2, 1768 3, 1769 3, 1770 4, 1771 4, 1772 3, 1773 0 1774 }; 1775 int i=0; 1776 UErrorCode err; 1777 int32_t signatureLength = -1; 1778 int32_t sourceLength=-1; 1779 const char* source = NULL; 1780 const char* enc = NULL; 1781 for( ; i<sizeof(data)/sizeof(char*); i++){ 1782 err = U_ZERO_ERROR; 1783 source = data[i]; 1784 sourceLength = len[i]; 1785 enc = ucnv_detectUnicodeSignature(source, sourceLength , 
&signatureLength, &err); 1786 if(U_FAILURE(err)){ 1787 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1788 continue; 1789 } 1790 if(expected[i] == NULL ? enc != NULL : (enc == NULL || strcmp(enc,expected[i]) !=0)){ 1791 /* expected[i]==NULL means "no signature": never hand NULL to strcmp() or to a %s conversion, and treat an unexpected detection as a failure */ 1792 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i] != NULL ? expected[i] : "(null)",enc != NULL ? enc : "(null)"); 1793 continue; 1794 } 1795 1796 if(signatureLength != expectedLength[i]){ 1797 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,expectedLength[i],signatureLength); /* expected first, detected second, matching the format string */ 1798 } 1799 } 1800 } 1801} 1802 1803static void TestUTF7() { 1804 /* test input */ 1805 static const uint8_t in[]={ 1806 /* H - +Jjo- - ! +- +2AHcAQ */ 1807 0x48, 1808 0x2d, 1809 0x2b, 0x4a, 0x6a, 0x6f, 1810 0x2d, 0x2d, 1811 0x21, 1812 0x2b, 0x2d, 1813 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1814 }; 1815 1816 /* expected test results */ 1817 static const int32_t results[]={ 1818 /* number of bytes read, code point */ 1819 1, 0x48, 1820 1, 0x2d, 1821 4, 0x263a, /* <WHITE SMILING FACE> */ 1822 2, 0x2d, 1823 1, 0x21, 1824 2, 0x2b, 1825 7, 0x10401 1826 }; 1827 1828 const char *cnvName; 1829 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1830 UErrorCode errorCode=U_ZERO_ERROR; 1831 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1832 if(U_FAILURE(errorCode)) { 1833 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* shouldn't be a data err */ 1834 return; 1835 } 1836 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1837 /* Test the condition when source >= sourceLimit */ 1838 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1839 cnvName = ucnv_getName(cnv, &errorCode); 1840 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1841 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 
1842 } 1843 ucnv_close(cnv); 1844} 1845 1846static void TestIMAP() { 1847 /* test input */ 1848 static const uint8_t in[]={ 1849 /* H - &Jjo- - ! &- &2AHcAQ- \ */ 1850 0x48, 1851 0x2d, 1852 0x26, 0x4a, 0x6a, 0x6f, 1853 0x2d, 0x2d, 1854 0x21, 1855 0x26, 0x2d, 1856 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1857 }; 1858 1859 /* expected test results */ 1860 static const int32_t results[]={ 1861 /* number of bytes read, code point */ 1862 1, 0x48, 1863 1, 0x2d, 1864 4, 0x263a, /* <WHITE SMILING FACE> */ 1865 2, 0x2d, 1866 1, 0x21, 1867 2, 0x26, 1868 7, 0x10401 1869 }; 1870 1871 const char *cnvName; 1872 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1873 UErrorCode errorCode=U_ZERO_ERROR; 1874 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1875 if(U_FAILURE(errorCode)) { 1876 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1877 return; 1878 } 1879 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1880 /* Test the condition when source >= sourceLimit */ 1881 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1882 cnvName = ucnv_getName(cnv, &errorCode); 1883 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1884 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1885 } 1886 ucnv_close(cnv); 1887} 1888 1889static void TestUTF8() { 1890 /* test input */ 1891 static const uint8_t in[]={ 1892 0x61, 1893 0xc2, 0x80, 1894 0xe0, 0xa0, 0x80, 1895 0xf0, 0x90, 0x80, 0x80, 1896 0xf4, 0x84, 0x8c, 0xa1, 1897 0xf0, 0x90, 0x90, 0x81 1898 }; 1899 1900 /* expected test results */ 1901 static const int32_t results[]={ 1902 /* number of bytes read, code point */ 1903 1, 0x61, 1904 2, 0x80, 1905 3, 0x800, 1906 4, 0x10000, 1907 4, 0x104321, 1908 4, 0x10401 1909 }; 1910 1911 /* error test input */ 1912 static const uint8_t in2[]={ 1913 0x61, 1914 0xc0, 0x80, 
/* illegal non-shortest form */ 1915 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1916 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1917 0xc0, 0xc0, /* illegal trail byte */ 1918 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1919 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1920 0xfe, /* illegal byte altogether */ 1921 0x62 1922 }; 1923 1924 /* expected error test results */ 1925 static const int32_t results2[]={ 1926 /* number of bytes read, code point */ 1927 1, 0x61, 1928 22, 0x62 1929 }; 1930 1931 UConverterToUCallback cb; 1932 const void *p; 1933 1934 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1935 UErrorCode errorCode=U_ZERO_ERROR; 1936 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1937 if(U_FAILURE(errorCode)) { 1938 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1939 return; 1940 } 1941 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1942 /* Test the condition when source >= sourceLimit */ 1943 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1944 1945 /* test error behavior with a skip callback */ 1946 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1947 source=(const char *)in2; 1948 limit=(const char *)(in2+sizeof(in2)); 1949 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1950 1951 ucnv_close(cnv); 1952} 1953 1954static void TestCESU8() { 1955 /* test input */ 1956 static const uint8_t in[]={ 1957 0x61, 1958 0xc2, 0x80, 1959 0xe0, 0xa0, 0x80, 1960 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1961 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1962 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1963 0xef, 0xbf, 0xbc 1964 }; 1965 1966 /* expected test results */ 1967 static const int32_t results[]={ 1968 /* number of bytes read, code point */ 1969 1, 0x61, 1970 2, 0x80, 1971 3, 0x800, 1972 6, 0x10000, 1973 3, 0xdc01, 1974 -1,0xd802, /* may read 3 or 6 bytes */ 1975 -1,0x10ffff,/* may read 0 or 3 bytes */ 1976 3, 0xfffc 
1977 }; 1978 1979 /* error test input */ 1980 static const uint8_t in2[]={ 1981 0x61, 1982 0xc0, 0x80, /* illegal non-shortest form */ 1983 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1984 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1985 0xc0, 0xc0, /* illegal trail byte */ 1986 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 1987 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 1988 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 1989 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1990 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1991 0xfe, /* illegal byte altogether */ 1992 0x62 1993 }; 1994 1995 /* expected error test results */ 1996 static const int32_t results2[]={ 1997 /* number of bytes read, code point */ 1998 1, 0x61, 1999 34, 0x62 2000 }; 2001 2002 UConverterToUCallback cb; 2003 const void *p; 2004 2005 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 2006 UErrorCode errorCode=U_ZERO_ERROR; 2007 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 2008 if(U_FAILURE(errorCode)) { 2009 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 2010 return; 2011 } 2012 TestNextUChar(cnv, source, limit, results, "CESU-8"); 2013 /* Test the condition when source >= sourceLimit */ 2014 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2015 2016 /* test error behavior with a skip callback */ 2017 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2018 source=(const char *)in2; 2019 limit=(const char *)(in2+sizeof(in2)); 2020 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 2021 2022 ucnv_close(cnv); 2023} 2024 2025static void TestUTF16() { 2026 /* test input */ 2027 static const uint8_t in1[]={ 2028 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2029 }; 2030 static const uint8_t in2[]={ 2031 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2032 }; 2033 static const uint8_t in3[]={ 2034 0xfe, 
0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2035 }; 2036 2037 /* expected test results */ 2038 static const int32_t results1[]={ 2039 /* number of bytes read, code point */ 2040 4, 0x4e00, 2041 2, 0xfeff 2042 }; 2043 static const int32_t results2[]={ 2044 /* number of bytes read, code point */ 2045 4, 0x004e, 2046 2, 0xfffe 2047 }; 2048 static const int32_t results3[]={ 2049 /* number of bytes read, code point */ 2050 2, 0xfefe, 2051 2, 0x4e00, 2052 2, 0xfeff, 2053 4, 0x20001 2054 }; 2055 2056 const char *source, *limit; 2057 2058 UErrorCode errorCode=U_ZERO_ERROR; 2059 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2060 if(U_FAILURE(errorCode)) { 2061 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2062 return; 2063 } 2064 2065 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2066 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2067 2068 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2069 ucnv_resetToUnicode(cnv); 2070 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2071 2072 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2073 ucnv_resetToUnicode(cnv); 2074 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2075 2076 /* Test the condition when source >= sourceLimit */ 2077 ucnv_resetToUnicode(cnv); 2078 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2079 2080 ucnv_close(cnv); 2081} 2082 2083static void TestUTF16BE() { 2084 /* test input */ 2085 static const uint8_t in[]={ 2086 0x00, 0x61, 2087 0x00, 0xc0, 2088 0x00, 0x31, 2089 0x00, 0xf4, 2090 0xce, 0xfe, 2091 0xd8, 0x01, 0xdc, 0x01 2092 }; 2093 2094 /* expected test results */ 2095 static const int32_t results[]={ 2096 /* number of bytes read, code point */ 2097 2, 0x61, 2098 2, 0xc0, 2099 2, 0x31, 2100 2, 0xf4, 2101 2, 0xcefe, 2102 4, 0x10401 2103 }; 2104 2105 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2106 UErrorCode 
errorCode=U_ZERO_ERROR; 2107 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2108 if(U_FAILURE(errorCode)) { 2109 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); 2110 return; 2111 } 2112 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2113 /* Test the condition when source >= sourceLimit */ 2114 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2115 /*Test for the condition where there is an invalid character*/ 2116 { 2117 static const uint8_t source2[]={0x61}; 2118 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2119 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2120 } 2121#if 0 2122 /* 2123 * Test disabled because currently the UTF-16BE/LE converters are supposed 2124 * to not set errors for unpaired surrogates. 2125 * This may change with 2126 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2127 */ 2128 2129 /*Test for the condition where there is a surrogate pair*/ 2130 { 2131 const uint8_t source2[]={0xd8, 0x01}; 2132 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2133 } 2134#endif 2135 ucnv_close(cnv); 2136} 2137 2138static void 2139TestUTF16LE() { 2140 /* test input */ 2141 static const uint8_t in[]={ 2142 0x61, 0x00, 2143 0x31, 0x00, 2144 0x4e, 0x2e, 2145 0x4e, 0x00, 2146 0x01, 0xd8, 0x01, 0xdc 2147 }; 2148 2149 /* expected test results */ 2150 static const int32_t results[]={ 2151 /* number of bytes read, code point */ 2152 2, 0x61, 2153 2, 0x31, 2154 2, 0x2e4e, 2155 2, 0x4e, 2156 4, 0x10401 2157 }; 2158 2159 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2160 UErrorCode errorCode=U_ZERO_ERROR; 2161 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2162 if(U_FAILURE(errorCode)) { 2163 log_err("Unable to open a 
UTF16-LE converter: %s\n", u_errorName(errorCode)); 2164 return; 2165 } 2166 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2167 /* Test the condition when source >= sourceLimit */ 2168 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2169 /*Test for the condition where there is an invalid character*/ 2170 { 2171 static const uint8_t source2[]={0x61}; 2172 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2173 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2174 } 2175#if 0 2176 /* 2177 * Test disabled because currently the UTF-16BE/LE converters are supposed 2178 * to not set errors for unpaired surrogates. 2179 * This may change with 2180 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2181 */ 2182 2183 /*Test for the condition where there is a surrogate character*/ 2184 { 2185 static const uint8_t source2[]={0x01, 0xd8}; 2186 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2187 } 2188#endif 2189 2190 ucnv_close(cnv); 2191} 2192 2193static void TestUTF32() { 2194 /* test input */ 2195 static const uint8_t in1[]={ 2196 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2197 }; 2198 static const uint8_t in2[]={ 2199 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2200 }; 2201 static const uint8_t in3[]={ 2202 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2203 }; 2204 2205 /* expected test results */ 2206 static const int32_t results1[]={ 2207 /* number of bytes read, code point */ 2208 8, 0x100f00, 2209 4, 0xfeff 2210 }; 2211 static const int32_t results2[]={ 2212 /* number of bytes read, code point */ 2213 8, 0x0f1000, 2214 4, 0xfffe 2215 }; 2216 static const int32_t results3[]={ 2217 /* 
number of bytes read, code point */ 2218 4, 0xfefe, 2219 4, 0x100f00, 2220 4, 0xfffd, /* unmatched surrogate */ 2221 4, 0xfffd /* unmatched surrogate */ 2222 }; 2223 2224 const char *source, *limit; 2225 2226 UErrorCode errorCode=U_ZERO_ERROR; 2227 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2228 if(U_FAILURE(errorCode)) { 2229 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2230 return; 2231 } 2232 2233 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2234 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2235 2236 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2237 ucnv_resetToUnicode(cnv); 2238 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2239 2240 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2241 ucnv_resetToUnicode(cnv); 2242 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2243 2244 /* Test the condition when source >= sourceLimit */ 2245 ucnv_resetToUnicode(cnv); 2246 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2247 2248 ucnv_close(cnv); 2249} 2250 2251static void 2252TestUTF32BE() { 2253 /* test input */ 2254 static const uint8_t in[]={ 2255 0x00, 0x00, 0x00, 0x61, 2256 0x00, 0x00, 0x30, 0x61, 2257 0x00, 0x00, 0xdc, 0x00, 2258 0x00, 0x00, 0xd8, 0x00, 2259 0x00, 0x00, 0xdf, 0xff, 2260 0x00, 0x00, 0xff, 0xfe, 2261 0x00, 0x10, 0xab, 0xcd, 2262 0x00, 0x10, 0xff, 0xff 2263 }; 2264 2265 /* expected test results */ 2266 static const int32_t results[]={ 2267 /* number of bytes read, code point */ 2268 4, 0x61, 2269 4, 0x3061, 2270 4, 0xfffd, 2271 4, 0xfffd, 2272 4, 0xfffd, 2273 4, 0xfffe, 2274 4, 0x10abcd, 2275 4, 0x10ffff 2276 }; 2277 2278 /* error test input */ 2279 static const uint8_t in2[]={ 2280 0x00, 0x00, 0x00, 0x61, 2281 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2282 0x00, 0x00, 0x00, 0x62, 2283 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2284 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of 
range */ 2285 0x00, 0x00, 0x01, 0x62, 2286 0x00, 0x00, 0x02, 0x62 2287 }; 2288 2289 /* expected error test results */ 2290 static const int32_t results2[]={ 2291 /* number of bytes read, code point */ 2292 4, 0x61, 2293 8, 0x62, 2294 12, 0x162, 2295 4, 0x262 2296 }; 2297 2298 UConverterToUCallback cb; 2299 const void *p; 2300 2301 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2302 UErrorCode errorCode=U_ZERO_ERROR; 2303 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2304 if(U_FAILURE(errorCode)) { 2305 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2306 return; 2307 } 2308 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2309 2310 /* Test the condition when source >= sourceLimit */ 2311 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2312 2313 /* test error behavior with a skip callback */ 2314 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2315 source=(const char *)in2; 2316 limit=(const char *)(in2+sizeof(in2)); 2317 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2318 2319 ucnv_close(cnv); 2320} 2321 2322static void 2323TestUTF32LE() { 2324 /* test input */ 2325 static const uint8_t in[]={ 2326 0x61, 0x00, 0x00, 0x00, 2327 0x61, 0x30, 0x00, 0x00, 2328 0x00, 0xdc, 0x00, 0x00, 2329 0x00, 0xd8, 0x00, 0x00, 2330 0xff, 0xdf, 0x00, 0x00, 2331 0xfe, 0xff, 0x00, 0x00, 2332 0xcd, 0xab, 0x10, 0x00, 2333 0xff, 0xff, 0x10, 0x00 2334 }; 2335 2336 /* expected test results */ 2337 static const int32_t results[]={ 2338 /* number of bytes read, code point */ 2339 4, 0x61, 2340 4, 0x3061, 2341 4, 0xfffd, 2342 4, 0xfffd, 2343 4, 0xfffd, 2344 4, 0xfffe, 2345 4, 0x10abcd, 2346 4, 0x10ffff 2347 }; 2348 2349 /* error test input */ 2350 static const uint8_t in2[]={ 2351 0x61, 0x00, 0x00, 0x00, 2352 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2353 0x62, 0x00, 0x00, 0x00, 2354 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range 
*/ 2355 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2356 0x62, 0x01, 0x00, 0x00, 2357 0x62, 0x02, 0x00, 0x00, 2358 }; 2359 2360 /* expected error test results */ 2361 static const int32_t results2[]={ 2362 /* number of bytes read, code point */ 2363 4, 0x61, 2364 8, 0x62, 2365 12, 0x162, 2366 4, 0x262, 2367 }; 2368 2369 UConverterToUCallback cb; 2370 const void *p; 2371 2372 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2373 UErrorCode errorCode=U_ZERO_ERROR; 2374 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2375 if(U_FAILURE(errorCode)) { 2376 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2377 return; 2378 } 2379 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2380 2381 /* Test the condition when source >= sourceLimit */ 2382 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2383 2384 /* test error behavior with a skip callback */ 2385 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2386 source=(const char *)in2; 2387 limit=(const char *)(in2+sizeof(in2)); 2388 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2389 2390 ucnv_close(cnv); 2391} 2392 2393static void 2394TestLATIN1() { 2395 /* test input */ 2396 static const uint8_t in[]={ 2397 0x61, 2398 0x31, 2399 0x32, 2400 0xc0, 2401 0xf0, 2402 0xf4, 2403 }; 2404 2405 /* expected test results */ 2406 static const int32_t results[]={ 2407 /* number of bytes read, code point */ 2408 1, 0x61, 2409 1, 0x31, 2410 1, 0x32, 2411 1, 0xc0, 2412 1, 0xf0, 2413 1, 0xf4, 2414 }; 2415 static const uint16_t in1[] = { 2416 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2417 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2418 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2419 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 
0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2420 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2421 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2422 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2423 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2424 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2425 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2426 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2427 0xcb, 0x82 2428 }; 2429 static const uint8_t out1[] = { 2430 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2431 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2432 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2433 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2434 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2435 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2436 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2437 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2438 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2439 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2440 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2441 0xcb, 0x82 2442 }; 2443 static const uint16_t in2[]={ 2444 0x1B, 
0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2445 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2446 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2447 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2448 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2449 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2450 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2451 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2452 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2453 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2454 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2455 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2456 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2457 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2458 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2459 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2460 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2461 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2462 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2463 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2464 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2465 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2466 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2467 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2468 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2469 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2470 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2471 0x37, 0x20, 0x2A, 0x2F, 2472 }; 2473 static const unsigned char out2[]={ 2474 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2475 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2476 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 
0x2F, 0x2A, 2477 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2478 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2479 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2480 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2481 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2482 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2483 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2484 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2485 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2486 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2487 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2488 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2489 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2490 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2491 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2492 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2493 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2494 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2495 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2496 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2497 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2498 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2499 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2500 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2501 0x37, 0x20, 0x2A, 0x2F, 2502 }; 2503 const char *source=(const char *)in; 2504 const char *limit=(const char *)in+sizeof(in); 2505 2506 UErrorCode errorCode=U_ZERO_ERROR; 2507 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2508 if(U_FAILURE(errorCode)) { 2509 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2510 return; 2511 } 2512 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 
2513 /* Test the condition when source >= sourceLimit */ 2514 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2515 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2516 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2517 2518 ucnv_close(cnv); 2519} 2520 2521static void 2522TestSBCS() { 2523 /* test input */ 2524 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2525 /* expected test results */ 2526 static const int32_t results[]={ 2527 /* number of bytes read, code point */ 2528 1, 0x61, 2529 1, 0xbf, 2530 1, 0xc4, 2531 1, 0x2021, 2532 1, 0xf8ff, 2533 1, 0x00d9 2534 }; 2535 2536 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2537 UErrorCode errorCode=U_ZERO_ERROR; 2538 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2539 if(U_FAILURE(errorCode)) { 2540 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2541 return; 2542 } 2543 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2544 /* Test the condition when source >= sourceLimit */ 2545 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2546 /*Test for Illegal character */ /* 2547 { 2548 static const uint8_t input1[]={ 0xA1 }; 2549 const char* illegalsource=(const char*)input1; 2550 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2551 } 2552 */ 2553 ucnv_close(cnv); 2554} 2555 2556static void 2557TestDBCS() { 2558 /* test input */ 2559 static const uint8_t in[]={ 2560 0x44, 0x6a, 2561 0xc4, 0x9c, 2562 0x7a, 0x74, 2563 0x46, 0xab, 2564 0x42, 0x5b, 2565 2566 }; 2567 2568 /* expected test results */ 2569 static const int32_t results[]={ 2570 /* number of bytes read, code point */ 2571 2, 0x00a7, 2572 2, 0xe1d2, 2573 2, 0x6962, 2574 2, 0xf842, 2575 2, 0xffe5, 2576 }; 2577 
2578 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2579 UErrorCode errorCode=U_ZERO_ERROR; 2580 2581 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2582 if(U_FAILURE(errorCode)) { 2583 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2584 return; 2585 } 2586 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2587 /* Test the condition when source >= sourceLimit */ 2588 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2589 /*Test for the condition where there is an invalid character*/ 2590 { 2591 static const uint8_t source2[]={0x1a, 0x1b}; 2592 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2593 } 2594 /*Test for the condition where we have a truncated char*/ 2595 { 2596 static const uint8_t source1[]={0xc4}; 2597 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2598 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2599 } 2600 ucnv_close(cnv); 2601} 2602 2603static void 2604TestMBCS() { 2605 /* test input */ 2606 static const uint8_t in[]={ 2607 0x01, 2608 0xa6, 0xa3, 2609 0x00, 2610 0xa6, 0xa1, 2611 0x08, 2612 0xc2, 0x76, 2613 0xc2, 0x78, 2614 2615 }; 2616 2617 /* expected test results */ 2618 static const int32_t results[]={ 2619 /* number of bytes read, code point */ 2620 1, 0x0001, 2621 2, 0x250c, 2622 1, 0x0000, 2623 2, 0x2500, 2624 1, 0x0008, 2625 2, 0xd60c, 2626 2, 0xd60e, 2627 }; 2628 2629 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2630 UErrorCode errorCode=U_ZERO_ERROR; 2631 2632 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2633 if(U_FAILURE(errorCode)) { 2634 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); 2635 return; 2636 } 2637 TestNextUChar(cnv, source, 
limit, results, "MBCS(ibm-1363)"); 2638 /* Test the condition when source >= sourceLimit */ 2639 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2640 /*Test for the condition where there is an invalid character*/ 2641 { 2642 static const uint8_t source2[]={0xa1, 0x80}; 2643 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2644 } 2645 /*Test for the condition where we have a truncated char*/ 2646 { 2647 static const uint8_t source1[]={0xc4}; 2648 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2649 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2650 } 2651 ucnv_close(cnv); 2652 2653} 2654 2655#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 2656static void 2657TestICCRunout() { 2658/* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ 2659 2660 const char *cnvName = "ibm-1363"; 2661 UErrorCode status = U_ZERO_ERROR; 2662 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; 2663 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ 2664 const char *source = sourceData; 2665 const char *sourceLim = sourceData+sizeof(sourceData); 2666 UChar c1, c2, c3; 2667 UConverter *cnv=ucnv_open(cnvName, &status); 2668 if(U_FAILURE(status)) { 2669 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); 2670 return; 2671 } 2672 2673#if 0 2674 { 2675 UChar targetBuf[256]; 2676 UChar *target = targetBuf; 2677 UChar *targetLim = target+256; 2678 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); 2679 2680 log_info("After convert: target@%d, source@%d, status%s\n", 2681 target-targetBuf, source-sourceData, u_errorName(status)); 2682 2683 if(U_FAILURE(status)) { 2684 log_err("Failed to convert: %s\n", u_errorName(status)); 
2685 } else { 2686 2687 } 2688 } 2689#endif 2690 2691 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2692 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); 2693 2694 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2695 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); 2696 2697 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2698 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); 2699 2700 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { 2701 log_verbose("OK\n"); 2702 } else { 2703 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); 2704 } 2705 2706 ucnv_close(cnv); 2707 2708} 2709#endif 2710 2711#ifdef U_ENABLE_GENERIC_ISO_2022 2712 2713static void 2714TestISO_2022() { 2715 /* test input */ 2716 static const uint8_t in[]={ 2717 0x1b, 0x25, 0x42, 2718 0x31, 2719 0x32, 2720 0x61, 2721 0xc2, 0x80, 2722 0xe0, 0xa0, 0x80, 2723 0xf0, 0x90, 0x80, 0x80 2724 }; 2725 2726 2727 2728 /* expected test results */ 2729 static const int32_t results[]={ 2730 /* number of bytes read, code point */ 2731 4, 0x0031, /* 4 bytes including the escape sequence */ 2732 1, 0x0032, 2733 1, 0x61, 2734 2, 0x80, 2735 3, 0x800, 2736 4, 0x10000 2737 }; 2738 2739 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2740 UErrorCode errorCode=U_ZERO_ERROR; 2741 UConverter *cnv; 2742 2743 cnv=ucnv_open("ISO_2022", &errorCode); 2744 if(U_FAILURE(errorCode)) { 2745 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 2746 return; 2747 } 2748 TestNextUChar(cnv, source, limit, results, "ISO_2022"); 2749 2750 /* Test the condition when source >= sourceLimit */ 2751 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); 2752 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2753 /*Test for the 
condition where we have a truncated char*/ 2754 { 2755 static const uint8_t source1[]={0xc4}; 2756 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2757 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2758 } 2759 /*Test for the condition where there is an invalid character*/ 2760 { 2761 static const uint8_t source2[]={0xa1, 0x01}; 2762 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2763 } 2764 ucnv_close(cnv); 2765} 2766 2767#endif 2768 2769static void 2770TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2771 const UChar* uSource; 2772 const UChar* uSourceLimit; 2773 const char* cSource; 2774 const char* cSourceLimit; 2775 UChar *uTargetLimit =NULL; 2776 UChar *uTarget; 2777 char *cTarget; 2778 const char *cTargetLimit; 2779 char *cBuf; 2780 UChar *uBuf; /*,*test;*/ 2781 int32_t uBufSize = 120; 2782 int len=0; 2783 int i=2; 2784 UErrorCode errorCode=U_ZERO_ERROR; 2785 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2786 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2787 ucnv_reset(cnv); 2788 for(;--i>0; ){ 2789 uSource = (UChar*) source; 2790 uSourceLimit=(const UChar*)sourceLimit; 2791 cTarget = cBuf; 2792 uTarget = uBuf; 2793 cSource = cBuf; 2794 cTargetLimit = cBuf; 2795 uTargetLimit = uBuf; 2796 2797 do{ 2798 2799 cTargetLimit = cTargetLimit+ i; 2800 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2801 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2802 errorCode=U_ZERO_ERROR; 2803 continue; 2804 } 2805 2806 if(U_FAILURE(errorCode)){ 2807 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2808 return; 2809 } 2810 2811 }while (uSource<uSourceLimit); 2812 2813 cSourceLimit =cTarget; 2814 do{ 2815 uTargetLimit=uTargetLimit+i; 2816 
ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2817 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2818 errorCode=U_ZERO_ERROR; 2819 continue; 2820 } 2821 if(U_FAILURE(errorCode)){ 2822 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2823 return; 2824 } 2825 }while(cSource<cSourceLimit); 2826 2827 uSource = source; 2828 /*test =uBuf;*/ 2829 for(len=0;len<(int)(source - sourceLimit);len++){ 2830 if(uBuf[len]!=uSource[len]){ 2831 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2832 } 2833 } 2834 } 2835 free(uBuf); 2836 free(cBuf); 2837} 2838/* Test for Jitterbug 778 */ 2839static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2840 const UChar* uSource; 2841 const UChar* uSourceLimit; 2842 const char* cSource; 2843 UChar *uTargetLimit =NULL; 2844 UChar *uTarget; 2845 char *cTarget; 2846 const char *cTargetLimit; 2847 char *cBuf; 2848 UChar *uBuf,*test; 2849 int32_t uBufSize = 120; 2850 int numCharsInTarget=0; 2851 UErrorCode errorCode=U_ZERO_ERROR; 2852 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2853 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2854 uSource = source; 2855 uSourceLimit=sourceLimit; 2856 cTarget = cBuf; 2857 cTargetLimit = cBuf +uBufSize*5; 2858 uTarget = uBuf; 2859 uTargetLimit = uBuf+ uBufSize*5; 2860 ucnv_reset(cnv); 2861 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2862 if(U_FAILURE(errorCode)){ 2863 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2864 return; 2865 } 2866 cSource = cBuf; 2867 test =uBuf; 2868 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2869 if(U_FAILURE(errorCode)){ 2870 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); 2871 return; 2872 } 2873 uSource = source; 2874 
while(uSource<uSourceLimit){ 2875 if(*test!=*uSource){ 2876 2877 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 2878 } 2879 uSource++; 2880 test++; 2881 } 2882 free(uBuf); 2883 free(cBuf); 2884} 2885 2886static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2887 const UChar* uSource; 2888 const UChar* uSourceLimit; 2889 const char* cSource; 2890 const char* cSourceLimit; 2891 UChar *uTargetLimit =NULL; 2892 UChar *uTarget; 2893 char *cTarget; 2894 const char *cTargetLimit; 2895 char *cBuf; 2896 UChar *uBuf; /*,*test;*/ 2897 int32_t uBufSize = 120; 2898 int len=0; 2899 int i=2; 2900 const UChar *temp = sourceLimit; 2901 UErrorCode errorCode=U_ZERO_ERROR; 2902 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2903 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2904 2905 ucnv_reset(cnv); 2906 for(;--i>0;){ 2907 uSource = (UChar*) source; 2908 cTarget = cBuf; 2909 uTarget = uBuf; 2910 cSource = cBuf; 2911 cTargetLimit = cBuf; 2912 uTargetLimit = uBuf+uBufSize*5; 2913 cTargetLimit = cTargetLimit+uBufSize*10; 2914 uSourceLimit=uSource; 2915 do{ 2916 2917 if (uSourceLimit < sourceLimit) { 2918 uSourceLimit = uSourceLimit+1; 2919 } 2920 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2921 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2922 errorCode=U_ZERO_ERROR; 2923 continue; 2924 } 2925 2926 if(U_FAILURE(errorCode)){ 2927 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2928 return; 2929 } 2930 2931 }while (uSource<temp); 2932 2933 cSourceLimit =cBuf; 2934 do{ 2935 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2936 cSourceLimit = cSourceLimit+1; 2937 } 2938 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2939 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2940 errorCode=U_ZERO_ERROR; 2941 continue; 2942 } 2943 if(U_FAILURE(errorCode)){ 2944 log_err("ucnv_toUnicode conversion failed reason %s\n", 
u_errorName(errorCode)); 2945 return; 2946 } 2947 }while(cSource<cTarget); 2948 2949 uSource = source; 2950 /*test =uBuf;*/ 2951 for(;len<(int)(source - sourceLimit);len++){ 2952 if(uBuf[len]!=uSource[len]){ 2953 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2954 } 2955 } 2956 } 2957 free(uBuf); 2958 free(cBuf); 2959} 2960static void 2961TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2962 const uint16_t results[], const char* message){ 2963/* const char* s0; */ 2964 const char* s=(char*)source; 2965 const uint16_t *r=results; 2966 UErrorCode errorCode=U_ZERO_ERROR; 2967 uint32_t c,exC; 2968 ucnv_reset(cnv); 2969 while(s<limit) { 2970 /* s0=s; */ 2971 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2972 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2973 break; /* no more significant input */ 2974 } else if(U_FAILURE(errorCode)) { 2975 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2976 break; 2977 } else { 2978 if(U16_IS_LEAD(*r)){ 2979 int i =0, len = 2; 2980 U16_NEXT(r, i, len, exC); 2981 r++; 2982 }else{ 2983 exC = *r; 2984 } 2985 if(c!=(uint32_t)(exC)) 2986 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 2987 } 2988 r++; 2989 } 2990} 2991 2992static int TestJitterbug930(const char* enc){ 2993 UErrorCode err = U_ZERO_ERROR; 2994 UConverter*converter; 2995 char out[80]; 2996 char*target = out; 2997 UChar in[4]; 2998 const UChar*source = in; 2999 int32_t off[80]; 3000 int32_t* offsets = off; 3001 int numOffWritten=0; 3002 UBool flush = 0; 3003 converter = my_ucnv_open(enc, &err); 3004 3005 in[0] = 0x41; /* 0x4E00;*/ 3006 in[1] = 0x4E01; 3007 in[2] = 0x4E02; 3008 in[3] = 0x4E03; 3009 3010 memset(off, '*', sizeof(off)); 3011 3012 ucnv_fromUnicode (converter, 3013 &target, 3014 target+2, 3015 &source, 3016 source+3, 3017 offsets, 3018 flush, 3019 &err); 3020 3021 /* writes three bytes into the output buffer: 41 1B 24 3022 * but 
offsets contains 0 1 1 3023 */ 3024 while(*offsets< off[10]){ 3025 numOffWritten++; 3026 offsets++; 3027 } 3028 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 3029 if(numOffWritten!= (int)(target-out)){ 3030 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 3031 } 3032 3033 err = U_ZERO_ERROR; 3034 3035 memset(off,'*' , sizeof(off)); 3036 3037 flush = 1; 3038 offsets=off; 3039 ucnv_fromUnicode (converter, 3040 &target, 3041 target+4, 3042 &source, 3043 source, 3044 offsets, 3045 flush, 3046 &err); 3047 numOffWritten=0; 3048 while(*offsets< off[10]){ 3049 numOffWritten++; 3050 if(*offsets!= -1){ 3051 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 3052 } 3053 offsets++; 3054 } 3055 3056 /* writes 42 43 7A into output buffer, 3057 * offsets contains -1 -1 -1 3058 */ 3059 ucnv_close(converter); 3060 return 0; 3061} 3062 3063static void 3064TestHZ() { 3065 /* test input */ 3066 static const uint16_t in[]={ 3067 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 3068 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 3069 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 3070 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 3071 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 3072 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 3073 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 3074 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 3075 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 3076 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 3077 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 3078 0x005A, 0x005B, 0x005C, 0x000A 
3079 }; 3080 const UChar* uSource; 3081 const UChar* uSourceLimit; 3082 const char* cSource; 3083 const char* cSourceLimit; 3084 UChar *uTargetLimit =NULL; 3085 UChar *uTarget; 3086 char *cTarget; 3087 const char *cTargetLimit; 3088 char *cBuf; 3089 UChar *uBuf,*test; 3090 int32_t uBufSize = 120; 3091 UErrorCode errorCode=U_ZERO_ERROR; 3092 UConverter *cnv; 3093 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3094 int32_t* myOff= offsets; 3095 cnv=ucnv_open("HZ", &errorCode); 3096 if(U_FAILURE(errorCode)) { 3097 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3098 return; 3099 } 3100 3101 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3102 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3103 uSource = (const UChar*)in; 3104 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3105 cTarget = cBuf; 3106 cTargetLimit = cBuf +uBufSize*5; 3107 uTarget = uBuf; 3108 uTargetLimit = uBuf+ uBufSize*5; 3109 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3110 if(U_FAILURE(errorCode)){ 3111 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3112 return; 3113 } 3114 cSource = cBuf; 3115 cSourceLimit =cTarget; 3116 test =uBuf; 3117 myOff=offsets; 3118 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3119 if(U_FAILURE(errorCode)){ 3120 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3121 return; 3122 } 3123 uSource = (const UChar*)in; 3124 while(uSource<uSourceLimit){ 3125 if(*test!=*uSource){ 3126 3127 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3128 } 3129 uSource++; 3130 test++; 3131 } 3132 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3133 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3134 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3135 
TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3136 TestJitterbug930("csISO2022JP"); 3137 ucnv_close(cnv); 3138 free(offsets); 3139 free(uBuf); 3140 free(cBuf); 3141} 3142 3143static void 3144TestISCII(){ 3145 /* test input */ 3146 static const uint16_t in[]={ 3147 /* test full range of Devanagari */ 3148 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3149 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3150 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3151 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3152 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3153 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3154 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3155 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3156 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3157 0x096D,0x096E,0x096F, 3158 /* test Soft halant*/ 3159 0x0915,0x094d, 0x200D, 3160 /* test explicit halant */ 3161 0x0915,0x094d, 0x200c, 3162 /* test double danda */ 3163 0x965, 3164 /* test ASCII */ 3165 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3166 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3167 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3168 /* tests from Lotus */ 3169 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3170 0x0930,0x094D,0x200D, 3171 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3172 0x0915,0x0921,0x002B,0x095F, 3173 /* tamil range */ 3174 0x0B86, 0xB87, 0xB88, 3175 /* telugu range */ 3176 0x0C05, 0x0C02, 0x0C03,0x0c31, 3177 /* kannada range */ 3178 0x0C85, 0xC82, 0x0C83, 3179 /* test Abbr sign and Anudatta */ 3180 0x0970, 0x952, 3181 /* 0x0958, 3182 0x0959, 3183 0x095A, 3184 0x095B, 3185 0x095C, 3186 0x095D, 3187 0x095E, 3188 0x095F,*/ 3189 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, 3190 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3191 0x090C , 3192 0x0962, 3193 0x0961 /* 
Vocallic LL 0xa6, 0xE9 */, 3194 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3195 0x0950 /* OM Symbol 0xa1, 0xE9,*/, 3196 0x093D /* Avagraha 0xEA, 0xE9*/, 3197 0x0958, 3198 0x0959, 3199 0x095A, 3200 0x095B, 3201 0x095C, 3202 0x095D, 3203 0x095E, 3204 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3205 }; 3206 static const unsigned char byteArr[]={ 3207 3208 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3209 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3210 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3211 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3212 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3213 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3214 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3215 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3216 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3217 0xf8,0xf9,0xfa, 3218 /* test soft halant */ 3219 0xb3, 0xE8, 0xE9, 3220 /* test explicit halant */ 3221 0xb3, 0xE8, 0xE8, 3222 /* test double danda */ 3223 0xea, 0xea, 3224 /* test ASCII */ 3225 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3226 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3227 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3228 /* test ATR code */ 3229 3230 /* tests from Lotus */ 3231 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3232 0xEF,0x42,0xCF,0xE8,0xD9, 3233 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3234 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3235 /* tamil range */ 3236 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3237 /* telugu range */ 3238 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3239 /* kannada range */ 3240 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3241 /* anudatta and abbreviation sign */ 3242 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3243 3244 3245 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3246 3247 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3248 3249 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ 3250 3251 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3252 3253 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3254 3255 
0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3256 3257 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3258 3259 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ 3260 3261 0xB3, 0xE9, /* Ka + NUKTA */ 3262 3263 0xB4, 0xE9, /* Kha + NUKTA */ 3264 3265 0xB5, 0xE9, /* Ga + NUKTA */ 3266 3267 0xBA, 0xE9, 3268 3269 0xBF, 0xE9, 3270 3271 0xC0, 0xE9, 3272 3273 0xC9, 0xE9, 3274 /* INV halant RA */ 3275 0xD9, 0xE8, 0xCF, 3276 0x00, 0x00A0, 3277 /* just consume unhandled codepoints */ 3278 0xEF, 0x30, 3279 3280 }; 3281 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE); 3282 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3283 3284} 3285 3286static void 3287TestISO_2022_JP() { 3288 /* test input */ 3289 static const uint16_t in[]={ 3290 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3291 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3292 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3293 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3294 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3295 0x201D, 0x3014, 0x000D, 0x000A, 3296 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3297 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3298 }; 3299 const UChar* uSource; 3300 const UChar* uSourceLimit; 3301 const char* cSource; 3302 const char* cSourceLimit; 3303 UChar *uTargetLimit =NULL; 3304 UChar *uTarget; 3305 char *cTarget; 3306 const char *cTargetLimit; 3307 char *cBuf; 3308 UChar *uBuf,*test; 3309 int32_t uBufSize = 120; 3310 UErrorCode errorCode=U_ZERO_ERROR; 3311 UConverter *cnv; 3312 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3313 int32_t* myOff= offsets; 3314 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3315 if(U_FAILURE(errorCode)) { 3316 log_data_err("Unable to open an 
ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3317 return; 3318 } 3319 3320 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3321 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3322 uSource = (const UChar*)in; 3323 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3324 cTarget = cBuf; 3325 cTargetLimit = cBuf +uBufSize*5; 3326 uTarget = uBuf; 3327 uTargetLimit = uBuf+ uBufSize*5; 3328 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3329 if(U_FAILURE(errorCode)){ 3330 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3331 return; 3332 } 3333 cSource = cBuf; 3334 cSourceLimit =cTarget; 3335 test =uBuf; 3336 myOff=offsets; 3337 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3338 if(U_FAILURE(errorCode)){ 3339 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3340 return; 3341 } 3342 3343 uSource = (const UChar*)in; 3344 while(uSource<uSourceLimit){ 3345 if(*test!=*uSource){ 3346 3347 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3348 } 3349 uSource++; 3350 test++; 3351 } 3352 3353 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3354 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3355 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3356 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3357 TestJitterbug930("csISO2022JP"); 3358 ucnv_close(cnv); 3359 free(uBuf); 3360 free(cBuf); 3361 free(offsets); 3362} 3363 3364static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3365 const UChar* uSource; 3366 const UChar* uSourceLimit; 3367 const char* cSource; 3368 const char* cSourceLimit; 3369 UChar *uTargetLimit =NULL; 3370 UChar *uTarget; 3371 char *cTarget; 3372 const char *cTargetLimit; 3373 char *cBuf; 3374 
UChar *uBuf,*test; 3375 int32_t uBufSize = 120*10; 3376 UErrorCode errorCode=U_ZERO_ERROR; 3377 UConverter *cnv; 3378 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3379 int32_t* myOff= offsets; 3380 cnv=my_ucnv_open(conv, &errorCode); 3381 if(U_FAILURE(errorCode)) { 3382 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3383 return; 3384 } 3385 3386 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3387 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3388 uSource = (const UChar*)in; 3389 uSourceLimit=uSource+len; 3390 cTarget = cBuf; 3391 cTargetLimit = cBuf +uBufSize; 3392 uTarget = uBuf; 3393 uTargetLimit = uBuf+ uBufSize; 3394 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3395 if(U_FAILURE(errorCode)){ 3396 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3397 return; 3398 } 3399 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3400 cSource = cBuf; 3401 cSourceLimit =cTarget; 3402 test =uBuf; 3403 myOff=offsets; 3404 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3405 if(U_FAILURE(errorCode)){ 3406 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3407 return; 3408 } 3409 3410 uSource = (const UChar*)in; 3411 while(uSource<uSourceLimit){ 3412 if(*test!=*uSource){ 3413 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3414 } 3415 uSource++; 3416 test++; 3417 } 3418 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3419 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3420 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3421 if(byteArr && byteArrLen!=0){ 3422 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 3423 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3424 { 3425 cSource = byteArr; 3426 cSourceLimit = cSource+byteArrLen; 
3427 test=uBuf; 3428 myOff = offsets; 3429 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3430 if(U_FAILURE(errorCode)){ 3431 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3432 return; 3433 } 3434 3435 uSource = (const UChar*)in; 3436 while(uSource<uSourceLimit){ 3437 if(*test!=*uSource){ 3438 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3439 } 3440 uSource++; 3441 test++; 3442 } 3443 } 3444 } 3445 3446 ucnv_close(cnv); 3447 free(uBuf); 3448 free(cBuf); 3449 free(offsets); 3450} 3451static UChar U_CALLCONV 3452_charAt(int32_t offset, void *context) { 3453 return ((char*)context)[offset]; 3454} 3455 3456static int32_t 3457unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3458 int32_t srcIndex=0; 3459 int32_t dstIndex=0; 3460 if(U_FAILURE(*status)){ 3461 return 0; 3462 } 3463 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3464 *status = U_ILLEGAL_ARGUMENT_ERROR; 3465 return 0; 3466 } 3467 if(srcLen==-1){ 3468 srcLen = (int32_t)uprv_strlen(src); 3469 } 3470 3471 for (; srcIndex<srcLen; ) { 3472 UChar32 c = src[srcIndex++]; 3473 if (c == 0x005C /*'\\'*/) { 3474 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3475 if (c == (UChar32)0xFFFFFFFF) { 3476 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3477 break; /* invalid escape sequence */ 3478 } 3479 } 3480 if(dstIndex < dstLen){ 3481 if(c>0xFFFF){ 3482 dst[dstIndex++] = U16_LEAD(c); 3483 if(dstIndex<dstLen){ 3484 dst[dstIndex]=U16_TRAIL(c); 3485 }else{ 3486 *status=U_BUFFER_OVERFLOW_ERROR; 3487 } 3488 }else{ 3489 dst[dstIndex]=(UChar)c; 3490 } 3491 3492 }else{ 3493 *status = U_BUFFER_OVERFLOW_ERROR; 3494 } 3495 dstIndex++; /* for preflighting */ 3496 } 3497 return dstIndex; 3498} 3499 3500static void 3501TestFullRoundtrip(const char* cp){ 3502 UChar usource[10] ={0}; 3503 UChar nsrc[10] = {0}; 3504 uint32_t i=1; 3505 int 
len=0, ulen; 3506 nsrc[0]=0x0061; 3507 /* Test codepoint 0 */ 3508 TestConv(usource,1,cp,"",NULL,0); 3509 TestConv(usource,2,cp,"",NULL,0); 3510 nsrc[2]=0x5555; 3511 TestConv(nsrc,3,cp,"",NULL,0); 3512 3513 for(;i<=0x10FFFF;i++){ 3514 if(i==0xD800){ 3515 i=0xDFFF; 3516 continue; 3517 } 3518 if(i<=0xFFFF){ 3519 usource[0] =(UChar) i; 3520 len=1; 3521 }else{ 3522 usource[0]=U16_LEAD(i); 3523 usource[1]=U16_TRAIL(i); 3524 len=2; 3525 } 3526 ulen=len; 3527 if(i==0x80) { 3528 usource[2]=0; 3529 } 3530 /* Test only single code points */ 3531 TestConv(usource,ulen,cp,"",NULL,0); 3532 /* Test codepoint repeated twice */ 3533 usource[ulen]=usource[0]; 3534 usource[ulen+1]=usource[1]; 3535 ulen+=len; 3536 TestConv(usource,ulen,cp,"",NULL,0); 3537 /* Test codepoint repeated 3 times */ 3538 usource[ulen]=usource[0]; 3539 usource[ulen+1]=usource[1]; 3540 ulen+=len; 3541 TestConv(usource,ulen,cp,"",NULL,0); 3542 /* Test codepoint in between 2 codepoints */ 3543 nsrc[1]=usource[0]; 3544 nsrc[2]=usource[1]; 3545 nsrc[len+1]=0x5555; 3546 TestConv(nsrc,len+2,cp,"",NULL,0); 3547 uprv_memset(usource,0,sizeof(UChar)*10); 3548 } 3549} 3550 3551static void 3552TestRoundTrippingAllUTF(void){ 3553 if(!getTestOption(QUICK_OPTION)){ 3554 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3555 TestFullRoundtrip("BOCU-1"); 3556 log_verbose("Running exhaustive round trip test for SCSU\n"); 3557 TestFullRoundtrip("SCSU"); 3558 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3559 TestFullRoundtrip("UTF-8"); 3560 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3561 TestFullRoundtrip("CESU-8"); 3562 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3563 TestFullRoundtrip("UTF-16BE"); 3564 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3565 TestFullRoundtrip("UTF-16LE"); 3566 log_verbose("Running exhaustive round trip test for UTF-16\n"); 3567 TestFullRoundtrip("UTF-16"); 3568 log_verbose("Running exhaustive round trip 
test for UTF-32BE\n"); 3569 TestFullRoundtrip("UTF-32BE"); 3570 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3571 TestFullRoundtrip("UTF-32LE"); 3572 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3573 TestFullRoundtrip("UTF-32"); 3574 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3575 TestFullRoundtrip("UTF-7"); 3576 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3577 TestFullRoundtrip("UTF-7,version=1"); 3578 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3579 TestFullRoundtrip("IMAP-mailbox-name"); 3580 /* 3581 * 3582 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of 3583 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA). 3584 * The old mappings remain as fallbacks. 3585 * This test may be reintroduced at a later time. 3586 * 3587 * 110118 - mow 3588 */ 3589 /* 3590 log_verbose("Running exhaustive round trip test for GB18030\n"); 3591 TestFullRoundtrip("GB18030"); 3592 */ 3593 } 3594} 3595 3596static void 3597TestSCSU() { 3598 3599 static const uint16_t germanUTF16[]={ 3600 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3601 }; 3602 3603 static const uint8_t germanSCSU[]={ 3604 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3605 }; 3606 3607 static const uint16_t russianUTF16[]={ 3608 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3609 }; 3610 3611 static const uint8_t russianSCSU[]={ 3612 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3613 }; 3614 3615 static const uint16_t japaneseUTF16[]={ 3616 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3617 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3618 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3619 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3620 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3621 0x3059, 0x308b, 
0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 3622 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3623 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3624 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 3625 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3626 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3627 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3628 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3629 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3630 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3631 }; 3632 3633 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3634 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3635 static const uint8_t japaneseSCSU[]={ 3636 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3637 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3638 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3639 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3640 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3641 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3642 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3643 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3644 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3645 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3646 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 
0xc2, 0x06, 3647 0xcb, 0x82 3648 }; 3649 3650 static const uint16_t allFeaturesUTF16[]={ 3651 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3652 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 3653 0x01df, 0xf000, 0xdbff, 0xdfff 3654 }; 3655 3656 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3657 * result here (34B vs. 35B) 3658 */ 3659 static const uint8_t allFeaturesSCSU[]={ 3660 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3661 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3662 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3663 0xdf, 0x14, 0x80, 0x15, 0xff 3664 }; 3665 static const uint16_t monkeyIn[]={ 3666 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3667 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3668 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3669 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3670 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3671 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3672 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3673 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3674 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3675 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3676 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3677 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3678 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3679 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3680 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 
0x000D, 0x000A, 3681 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3682 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3683 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3684 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3685 /* test non-BMP code points */ 3686 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3687 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3688 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3689 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3690 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3691 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3692 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3693 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3694 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3695 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3696 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3697 3698 3699 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3700 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3701 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3702 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3703 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3704 }; 3705 static const char *fTestCases [] = { 3706 "\\ud800\\udc00", /* smallest surrogate*/ 3707 "\\ud8ff\\udcff", 3708 "\\udBff\\udFff", /* largest surrogate pair*/ 3709 "\\ud834\\udc00", 3710 "\\U0010FFFF", 3711 "Hello \\u9292 \\u9192 World!", 3712 "Hell\\u0429o \\u9292 \\u9192 
W\\u00e4rld!", 3713 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3714 3715 "\\u0648\\u06c8", /* catch missing reset*/ 3716 "\\u0648\\u06c8", 3717 3718 "\\u4444\\uE001", /* lowest quotable*/ 3719 "\\u4444\\uf2FF", /* highest quotable*/ 3720 "\\u4444\\uf188\\u4444", 3721 "\\u4444\\uf188\\uf288", 3722 "\\u4444\\uf188abc\\u0429\\uf288", 3723 "\\u9292\\u2222", 3724 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3725 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3726 "Hello World!123456", 3727 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3728 3729 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3730 "abc\\u4411d", /* uses SQU*/ 3731 "abc\\u4411\\u4412d",/* uses SCU*/ 3732 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3733 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3734 "\\u9292\\u2222", 3735 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3736 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3737 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3738 3739 "", /* empty input*/ 3740 "\\u0000", /* smallest BMP character*/ 3741 "\\uFFFF", /* largest BMP character*/ 3742 3743 /* regression tests*/ 3744 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3745 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3746 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3747 "\\u0041\\u00df\\u0401\\u015f", 3748 "\\u9066\\u2123abc", 3749 
"\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3750 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3751 }; 3752 int i=0; 3753 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ 3754 const char* cSrc = fTestCases[i]; 3755 UErrorCode status = U_ZERO_ERROR; 3756 int32_t cSrcLen,srcLen; 3757 UChar* src; 3758 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3759 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3760 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3761 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3762 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3763 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3764 free(src); 3765 } 3766 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3767 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3768 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3769 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3770 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3771 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3772 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3773} 3774 3775#if !UCONFIG_NO_LEGACY_CONVERSION 3776static void TestJitterbug2346(){ 3777 char source[] 
= { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3778 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3779 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3780 3781 UChar uTarget[500]={'\0'}; 3782 UChar* utarget=uTarget; 3783 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3784 3785 char cTarget[500]={'\0'}; 3786 char* ctarget=cTarget; 3787 char* ctargetLimit=cTarget+sizeof(cTarget); 3788 const char* csource=source; 3789 UChar* temp = expected; 3790 UErrorCode err=U_ZERO_ERROR; 3791 3792 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3793 if(U_FAILURE(err)) { 3794 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3795 return; 3796 } 3797 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3798 if(U_FAILURE(err)) { 3799 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3800 return; 3801 } 3802 utargetLimit=utarget; 3803 utarget = uTarget; 3804 while(utarget<utargetLimit){ 3805 if(*temp!=*utarget){ 3806 3807 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3808 } 3809 utarget++; 3810 temp++; 3811 } 3812 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3813 if(U_FAILURE(err)) { 3814 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3815 return; 3816 } 3817 ctargetLimit=ctarget; 3818 ctarget =cTarget; 3819 ucnv_close(conv); 3820 3821 3822} 3823 3824static void 3825TestISO_2022_JP_1() { 3826 /* test input */ 3827 static const uint16_t in[]={ 3828 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3829 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3830 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3831 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3832 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 
0x000A, 3833 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3834 0x201D, 0x000D, 0x000A, 3835 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3836 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3837 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3838 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3839 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3840 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3841 }; 3842 const UChar* uSource; 3843 const UChar* uSourceLimit; 3844 const char* cSource; 3845 const char* cSourceLimit; 3846 UChar *uTargetLimit =NULL; 3847 UChar *uTarget; 3848 char *cTarget; 3849 const char *cTargetLimit; 3850 char *cBuf; 3851 UChar *uBuf,*test; 3852 int32_t uBufSize = 120; 3853 UErrorCode errorCode=U_ZERO_ERROR; 3854 UConverter *cnv; 3855 3856 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3857 if(U_FAILURE(errorCode)) { 3858 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3859 return; 3860 } 3861 3862 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3863 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3864 uSource = (const UChar*)in; 3865 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3866 cTarget = cBuf; 3867 cTargetLimit = cBuf +uBufSize*5; 3868 uTarget = uBuf; 3869 uTargetLimit = uBuf+ uBufSize*5; 3870 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3871 if(U_FAILURE(errorCode)){ 3872 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3873 return; 3874 } 3875 cSource = cBuf; 3876 cSourceLimit =cTarget; 3877 test =uBuf; 3878 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3879 if(U_FAILURE(errorCode)){ 3880 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3881 
return; 3882 } 3883 uSource = (const UChar*)in; 3884 while(uSource<uSourceLimit){ 3885 if(*test!=*uSource){ 3886 3887 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3888 } 3889 uSource++; 3890 test++; 3891 } 3892 /*ucnv_close(cnv); 3893 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3894 /*Test for the condition where there is an invalid character*/ 3895 ucnv_reset(cnv); 3896 { 3897 static const uint8_t source2[]={0x0e,0x24,0x053}; 3898 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3899 } 3900 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3901 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3902 ucnv_close(cnv); 3903 free(uBuf); 3904 free(cBuf); 3905} 3906 3907static void 3908TestISO_2022_JP_2() { 3909 /* test input */ 3910 static const uint16_t in[]={ 3911 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3912 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3913 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3914 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3915 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3916 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3917 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3918 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3919 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3920 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3921 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3922 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3923 0x004B, 0x004C, 
0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3924 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3925 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3926 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3927 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3928 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3929 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3930 }; 3931 const UChar* uSource; 3932 const UChar* uSourceLimit; 3933 const char* cSource; 3934 const char* cSourceLimit; 3935 UChar *uTargetLimit =NULL; 3936 UChar *uTarget; 3937 char *cTarget; 3938 const char *cTargetLimit; 3939 char *cBuf; 3940 UChar *uBuf,*test; 3941 int32_t uBufSize = 120; 3942 UErrorCode errorCode=U_ZERO_ERROR; 3943 UConverter *cnv; 3944 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3945 int32_t* myOff= offsets; 3946 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3947 if(U_FAILURE(errorCode)) { 3948 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3949 return; 3950 } 3951 3952 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3953 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3954 uSource = (const UChar*)in; 3955 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3956 cTarget = cBuf; 3957 cTargetLimit = cBuf +uBufSize*5; 3958 uTarget = uBuf; 3959 uTargetLimit = uBuf+ uBufSize*5; 3960 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3961 if(U_FAILURE(errorCode)){ 3962 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3963 return; 3964 } 3965 cSource = cBuf; 3966 cSourceLimit =cTarget; 3967 test =uBuf; 3968 myOff=offsets; 3969 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3970 
if(U_FAILURE(errorCode)){ 3971 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3972 return; 3973 } 3974 uSource = (const UChar*)in; 3975 while(uSource<uSourceLimit){ 3976 if(*test!=*uSource){ 3977 3978 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3979 } 3980 uSource++; 3981 test++; 3982 } 3983 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3984 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3985 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3986 /*Test for the condition where there is an invalid character*/ 3987 ucnv_reset(cnv); 3988 { 3989 static const uint8_t source2[]={0x0e,0x24,0x053}; 3990 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 3991 } 3992 ucnv_close(cnv); 3993 free(uBuf); 3994 free(cBuf); 3995 free(offsets); 3996} 3997 3998static void 3999TestISO_2022_KR() { 4000 /* test input */ 4001 static const uint16_t in[]={ 4002 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4003 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4004 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4005 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4006 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4007 ,0x53E3,0x53E4,0x000A,0x000D}; 4008 const UChar* uSource; 4009 const UChar* uSourceLimit; 4010 const char* cSource; 4011 const char* cSourceLimit; 4012 UChar *uTargetLimit =NULL; 4013 UChar *uTarget; 4014 char *cTarget; 4015 const char *cTargetLimit; 4016 char *cBuf; 4017 UChar *uBuf,*test; 4018 int32_t uBufSize = 120; 4019 UErrorCode errorCode=U_ZERO_ERROR; 4020 UConverter *cnv; 4021 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4022 int32_t* myOff= offsets; 4023 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); 4024 
if(U_FAILURE(errorCode)) { 4025 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4026 return; 4027 } 4028 4029 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4030 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4031 uSource = (const UChar*)in; 4032 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4033 cTarget = cBuf; 4034 cTargetLimit = cBuf +uBufSize*5; 4035 uTarget = uBuf; 4036 uTargetLimit = uBuf+ uBufSize*5; 4037 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4038 if(U_FAILURE(errorCode)){ 4039 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4040 return; 4041 } 4042 cSource = cBuf; 4043 cSourceLimit =cTarget; 4044 test =uBuf; 4045 myOff=offsets; 4046 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4047 if(U_FAILURE(errorCode)){ 4048 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4049 return; 4050 } 4051 uSource = (const UChar*)in; 4052 while(uSource<uSourceLimit){ 4053 if(*test!=*uSource){ 4054 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4055 } 4056 uSource++; 4057 test++; 4058 } 4059 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4060 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4061 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4062 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4063 TestJitterbug930("csISO2022KR"); 4064 /*Test for the condition where there is an invalid character*/ 4065 ucnv_reset(cnv); 4066 { 4067 static const uint8_t source2[]={0x1b,0x24,0x053}; 4068 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4069 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4070 } 4071 
ucnv_close(cnv); 4072 free(uBuf); 4073 free(cBuf); 4074 free(offsets); 4075} 4076 4077static void 4078TestISO_2022_KR_1() { 4079 /* test input */ 4080 static const uint16_t in[]={ 4081 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4082 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4083 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4084 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4085 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4086 ,0x53E3,0x53E4,0x000A,0x000D}; 4087 const UChar* uSource; 4088 const UChar* uSourceLimit; 4089 const char* cSource; 4090 const char* cSourceLimit; 4091 UChar *uTargetLimit =NULL; 4092 UChar *uTarget; 4093 char *cTarget; 4094 const char *cTargetLimit; 4095 char *cBuf; 4096 UChar *uBuf,*test; 4097 int32_t uBufSize = 120; 4098 UErrorCode errorCode=U_ZERO_ERROR; 4099 UConverter *cnv; 4100 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4101 int32_t* myOff= offsets; 4102 cnv=ucnv_open("ibm-25546", &errorCode); 4103 if(U_FAILURE(errorCode)) { 4104 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4105 return; 4106 } 4107 4108 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4109 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4110 uSource = (const UChar*)in; 4111 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4112 cTarget = cBuf; 4113 cTargetLimit = cBuf +uBufSize*5; 4114 uTarget = uBuf; 4115 uTargetLimit = uBuf+ uBufSize*5; 4116 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4117 if(U_FAILURE(errorCode)){ 4118 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4119 return; 4120 } 4121 cSource = cBuf; 4122 cSourceLimit =cTarget; 4123 test =uBuf; 4124 myOff=offsets; 4125 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4126 if(U_FAILURE(errorCode)){ 4127 
log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4128 return; 4129 } 4130 uSource = (const UChar*)in; 4131 while(uSource<uSourceLimit){ 4132 if(*test!=*uSource){ 4133 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4134 } 4135 uSource++; 4136 test++; 4137 } 4138 ucnv_reset(cnv); 4139 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4140 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4141 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4142 ucnv_reset(cnv); 4143 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4144 /*Test for the condition where there is an invalid character*/ 4145 ucnv_reset(cnv); 4146 { 4147 static const uint8_t source2[]={0x1b,0x24,0x053}; 4148 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4149 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4150 } 4151 ucnv_close(cnv); 4152 free(uBuf); 4153 free(cBuf); 4154 free(offsets); 4155} 4156 4157static void TestJitterbug2411(){ 4158 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4159 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4160 UConverter* kr=NULL, *kr1=NULL; 4161 UErrorCode errorCode = U_ZERO_ERROR; 4162 UChar tgt[100]={'\0'}; 4163 UChar* target = tgt; 4164 UChar* targetLimit = target+100; 4165 kr=ucnv_open("iso-2022-kr", &errorCode); 4166 if(U_FAILURE(errorCode)) { 4167 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4168 return; 4169 } 4170 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4171 if(U_FAILURE(errorCode)) { 4172 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", 
u_errorName(errorCode)); 4173 return; 4174 } 4175 kr1 = ucnv_open("ibm-25546", &errorCode); 4176 if(U_FAILURE(errorCode)) { 4177 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4178 return; 4179 } 4180 target = tgt; 4181 targetLimit = target+100; 4182 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4183 4184 if(U_FAILURE(errorCode)) { 4185 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4186 return; 4187 } 4188 4189 ucnv_close(kr); 4190 ucnv_close(kr1); 4191 4192} 4193 4194static void 4195TestJIS(){ 4196 /* From Unicode moved to testdata/conversion.txt */ 4197 /*To Unicode*/ 4198 { 4199 static const uint8_t sampleTextJIS[] = { 4200 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4201 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4202 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4203 }; 4204 static const uint16_t expectedISO2022JIS[] = { 4205 0x0041, 0x0042, 4206 0xFF81, 0xFF82, 4207 0x3000 4208 }; 4209 static const int32_t toISO2022JISOffs[]={ 4210 3,4, 4211 8,9, 4212 16 4213 }; 4214 4215 static const uint8_t sampleTextJIS7[] = { 4216 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4217 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4218 0x1b,0x24,0x42,0x21,0x21, 4219 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4220 0x21,0x22, 4221 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4222 }; 4223 static const uint16_t expectedISO2022JIS7[] = { 4224 0x0041, 0x0042, 4225 0xFF81, 0xFF82, 4226 0x3000, 4227 0xFF81, 0xFF82, 4228 0x3001, 4229 0x3000 4230 }; 4231 static const int32_t toISO2022JIS7Offs[]={ 4232 3,4, 4233 8,9, 4234 13,16, 4235 17, 4236 19,27 4237 }; 4238 static const uint8_t sampleTextJIS8[] = { 4239 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4240 0xa1,0xc8,0xd9,/*Katakana Set*/ 4241 0x1b,0x28,0x42, 4242 0x41,0x42, 4243 0xb1,0xc3, /*Katakana Set*/ 4244 0x1b,0x24,0x42,0x21,0x21 4245 }; 
4246 static const uint16_t expectedISO2022JIS8[] = { 4247 0x0041, 0x0042, 4248 0xff61, 0xff88, 0xff99, 4249 0x0041, 0x0042, 4250 0xff71, 0xff83, 4251 0x3000 4252 }; 4253 static const int32_t toISO2022JIS8Offs[]={ 4254 3, 4, 5, 6, 4255 7, 11, 12, 13, 4256 14, 18, 4257 }; 4258 4259 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4260 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); 4261 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4262 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); 4263 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4264 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); 4265 } 4266 4267} 4268 4269 4270#if 0 4271 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 4272 4273static void TestJitterbug915(){ 4274/* tests for roundtripping of the below sequence 4275\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4276\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4277\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4278\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4279\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4280\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4281\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4282*/ 4283 static const char cSource[]={ 4284 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4285 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4286 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4287 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4288 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 4289 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4290 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4291 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4292 0x1B, 0x24, 
0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4293 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4294 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4295 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4296 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4297 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 4298 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4299 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4300 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4301 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4302 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4303 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4304 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4305 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4306 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4307 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4308 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4309 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4310 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4311 0x37, 0x20, 0x2A, 0x2F 4312 }; 4313 UChar uTarget[500]={'\0'}; 4314 UChar* utarget=uTarget; 4315 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4316 4317 char cTarget[500]={'\0'}; 4318 char* ctarget=cTarget; 4319 char* ctargetLimit=cTarget+sizeof(cTarget); 4320 const char* csource=cSource; 4321 const char* tempSrc = cSource; 4322 UErrorCode err=U_ZERO_ERROR; 4323 4324 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4325 if(U_FAILURE(err)) { 4326 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4327 return; 4328 } 4329 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4330 if(U_FAILURE(err)) { 4331 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4332 return; 4333 } 4334 utargetLimit=utarget; 4335 
utarget = uTarget; 4336 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4337 if(U_FAILURE(err)) { 4338 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4339 return; 4340 } 4341 ctargetLimit=ctarget; 4342 ctarget =cTarget; 4343 while(ctarget<ctargetLimit){ 4344 if(*ctarget != *tempSrc){ 4345 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4346 } 4347 ++ctarget; 4348 ++tempSrc; 4349 } 4350 4351 ucnv_close(conv); 4352} 4353 4354static void 4355TestISO_2022_CN_EXT() { 4356 /* test input */ 4357 static const uint16_t in[]={ 4358 /* test Non-BMP code points */ 4359 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4360 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4361 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4362 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4363 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4364 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4365 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4366 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4367 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4368 0xD869, 0xDED5, 4369 4370 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4371 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4372 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 4373 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4374 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4375 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 
0x000A, 4376 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4377 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4378 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4379 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4380 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4381 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4382 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4383 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4384 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4385 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4386 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4387 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4388 4389 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4390 4391 }; 4392 4393 const UChar* uSource; 4394 const UChar* uSourceLimit; 4395 const char* cSource; 4396 const char* cSourceLimit; 4397 UChar *uTargetLimit =NULL; 4398 UChar *uTarget; 4399 char *cTarget; 4400 const char *cTargetLimit; 4401 char *cBuf; 4402 UChar *uBuf,*test; 4403 int32_t uBufSize = 180; 4404 UErrorCode errorCode=U_ZERO_ERROR; 4405 UConverter *cnv; 4406 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4407 int32_t* myOff= offsets; 4408 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4409 if(U_FAILURE(errorCode)) { 4410 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4411 return; 4412 } 4413 4414 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4415 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4416 uSource = (const UChar*)in; 4417 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4418 cTarget = cBuf; 
4419 cTargetLimit = cBuf +uBufSize*5; 4420 uTarget = uBuf; 4421 uTargetLimit = uBuf+ uBufSize*5; 4422 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4423 if(U_FAILURE(errorCode)){ 4424 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4425 return; 4426 } 4427 cSource = cBuf; 4428 cSourceLimit =cTarget; 4429 test =uBuf; 4430 myOff=offsets; 4431 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4432 if(U_FAILURE(errorCode)){ 4433 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4434 return; 4435 } 4436 uSource = (const UChar*)in; 4437 while(uSource<uSourceLimit){ 4438 if(*test!=*uSource){ 4439 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4440 } 4441 else{ 4442 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4443 } 4444 uSource++; 4445 test++; 4446 } 4447 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4448 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4449 /*Test for the condition where there is an invalid character*/ 4450 ucnv_reset(cnv); 4451 { 4452 static const uint8_t source2[]={0x0e,0x24,0x053}; 4453 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4454 } 4455 ucnv_close(cnv); 4456 free(uBuf); 4457 free(cBuf); 4458 free(offsets); 4459} 4460#endif 4461 4462static void 4463TestISO_2022_CN() { 4464 /* test input */ 4465 static const uint16_t in[]={ 4466 /* jitterbug 951 */ 4467 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, 4468 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 4469 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, 4470 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, 4471 0x0020, 0x0045, 0x004e, 0x0044, 4472 
/**/ 4473 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, 4474 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, 4475 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, 4476 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4477 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4478 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4479 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4480 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4481 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4482 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4483 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4484 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4485 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, 4486 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, 4487 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4488 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 4489 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, 4490 4491 }; 4492 const UChar* uSource; 4493 const UChar* uSourceLimit; 4494 const char* cSource; 4495 const char* cSourceLimit; 4496 UChar *uTargetLimit =NULL; 4497 UChar *uTarget; 4498 char *cTarget; 4499 const char *cTargetLimit; 4500 char *cBuf; 4501 UChar *uBuf,*test; 4502 int32_t uBufSize = 180; 4503 UErrorCode errorCode=U_ZERO_ERROR; 4504 UConverter *cnv; 4505 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4506 int32_t* myOff= offsets; 4507 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); 4508 
if(U_FAILURE(errorCode)) { 4509 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4510 return; 4511 } 4512 4513 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4514 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4515 uSource = (const UChar*)in; 4516 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4517 cTarget = cBuf; 4518 cTargetLimit = cBuf +uBufSize*5; 4519 uTarget = uBuf; 4520 uTargetLimit = uBuf+ uBufSize*5; 4521 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4522 if(U_FAILURE(errorCode)){ 4523 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4524 return; 4525 } 4526 cSource = cBuf; 4527 cSourceLimit =cTarget; 4528 test =uBuf; 4529 myOff=offsets; 4530 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4531 if(U_FAILURE(errorCode)){ 4532 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4533 return; 4534 } 4535 uSource = (const UChar*)in; 4536 while(uSource<uSourceLimit){ 4537 if(*test!=*uSource){ 4538 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4539 } 4540 else{ 4541 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4542 } 4543 uSource++; 4544 test++; 4545 } 4546 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); 4547 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4548 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4549 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4550 TestJitterbug930("csISO2022CN"); 4551 /*Test for the condition where there is an invalid character*/ 4552 ucnv_reset(cnv); 4553 { 4554 static const uint8_t source2[]={0x0e,0x24,0x053}; 4555 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); 4556 } 4557 4558 ucnv_close(cnv); 4559 
free(uBuf); 4560 free(cBuf); 4561 free(offsets); 4562} 4563 4564/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4565typedef struct { 4566 const char * converterName; 4567 const char * inputText; 4568 int inputTextLength; 4569} EmptySegmentTest; 4570 4571/* Callback for TestJitterbug6175, should only get called for empty segment errors */ 4572static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4573 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4574 if (reason > UCNV_IRREGULAR) { 4575 return; 4576 } 4577 if (reason != UCNV_IRREGULAR) { 4578 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4579 } 4580 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4581 *err = U_ZERO_ERROR; 4582 ucnv_cbToUWriteSub(toArgs,0,err); 4583} 4584 4585enum { kEmptySegmentToUCharsMax = 64 }; 4586static void TestJitterbug6175(void) { 4587 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4588 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4589 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4590 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4591 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4592 static const EmptySegmentTest emptySegmentTests[] = { 4593 /* converterName inputText inputTextLength */ 4594 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, 4595 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, 4596 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, 4597 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, 4598 { "HZ-GB-2312", hzGB2312_a, 
sizeof(hzGB2312_a) }, 4599 /* terminator: */ 4600 { NULL, NULL, 0, } 4601 }; 4602 const EmptySegmentTest * testPtr; 4603 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { 4604 UErrorCode err = U_ZERO_ERROR; 4605 UConverter * cnv = ucnv_open(testPtr->converterName, &err); 4606 if (U_FAILURE(err)) { 4607 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4608 return; 4609 } 4610 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); 4611 if (U_FAILURE(err)) { 4612 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4613 ucnv_close(cnv); 4614 return; 4615 } 4616 { 4617 UChar toUChars[kEmptySegmentToUCharsMax]; 4618 UChar * toUCharsPtr = toUChars; 4619 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; 4620 const char * inCharsPtr = testPtr->inputText; 4621 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; 4622 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); 4623 } 4624 ucnv_close(cnv); 4625 } 4626} 4627 4628static void 4629TestEBCDIC_STATEFUL() { 4630 /* test input */ 4631 static const uint8_t in[]={ 4632 0x61, 4633 0x1a, 4634 0x0f, 0x4b, 4635 0x42, 4636 0x40, 4637 0x36, 4638 }; 4639 4640 /* expected test results */ 4641 static const int32_t results[]={ 4642 /* number of bytes read, code point */ 4643 1, 0x002f, 4644 1, 0x0092, 4645 2, 0x002e, 4646 1, 0xff62, 4647 1, 0x0020, 4648 1, 0x0096, 4649 4650 }; 4651 static const uint8_t in2[]={ 4652 0x0f, 4653 0xa1, 4654 0x01 4655 }; 4656 4657 /* expected test results */ 4658 static const int32_t results2[]={ 4659 /* number of bytes read, code point */ 4660 2, 0x203E, 4661 1, 0x0001, 4662 }; 4663 4664 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 4665 UErrorCode errorCode=U_ZERO_ERROR; 4666 UConverter *cnv=ucnv_open("ibm-930", &errorCode); 4667 
if(U_FAILURE(errorCode)) { 4668 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); 4669 return; 4670 } 4671 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); 4672 ucnv_reset(cnv); 4673 /* Test the condition when source >= sourceLimit */ 4674 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 4675 ucnv_reset(cnv); 4676 /*Test for the condition where source > sourcelimit after consuming the shift chracter */ 4677 { 4678 static const uint8_t source1[]={0x0f}; 4679 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); 4680 } 4681 /*Test for the condition where there is an invalid character*/ 4682 ucnv_reset(cnv); 4683 { 4684 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; 4685 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); 4686 } 4687 ucnv_reset(cnv); 4688 source=(const char*)in2; 4689 limit=(const char*)in2+sizeof(in2); 4690 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); 4691 ucnv_close(cnv); 4692 4693} 4694 4695static void 4696TestGB18030() { 4697 /* test input */ 4698 static const uint8_t in[]={ 4699 0x24, 4700 0x7f, 4701 0x81, 0x30, 0x81, 0x30, 4702 0xa8, 0xbf, 4703 0xa2, 0xe3, 4704 0xd2, 0xbb, 4705 0x82, 0x35, 0x8f, 0x33, 4706 0x84, 0x31, 0xa4, 0x39, 4707 0x90, 0x30, 0x81, 0x30, 4708 0xe3, 0x32, 0x9a, 0x35 4709#if 0 4710 /* 4711 * Feature removed markus 2000-oct-26 4712 * Only some codepages must match surrogate pairs into supplementary code points - 4713 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . 4714 * GB 18030 provides direct encodings for supplementary code points, therefore 4715 * it must not combine two single-encoded surrogates into one code point. 
4716 */ 4717 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ 4718#endif 4719 }; 4720 4721 /* expected test results */ 4722 static const int32_t results[]={ 4723 /* number of bytes read, code point */ 4724 1, 0x24, 4725 1, 0x7f, 4726 4, 0x80, 4727 2, 0x1f9, 4728 2, 0x20ac, 4729 2, 0x4e00, 4730 4, 0x9fa6, 4731 4, 0xffff, 4732 4, 0x10000, 4733 4, 0x10ffff 4734#if 0 4735 /* Feature removed. See comment above. */ 4736 8, 0x10000 4737#endif 4738 }; 4739 4740/* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ 4741 UErrorCode errorCode=U_ZERO_ERROR; 4742 UConverter *cnv=ucnv_open("gb18030", &errorCode); 4743 if(U_FAILURE(errorCode)) { 4744 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); 4745 return; 4746 } 4747 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); 4748 ucnv_close(cnv); 4749} 4750 4751static void 4752TestLMBCS() { 4753 /* LMBCS-1 string */ 4754 static const uint8_t pszLMBCS[]={ 4755 0x61, 4756 0x01, 0x29, 4757 0x81, 4758 0xA0, 4759 0x0F, 0x27, 4760 0x0F, 0x91, 4761 0x14, 0x0a, 0x74, 4762 0x14, 0xF6, 0x02, 4763 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 4764 0x10, 0x88, 0xA0, 4765 }; 4766 4767 /* Unicode UChar32 equivalents */ 4768 static const UChar32 pszUnicode32[]={ 4769 /* code point */ 4770 0x00000061, 4771 0x00002013, 4772 0x000000FC, 4773 0x000000E1, 4774 0x00000007, 4775 0x00000091, 4776 0x00000a74, 4777 0x00000200, 4778 0x00023456, /* code point for surrogate pair */ 4779 0x00005516 4780 }; 4781 4782/* Unicode UChar equivalents */ 4783 static const UChar pszUnicode[]={ 4784 /* code point */ 4785 0x0061, 4786 0x2013, 4787 0x00FC, 4788 0x00E1, 4789 0x0007, 4790 0x0091, 4791 0x0a74, 4792 0x0200, 4793 0xD84D, /* low surrogate */ 4794 0xDC56, /* high surrogate */ 4795 0x5516 4796 }; 4797 4798/* expected test results */ 4799 static const int offsets32[]={ 4800 /* number of bytes read, code point */ 4801 0, 
4802 1, 4803 3, 4804 4, 4805 5, 4806 7, 4807 9, 4808 12, 4809 15, 4810 21, 4811 24 4812 }; 4813 4814/* expected test results */ 4815 static const int offsets[]={ 4816 /* number of bytes read, code point */ 4817 0, 4818 1, 4819 3, 4820 4, 4821 5, 4822 7, 4823 9, 4824 12, 4825 15, 4826 18, 4827 21, 4828 24 4829 }; 4830 4831 4832 UConverter *cnv; 4833 4834#define NAME_LMBCS_1 "LMBCS-1" 4835#define NAME_LMBCS_2 "LMBCS-2" 4836 4837 4838 /* Some basic open/close/property tests on some LMBCS converters */ 4839 { 4840 4841 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4842 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4843 char get_subchars [1]; 4844 const char * get_name; 4845 UConverter *cnv1; 4846 UConverter *cnv2; 4847 4848 int8_t len = sizeof(get_subchars); 4849 4850 UErrorCode errorCode=U_ZERO_ERROR; 4851 4852 /* Open */ 4853 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4854 if(U_FAILURE(errorCode)) { 4855 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4856 return; 4857 } 4858 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4859 if(U_FAILURE(errorCode)) { 4860 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4861 return; 4862 } 4863 4864 /* Name */ 4865 get_name = ucnv_getName (cnv1, &errorCode); 4866 if (strcmp(NAME_LMBCS_1,get_name)){ 4867 log_err("Unexpected converter name: %s\n", get_name); 4868 } 4869 get_name = ucnv_getName (cnv2, &errorCode); 4870 if (strcmp(NAME_LMBCS_2,get_name)){ 4871 log_err("Unexpected converter name: %s\n", get_name); 4872 } 4873 4874 /* substitution chars */ 4875 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4876 if(U_FAILURE(errorCode)) { 4877 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4878 } 4879 if (len!=1){ 4880 log_err("Unexpected length of sub chars\n"); 4881 } 4882 if (get_subchars[0] != expected_subchars[0]){ 4883 log_err("Unexpected value of sub chars\n"); 4884 } 4885 ucnv_setSubstChars 
(cnv2,new_subchars, len, &errorCode); 4886 if(U_FAILURE(errorCode)) { 4887 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4888 } 4889 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4890 if(U_FAILURE(errorCode)) { 4891 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4892 } 4893 if (len!=1){ 4894 log_err("Unexpected length of sub chars\n"); 4895 } 4896 if (get_subchars[0] != new_subchars[0]){ 4897 log_err("Unexpected value of sub chars\n"); 4898 } 4899 ucnv_close(cnv1); 4900 ucnv_close(cnv2); 4901 4902 } 4903 4904 /* LMBCS to Unicode - offsets */ 4905 { 4906 UErrorCode errorCode=U_ZERO_ERROR; 4907 4908 const char * pSource = (const char *)pszLMBCS; 4909 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4910 4911 UChar Out [sizeof(pszUnicode) + 1]; 4912 UChar * pOut = Out; 4913 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4914 4915 int32_t off [sizeof(offsets)]; 4916 4917 /* last 'offset' in expected results is just the final size. 4918 (Makes other tests easier). 
Compensate here: */ 4919 4920 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); 4921 4922 4923 4924 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4925 if(U_FAILURE(errorCode)) { 4926 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4927 return; 4928 } 4929 4930 4931 4932 ucnv_toUnicode (cnv, 4933 &pOut, 4934 OutLimit, 4935 &pSource, 4936 sourceLimit, 4937 off, 4938 TRUE, 4939 &errorCode); 4940 4941 4942 if (memcmp(off,offsets,sizeof(offsets))) 4943 { 4944 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4945 } 4946 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4947 { 4948 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4949 } 4950 ucnv_close(cnv); 4951 } 4952 { 4953 /* LMBCS to Unicode - getNextUChar */ 4954 const char * sourceStart; 4955 const char *source=(const char *)pszLMBCS; 4956 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4957 const UChar32 *results= pszUnicode32; 4958 const int *off = offsets32; 4959 4960 UErrorCode errorCode=U_ZERO_ERROR; 4961 UChar32 uniChar; 4962 4963 cnv=ucnv_open("LMBCS-1", &errorCode); 4964 if(U_FAILURE(errorCode)) { 4965 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4966 return; 4967 } 4968 else 4969 { 4970 4971 while(source<limit) { 4972 sourceStart=source; 4973 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4974 if(U_FAILURE(errorCode)) { 4975 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4976 break; 4977 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4978 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4979 uniChar, (source-sourceStart), *results, *off); 4980 break; 4981 } 4982 results++; 4983 off++; 4984 } 4985 } 4986 ucnv_close(cnv); 4987 } 4988 { /* test locale & optimization group operations: Unicode to 
LMBCS */ 4989 4990 UErrorCode errorCode=U_ZERO_ERROR; 4991 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 4992 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 4993 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 4994 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 4995 const UChar * pUniOut = uniString; 4996 UChar * pUniIn = uniString; 4997 uint8_t lmbcsString [4]; 4998 const char * pLMBCSOut = (const char *)lmbcsString; 4999 char * pLMBCSIn = (char *)lmbcsString; 5000 5001 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 5002 ucnv_fromUnicode (cnv16he, 5003 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5004 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5005 NULL, 1, &errorCode); 5006 5007 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 5008 { 5009 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 5010 } 5011 5012 pLMBCSIn= (char *)lmbcsString; 5013 pUniOut = uniString; 5014 ucnv_fromUnicode (cnv01us, 5015 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5016 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5017 NULL, 1, &errorCode); 5018 5019 if (lmbcsString[0] != 0x9F) 5020 { 5021 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 5022 } 5023 5024 /* single byte char from mbcs char set */ 5025 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 5026 pLMBCSOut = (const char *)lmbcsString; 5027 pUniIn = uniString; 5028 ucnv_toUnicode (cnv16jp, 5029 &pUniIn, pUniIn + 1, 5030 &pLMBCSOut, (pLMBCSOut + 1), 5031 NULL, 1, &errorCode); 5032 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5033 { 5034 log_err("Unexpected results from LMBCS-16 single byte char\n"); 5035 } 5036 /* convert to group 1: should be 3 bytes */ 5037 pLMBCSIn = (char *)lmbcsString; 5038 
pUniOut = uniString; 5039 ucnv_fromUnicode (cnv01us, 5040 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 5041 &pUniOut, pUniOut + 1, 5042 NULL, 1, &errorCode); 5043 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 5044 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 5045 { 5046 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 5047 } 5048 pLMBCSOut = (const char *)lmbcsString; 5049 pUniIn = uniString; 5050 ucnv_toUnicode (cnv01us, 5051 &pUniIn, pUniIn + 1, 5052 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 5053 NULL, 1, &errorCode); 5054 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5055 { 5056 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 5057 } 5058 pLMBCSIn = (char *)lmbcsString; 5059 pUniOut = uniString; 5060 ucnv_fromUnicode (cnv16jp, 5061 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 5062 &pUniOut, pUniOut + 1, 5063 NULL, 1, &errorCode); 5064 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 5065 { 5066 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 5067 } 5068 ucnv_close(cnv16he); 5069 ucnv_close(cnv16jp); 5070 ucnv_close(cnv01us); 5071 } 5072 { 5073 /* Small source buffer testing, LMBCS -> Unicode */ 5074 5075 UErrorCode errorCode=U_ZERO_ERROR; 5076 5077 const char * pSource = (const char *)pszLMBCS; 5078 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 5079 int codepointCount = 0; 5080 5081 UChar Out [sizeof(pszUnicode) + 1]; 5082 UChar * pOut = Out; 5083 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 5084 5085 5086 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 5087 if(U_FAILURE(errorCode)) { 5088 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 5089 return; 5090 } 5091 5092 5093 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 
5094 { 5095 ucnv_toUnicode (cnv, 5096 &pOut, 5097 OutLimit, 5098 &pSource, 5099 (pSource+1), /* claim that this is a 1- byte buffer */ 5100 NULL, 5101 FALSE, /* FALSE means there might be more chars in the next buffer */ 5102 &errorCode); 5103 5104 if (U_SUCCESS (errorCode)) 5105 { 5106 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5107 { 5108 /* we are on to the next code point: check value */ 5109 5110 if (Out[0] != pszUnicode[codepointCount]){ 5111 log_err("LMBCS->Uni result %lx should have been %lx \n", 5112 Out[0], pszUnicode[codepointCount]); 5113 } 5114 5115 pOut = Out; /* reset for accumulating next code point */ 5116 codepointCount++; 5117 } 5118 } 5119 else 5120 { 5121 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5122 } 5123 } 5124 { 5125 /* limits & surrogate error testing */ 5126 char LIn [sizeof(pszLMBCS)]; 5127 const char * pLIn = LIn; 5128 5129 char LOut [sizeof(pszLMBCS)]; 5130 char * pLOut = LOut; 5131 5132 UChar UOut [sizeof(pszUnicode)]; 5133 UChar * pUOut = UOut; 5134 5135 UChar UIn [sizeof(pszUnicode)]; 5136 const UChar * pUIn = UIn; 5137 5138 int32_t off [sizeof(offsets)]; 5139 UChar32 uniChar; 5140 5141 errorCode=U_ZERO_ERROR; 5142 5143 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5144 pUIn++; 5145 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); 5146 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5147 { 5148 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5149 } 5150 pUIn--; 5151 5152 errorCode=U_ZERO_ERROR; 5153 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5154 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5155 { 5156 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5157 } 5158 errorCode=U_ZERO_ERROR; 5159 5160 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, 
(const char *)(pLIn-1), &errorCode); 5161 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5162 { 5163 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5164 } 5165 errorCode=U_ZERO_ERROR; 5166 5167 /* 0 byte source request - no error, no pointer movement */ 5168 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5169 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5170 if(U_FAILURE(errorCode)) { 5171 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5172 } 5173 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5174 { 5175 log_err("Unexpected pointer move in 0 byte source request \n"); 5176 } 5177 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5178 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5179 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5180 { 5181 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5182 } 5183 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5184 { 5185 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5186 } 5187 errorCode = U_ZERO_ERROR; 5188 5189 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5190 5191 pUIn = pszUnicode; 5192 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); 5193 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5194 { 5195 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5196 } 5197 5198 errorCode = U_ZERO_ERROR; 5199 5200 pLIn = (const char *)pszLMBCS; 5201 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5202 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const 
char *)pszLMBCS+offsets[4]) 5203 { 5204 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5205 } 5206 5207 /* unpaired or chopped LMBCS surrogates */ 5208 5209 /* OK high surrogate, Low surrogate is chopped */ 5210 LIn [0] = (char)0x14; 5211 LIn [1] = (char)0xD8; 5212 LIn [2] = (char)0x01; 5213 LIn [3] = (char)0x14; 5214 LIn [4] = (char)0xDC; 5215 pLIn = LIn; 5216 errorCode = U_ZERO_ERROR; 5217 pUOut = UOut; 5218 5219 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5220 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5221 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5222 { 5223 log_err("Unexpected results on chopped low surrogate\n"); 5224 } 5225 5226 /* chopped at surrogate boundary */ 5227 LIn [0] = (char)0x14; 5228 LIn [1] = (char)0xD8; 5229 LIn [2] = (char)0x01; 5230 pLIn = LIn; 5231 errorCode = U_ZERO_ERROR; 5232 pUOut = UOut; 5233 5234 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5235 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5236 { 5237 log_err("Unexpected results on chopped at surrogate boundary \n"); 5238 } 5239 5240 /* unpaired surrogate plus valid Unichar */ 5241 LIn [0] = (char)0x14; 5242 LIn [1] = (char)0xD8; 5243 LIn [2] = (char)0x01; 5244 LIn [3] = (char)0x14; 5245 LIn [4] = (char)0xC9; 5246 LIn [5] = (char)0xD0; 5247 pLIn = LIn; 5248 errorCode = U_ZERO_ERROR; 5249 pUOut = UOut; 5250 5251 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5252 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5253 { 5254 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5255 } 5256 5257 /* 
unpaired surrogate plus chopped Unichar */ 5258 LIn [0] = (char)0x14; 5259 LIn [1] = (char)0xD8; 5260 LIn [2] = (char)0x01; 5261 LIn [3] = (char)0x14; 5262 LIn [4] = (char)0xC9; 5263 5264 pLIn = LIn; 5265 errorCode = U_ZERO_ERROR; 5266 pUOut = UOut; 5267 5268 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5269 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5270 { 5271 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5272 } 5273 5274 /* unpaired surrogate plus valid non-Unichar */ 5275 LIn [0] = (char)0x14; 5276 LIn [1] = (char)0xD8; 5277 LIn [2] = (char)0x01; 5278 LIn [3] = (char)0x0F; 5279 LIn [4] = (char)0x3B; 5280 5281 pLIn = LIn; 5282 errorCode = U_ZERO_ERROR; 5283 pUOut = UOut; 5284 5285 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5286 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5287 { 5288 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5289 } 5290 5291 /* unpaired surrogate plus chopped non-Unichar */ 5292 LIn [0] = (char)0x14; 5293 LIn [1] = (char)0xD8; 5294 LIn [2] = (char)0x01; 5295 LIn [3] = (char)0x0F; 5296 5297 pLIn = LIn; 5298 errorCode = U_ZERO_ERROR; 5299 pUOut = UOut; 5300 5301 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5302 5303 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5304 { 5305 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5306 } 5307 } 5308 } 5309 ucnv_close(cnv); /* final cleanup */ 5310} 5311 5312 5313static void TestJitterbug255() 5314{ 5315 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 
5316 const char *testBuffer = (const char *)testBytes; 5317 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5318 UErrorCode status = U_ZERO_ERROR; 5319 /*UChar32 result;*/ 5320 UConverter *cnv = 0; 5321 5322 cnv = ucnv_open("shift-jis", &status); 5323 if (U_FAILURE(status) || cnv == 0) { 5324 log_data_err("Failed to open the converter for SJIS.\n"); 5325 return; 5326 } 5327 while (testBuffer != testEnd) 5328 { 5329 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5330 if (U_FAILURE(status)) 5331 { 5332 log_err("Failed to convert the next UChar for SJIS.\n"); 5333 break; 5334 } 5335 } 5336 ucnv_close(cnv); 5337} 5338 5339static void TestEBCDICUS4XML() 5340{ 5341 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5342 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5343 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5344 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5345 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5346 UChar *unicodes = unicodes_x; 5347 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5348 char *target = target_x; 5349 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5350 UErrorCode status = U_ZERO_ERROR; 5351 UConverter *cnv = 0; 5352 5353 cnv = ucnv_open("ebcdic-xml-us", &status); 5354 if (U_FAILURE(status) || cnv == 0) { 5355 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5356 return; 5357 } 5358 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5359 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5360 log_err("To Unicode conversion failed in EBCDICUS4XML test. 
%s\n", 5361 u_errorName(status)); 5362 printUSeqErr(unicodes_x, 3); 5363 printUSeqErr(toUnicodeMaps, 3); 5364 } 5365 status = U_ZERO_ERROR; 5366 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5367 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5368 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5369 u_errorName(status)); 5370 printSeqErr((const unsigned char*)target_x, 3); 5371 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5372 } 5373 ucnv_close(cnv); 5374} 5375#endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5376 5377#if !UCONFIG_NO_COLLATION 5378 5379static void TestJitterbug981(){ 5380 const UChar* rules; 5381 int32_t rules_length, target_cap, bytes_needed, buff_size; 5382 UErrorCode status = U_ZERO_ERROR; 5383 UConverter *utf8cnv; 5384 UCollator* myCollator; 5385 char *buff; 5386 int numNeeded=0; 5387 utf8cnv = ucnv_open ("utf8", &status); 5388 if(U_FAILURE(status)){ 5389 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5390 return; 5391 } 5392 myCollator = ucol_open("zh", &status); 5393 if(U_FAILURE(status)){ 5394 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); 5395 ucnv_close(utf8cnv); 5396 return; 5397 } 5398 5399 rules = ucol_getRules(myCollator, &rules_length); 5400 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5401 buff = malloc(buff_size); 5402 5403 target_cap = 0; 5404 do { 5405 ucnv_reset(utf8cnv); 5406 status = U_ZERO_ERROR; 5407 if(target_cap >= buff_size) { 5408 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5409 break; 5410 } 5411 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5412 rules, rules_length, &status); 5413 target_cap = (bytes_needed > target_cap) ? 
bytes_needed : target_cap +1; 5414 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5415 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5416 break; 5417 } 5418 numNeeded = bytes_needed; 5419 } while (status == U_BUFFER_OVERFLOW_ERROR); 5420 ucol_close(myCollator); 5421 ucnv_close(utf8cnv); 5422 free(buff); 5423} 5424 5425#endif 5426 5427#if !UCONFIG_NO_LEGACY_CONVERSION 5428static void TestJitterbug1293(){ 5429 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5430 char target[256]; 5431 UErrorCode status = U_ZERO_ERROR; 5432 UConverter* conv=NULL; 5433 int32_t target_cap, bytes_needed, numNeeded = 0; 5434 conv = ucnv_open("shift-jis",&status); 5435 if(U_FAILURE(status)){ 5436 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5437 return; 5438 } 5439 5440 do{ 5441 target_cap =0; 5442 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5443 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5444 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5445 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5446 } 5447 numNeeded = bytes_needed; 5448 } while (status == U_BUFFER_OVERFLOW_ERROR); 5449 if(U_FAILURE(status)){ 5450 log_err("An error occured in ucnv_fromUChars. 
Error: %s", u_errorName(status)); 5451 return; 5452 } 5453 ucnv_close(conv); 5454} 5455#endif 5456 5457static void TestJB5275_1(){ 5458 5459 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5460 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5461 /* Switch script: */ 5462 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5463 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5464 "\xEF\x40\x3B\xB3\x0A"; 5465 static const UChar expected[] ={ 5466 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5467 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5468 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5469 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5470 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5471 }; 5472 5473 UErrorCode status = U_ZERO_ERROR; 5474 UConverter* conv = ucnv_open("iscii-gur", &status); 5475 UChar dest[100] = {'\0'}; 5476 UChar* target = dest; 5477 UChar* targetLimit = dest+100; 5478 const char* source = data; 5479 const char* sourceLimit = data+strlen(data); 5480 const UChar* exp = expected; 5481 5482 if (U_FAILURE(status)) { 5483 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5484 return; 5485 } 5486 5487 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5488 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5489 if(U_FAILURE(status)){ 5490 log_err("conversion failed: %s \n", u_errorName(status)); 5491 } 5492 targetLimit = target; 5493 target = dest; 5494 printUSeq(target, targetLimit-target); 5495 while(target<targetLimit){ 5496 if(*exp!=*target){ 5497 log_err("did not get the expected output. 
\\u%04X != \\u%04X (got)\n", *exp, *target); 5498 } 5499 target++; 5500 exp++; 5501 } 5502 ucnv_close(conv); 5503} 5504 5505static void TestJB5275(){ 5506 static const char* data = 5507 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5508 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5509 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5510 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5511 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5512 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5513 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5514 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5515 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5516 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5517 static const UChar expected[] ={ 5518 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5519 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5520 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5521 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5522 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5523 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5524 }; 5525 5526 UErrorCode status = U_ZERO_ERROR; 5527 UConverter* conv = ucnv_open("iscii", &status); 5528 UChar dest[100] = {'\0'}; 5529 UChar* target = dest; 5530 UChar* targetLimit = dest+100; 5531 const char* source = data; 5532 const char* sourceLimit = data+strlen(data); 5533 const UChar* exp = expected; 5534 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5535 if(U_FAILURE(status)){ 5536 log_err("conversion failed: %s \n", u_errorName(status)); 5537 } 5538 targetLimit = target; 5539 target = dest; 5540 5541 printUSeq(target, targetLimit-target); 5542 5543 while(target<targetLimit){ 5544 if(*exp!=*target){ 5545 log_err("did not get the expected output. 
\\u%04X != \\u%04X (got)\n", *exp, *target); 5546 } 5547 target++; 5548 exp++; 5549 } 5550 ucnv_close(conv); 5551} 5552 5553static void 5554TestIsFixedWidth() { 5555 UErrorCode status = U_ZERO_ERROR; 5556 UConverter *cnv = NULL; 5557 int32_t i; 5558 5559 const char *fixedWidth[] = { 5560 "US-ASCII", 5561 "UTF32", 5562 "ibm-5478_P100-1995" 5563 }; 5564 5565 const char *notFixedWidth[] = { 5566 "GB18030", 5567 "UTF8", 5568 "windows-949-2000", 5569 "UTF16" 5570 }; 5571 5572 for (i = 0; i < LENGTHOF(fixedWidth); i++) { 5573 cnv = ucnv_open(fixedWidth[i], &status); 5574 if (cnv == NULL || U_FAILURE(status)) { 5575 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status)); 5576 continue; 5577 } 5578 5579 if (!ucnv_isFixedWidth(cnv, &status)) { 5580 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]); 5581 } 5582 ucnv_close(cnv); 5583 } 5584 5585 for (i = 0; i < LENGTHOF(notFixedWidth); i++) { 5586 cnv = ucnv_open(notFixedWidth[i], &status); 5587 if (cnv == NULL || U_FAILURE(status)) { 5588 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status)); 5589 continue; 5590 } 5591 5592 if (ucnv_isFixedWidth(cnv, &status)) { 5593 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]); 5594 } 5595 ucnv_close(cnv); 5596 } 5597} 5598