1// Copyright (C) 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ***************************************************************************/ 8/***************************************************************************** 9* 10* File NCNVCBTS 11* 12* Modification History: 13* Name Date Description 14* Madhu Katragadda 06/23/2000 Tests for Conveter FallBack API and Functionality 15****************************************************************************** 16*/ 17#include <stdio.h> 18#include "unicode/uloc.h" 19#include "unicode/ucnv.h" 20#include "unicode/ucnv_err.h" 21#include "cintltst.h" 22#include "unicode/utypes.h" 23#include "unicode/ustring.h" 24#include "ncnvfbts.h" 25#include "cmemory.h" 26#include "cstring.h" 27 28#if !UCONFIG_NO_LEGACY_CONVERSION 29#define NEW_MAX_BUFFER 999 30 31 32#define nct_min(x,y) ((x<y) ? x : y) 33 34static int32_t gInBufferSize = 0; 35static int32_t gOutBufferSize = 0; 36static char gNuConvTestName[1024]; 37 38static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 39{ 40 if(cnv && cnv[0] == '@') { 41 return ucnv_openPackage("testdata", cnv+1, err); 42 } else { 43 return ucnv_open(cnv, err); 44 } 45} 46 47 48static void printSeq(const unsigned char* a, int len) 49{ 50 int i=0; 51 log_verbose("{"); 52 while (i<len) 53 log_verbose("0x%02x ", a[i++]); 54 log_verbose("}\n"); 55} 56 57static void printUSeq(const UChar* a, int len) 58{ 59 int i=0; 60 log_verbose("{U+"); 61 while (i<len) 62 log_verbose("0x%04x ", a[i++]); 63 log_verbose("}\n"); 64} 65 66static void printSeqErr(const unsigned char* a, int len) 67{ 68 int i=0; 69 fprintf(stderr, "{"); 70 while (i<len) 71 fprintf(stderr, "0x%02x ", a[i++]); 72 fprintf(stderr, "}\n"); 73} 74 75static void printUSeqErr(const UChar* a, int len) 76{ 77 int i=0; 78 fprintf(stderr, "{U+"); 79 while (i<len) 80 fprintf(stderr, "0x%04x ", a[i++]); 81 fprintf(stderr,"}\n"); 82} 83 84static void TestConverterFallBack(void) 85{ 86 TestConvertFallBackWithBufferSizes(10,10); 87 TestConvertFallBackWithBufferSizes(2,3); 88 TestConvertFallBackWithBufferSizes(3,2); 89 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,1); 90 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,2); 91 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,3); 92 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,4); 93 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,5); 94 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,6); 95 TestConvertFallBackWithBufferSizes(1,NEW_MAX_BUFFER); 96 TestConvertFallBackWithBufferSizes(2,NEW_MAX_BUFFER); 97 TestConvertFallBackWithBufferSizes(3,NEW_MAX_BUFFER); 98 TestConvertFallBackWithBufferSizes(4,NEW_MAX_BUFFER); 99 TestConvertFallBackWithBufferSizes(5,NEW_MAX_BUFFER); 100 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 101 102} 103 104 105void addTestConverterFallBack(TestNode** root); 106 107void addTestConverterFallBack(TestNode** root) 108{ 109#if !UCONFIG_NO_FILE_IO 110 addTest(root, &TestConverterFallBack, "tsconv/ncnvfbts/TestConverterFallBack"); 111#endif 112 113} 114 115 116/* Note that this test already makes use of statics, so it's not really 117 multithread safe. 118 This convenience function lets us make the error messages actually useful. 119*/ 120 121static void setNuConvTestName(const char *codepage, const char *direction) 122{ 123 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 124 codepage, 125 direction, 126 (int)gInBufferSize, 127 (int)gOutBufferSize); 128} 129 130 131static UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 132 const char *codepage, UBool fallback, const int32_t *expectOffsets) 133{ 134 135 136 UErrorCode status = U_ZERO_ERROR; 137 UConverter *conv = 0; 138 char junkout[NEW_MAX_BUFFER]; /* FIX */ 139 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 140 const UChar *src; 141 char *end; 142 char *targ; 143 int32_t *offs; 144 int i; 145 int32_t realBufferSize; 146 char *realBufferEnd; 147 const UChar *realSourceEnd; 148 const UChar *sourceLimit; 149 UBool checkOffsets = TRUE; 150 UBool doFlush; 151 UBool action=FALSE; 152 char *p; 153 154 155 for(i=0;i<NEW_MAX_BUFFER;i++) 156 junkout[i] = (char)0xF0; 157 for(i=0;i<NEW_MAX_BUFFER;i++) 158 junokout[i] = 0xFF; 159 setNuConvTestName(codepage, "FROM"); 160 161 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 162 gOutBufferSize); 163 164 conv = my_ucnv_open(codepage, &status); 165 if(U_FAILURE(status)) 166 { 167 log_data_err("Couldn't open converter %s\n",codepage); 168 return TRUE; 169 } 170 171 log_verbose("Converter opened..\n"); 172 /*----setting the callback routine----*/ 173 ucnv_setFallback (conv, fallback); 174 action = ucnv_usesFallback(conv); 175 if(action != fallback){ 176 log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status)); 177 } 178 /*------------------------*/ 179 src = source; 180 targ = junkout; 181 offs = junokout; 182 183 realBufferSize = UPRV_LENGTHOF(junkout); 184 realBufferEnd = junkout + realBufferSize; 185 realSourceEnd = source + sourceLen; 186 187 if ( gOutBufferSize != realBufferSize ) 188 checkOffsets = FALSE; 189 190 if( gInBufferSize != NEW_MAX_BUFFER ) 191 checkOffsets = FALSE; 192 193 do 194 { 195 end = nct_min(targ + gOutBufferSize, realBufferEnd); 196 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 197 198 doFlush = (UBool)(sourceLimit == realSourceEnd); 199 200 if(targ == realBufferEnd) 201 { 202 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 203 return FALSE; 204 } 205 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 206 207 208 status = U_ZERO_ERROR; 209 210 ucnv_fromUnicode (conv, 211 (char **)&targ, 212 (const char *)end, 213 &src, 214 sourceLimit, 215 checkOffsets ? offs : NULL, 216 doFlush, /* flush if we're at the end of the input data */ 217 &status); 218 219 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (sourceLimit < realSourceEnd) ); 220 221 if(U_FAILURE(status)) 222 { 223 log_err("Problem doing toUnicode, errcode %d %s\n", myErrorName(status), gNuConvTestName); 224 return FALSE; 225 } 226 227 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 228 sourceLen, targ-junkout); 229 if(getTestOption(VERBOSITY_OPTION)) 230 { 231 char junk[9999]; 232 char offset_str[9999]; 233 234 junk[0] = 0; 235 offset_str[0] = 0; 236 for(p = junkout;p<targ;p++) 237 { 238 sprintf(junk + uprv_strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 239 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 240 } 241 242 log_verbose(junk); 243 printSeq((const unsigned char*)expect, expectLen); 244 if ( checkOffsets ) 245 { 246 log_verbose("\nOffsets:"); 247 log_verbose(offset_str); 248 } 249 log_verbose("\n"); 250 } 251 ucnv_close(conv); 252 253 254 if(expectLen != targ-junkout) 255 { 256 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 257 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 258 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 259 printSeqErr((const unsigned char*)expect, expectLen); 260 return FALSE; 261 } 262 263 if (checkOffsets && (expectOffsets != 0) ) 264 { 265 log_verbose("\ncomparing %d offsets..\n", targ-junkout); 266 if(uprv_memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 267 log_err("\ndid not get the expected offsets while %s \n", gNuConvTestName); 268 log_err("Got : "); 269 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 270 for(p=junkout;p<targ;p++) 271 log_err("%d, ", junokout[p-junkout]); 272 log_err("\nExpected: "); 273 for(i=0; i<(targ-junkout); i++) 274 log_err("%d,", expectOffsets[i]); 275 } 276 } 277 278 log_verbose("\n\ncomparing..\n"); 279 if(!memcmp(junkout, expect, expectLen)) 280 { 281 log_verbose("Matches!\n"); 282 return TRUE; 283 } 284 else 285 { 286 log_err("String does not match. %s\n", gNuConvTestName); 287 log_verbose("String does not match. %s\n", gNuConvTestName); 288 printSeqErr((const unsigned char*)junkout, expectLen); 289 printSeqErr((const unsigned char*)expect, expectLen); 290 return FALSE; 291 } 292} 293 294static UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 295 const char *codepage, UBool fallback, const int32_t *expectOffsets) 296{ 297 UErrorCode status = U_ZERO_ERROR; 298 UConverter *conv = 0; 299 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 300 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 301 const char *src; 302 const char *realSourceEnd; 303 const char *srcLimit; 304 UChar *targ; 305 UChar *end; 306 int32_t *offs; 307 int i; 308 UBool checkOffsets = TRUE; 309 char junk[9999]; 310 char offset_str[9999]; 311 UChar *p; 312 UBool action; 313 314 int32_t realBufferSize; 315 UChar *realBufferEnd; 316 317 318 for(i=0;i<NEW_MAX_BUFFER;i++) 319 junkout[i] = 0xFFFE; 320 321 for(i=0;i<NEW_MAX_BUFFER;i++) 322 junokout[i] = -1; 323 324 setNuConvTestName(codepage, "TO"); 325 326 log_verbose("\n========= %s\n", gNuConvTestName); 327 328 conv = my_ucnv_open(codepage, &status); 329 if(U_FAILURE(status)) 330 { 331 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 332 return TRUE; /* because it has been logged */ 333 } 334 335 log_verbose("Converter opened..\n"); 336 337 src = (const char *)source; 338 targ = junkout; 339 offs = junokout; 340 341 realBufferSize = UPRV_LENGTHOF(junkout); 342 realBufferEnd = junkout + realBufferSize; 343 realSourceEnd = src + sourcelen; 344 /*----setting the fallback routine----*/ 345 ucnv_setFallback (conv, fallback); 346 action = ucnv_usesFallback(conv); 347 if(action != fallback){ 348 log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status)); 349 } 350 /*-------------------------------------*/ 351 if ( gOutBufferSize != realBufferSize ) 352 checkOffsets = FALSE; 353 354 if( gInBufferSize != NEW_MAX_BUFFER ) 355 checkOffsets = FALSE; 356 357 do 358 { 359 end = nct_min( targ + gOutBufferSize, realBufferEnd); 360 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 361 362 if(targ == realBufferEnd) 363 { 364 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 365 return FALSE; 366 } 367 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 368 369 370 371 status = U_ZERO_ERROR; 372 373 ucnv_toUnicode (conv, 374 &targ, 375 end, 376 (const char **)&src, 377 (const char *)srcLimit, 378 checkOffsets ? offs : NULL, 379 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 380 &status); 381 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (srcLimit < realSourceEnd) ); /* while we just need another buffer */ 382 383 384 if(U_FAILURE(status)) 385 { 386 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 387 return FALSE; 388 } 389 390 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 391 sourcelen, targ-junkout); 392 if(getTestOption(VERBOSITY_OPTION)) 393 { 394 395 junk[0] = 0; 396 offset_str[0] = 0; 397 398 for(p = junkout;p<targ;p++) 399 { 400 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 401 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 402 } 403 404 log_verbose(junk); 405 printUSeq(expect, expectlen); 406 if ( checkOffsets ) 407 { 408 log_verbose("\nOffsets:"); 409 log_verbose(offset_str); 410 } 411 log_verbose("\n"); 412 } 413 ucnv_close(conv); 414 415 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 416 417 if (checkOffsets && (expectOffsets != 0)) 418 { 419 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 420 { 421 log_err("\n\ndid not get the expected offsets while %s \n", gNuConvTestName); 422 log_err("\nGot : "); 423 for(p=junkout;p<targ;p++) 424 log_err("%d, ", junokout[p-junkout]); 425 log_err("\nExpected: "); 426 for(i=0; i<(targ-junkout); i++) 427 log_err("%d,", expectOffsets[i]); 428 log_err(""); 429 for(i=0; i<(targ-junkout); i++) 430 log_err("0x%04X,", junkout[i]); 431 log_err(""); 432 for(i=0; i<(src-(const char *)source); i++) 433 log_err("0x%04X,", (unsigned char)source[i]); 434 } 435 } 436 437 if(!memcmp(junkout, expect, expectlen*2)) 438 { 439 log_verbose("Matches!\n"); 440 return TRUE; 441 } 442 else 443 { 444 log_err("String does not match. %s\n", gNuConvTestName); 445 log_verbose("String does not match. %s\n", gNuConvTestName); 446 printUSeqErr(junkout, expectlen); 447 printf("\n"); 448 printUSeqErr(expect, expectlen); 449 return FALSE; 450 } 451} 452 453 454 455static void TestConvertFallBackWithBufferSizes(int32_t outsize, int32_t insize ) 456{ 457 458 static const UChar SBCSText[] = 459 { 0x0021, 0xFF01, 0x0022, 0xFF02, 0x0023, 0xFF03, 0x003A, 0xFF1A, 0x003B, 0xFF1B, 0x003C, 0xFF1C }; 460 /* 21, ?, 22, ?, 23, ?, 3a, ?, 3b, ?, 3c, ? SBCS*/ 461 static const uint8_t expectedNative[] = 462 { 0x21, 0x21, 0x22, 0x22, 0x23, 0x23, 0x3a, 0x3a, 0x3b, 0x3b, 0x3c, 0x3c}; 463 static const UChar retrievedSBCSText[]= 464 { 0x0021, 0x0021, 0x0022, 0x0022, 0x0023, 0x0023, 0x003A, 0x003A, 0x003B, 0x003B, 0x003C, 0x003C }; 465 static const int32_t toNativeOffs [] = 466 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b}; 467 static const int32_t fromNativeoffs [] = 468 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; 469 470 471 /* 1363 isn't DBCS, but it has the DBCS section */ 472 static const UChar DBCSText[] = 473 { 0x00a1, 0x00ad, 0x2010, 0x00b7, 0x30fb}; 474 static const uint8_t expectedIBM1363_DBCS[] = 475 { 0xa2, 0xae, 0xa1 ,0xa9, 0xa1, 0xa9,0xa1 ,0xa4, 0xa1, 0xa4}; 476 static const UChar retrievedDBCSText[]= 477 { 0x00a1, 0x2010, 0x2010, 0x30fb, 0x30fb }; 478 static const int32_t toIBM1363Offs_DBCS[] = 479 { 0x00, 0x00, 0x01,0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04}; 480 static const int32_t fromIBM1363offs_DBCS[] = 481 { 0, 2, 4, 6, 8}; 482 483 484 static const UChar MBCSText[] = 485 { 0x0001, 0x263a, 0x2013, 0x2014, 0x263b, 0x0002}; 486 static const uint8_t expectedIBM950[] = 487 { 0x01, 0x01, 0xa1, 0x56, 0xa1, 0x56, 0x02, 0x02}; 488 static const UChar retrievedMBCSText[]= 489 { 0x0001, 0x0001, 0x2014, 0x2014, 0x0002, 0x0002}; 490 static const int32_t toIBM950Offs [] = 491 { 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x05}; 492 static const int32_t fromIBM950offs [] = 493 { 0, 1, 2, 4, 6, 7}; 494 495 static const UChar MBCSText1363[] = 496 { 0x0005, 497 0xffe8, 498 0x0007, 499 0x2022, 500 0x005c, 501 0x00b7, 502 0x3016, 503 0x30fb, 504 0x9a36}; 505 static const uint8_t expectedIBM1363[] = 506 { 0x05, 507 0x05, 508 0x07, 509 0x07, 510 0x7f, 511 0xa1, 0xa4, 512 0xa1, 0xe0, 513 0xa1, 0xa4, 514 0xf5, 0xe2}; 515 static const UChar retrievedMBCSText1363[]= 516 { 0x0005, 0x0005, 0x0007, 0x0007, 0x001a, 0x30fb, 0x25a1, 0x30fb, 0x9a36}; 517 static const int32_t toIBM1363Offs [] = 518 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08}; 519 static const int32_t fromIBM1363offs [] = 520 { 0, 1, 2, 3, 4, 5, 7, 9, 11}; 521 522 523 524 static const char* nativeCodePage[]={ 525 /*NLCS Mapping*/ 526 "ibm-437", 527 "ibm-850", 528 "ibm-878", 529 "ibm-923", 530 "ibm-1051", 531 "ibm-1089", 532 "ibm-1250", 533 "ibm-1251", 534 "ibm-1253", 535 "ibm-1254", 536 "ibm-1255", 537 "ibm-1256", 538 "ibm-1257", 539 "ibm-1258", 540 "ibm-1276" 541 }; 542 543 int32_t i=0; 544 gInBufferSize = insize; 545 gOutBufferSize = outsize; 546 547 for(i=0; i<UPRV_LENGTHOF(nativeCodePage); i++){ 548 log_verbose("Testing %s\n", nativeCodePage[i]); 549 if(!testConvertFromUnicode(SBCSText, UPRV_LENGTHOF(SBCSText), 550 expectedNative, sizeof(expectedNative), nativeCodePage[i], TRUE, toNativeOffs )) 551 log_err("u-> %s(SBCS) with FallBack did not match.\n", nativeCodePage[i]); 552 553 if(!testConvertToUnicode(expectedNative, sizeof(expectedNative), 554 retrievedSBCSText, UPRV_LENGTHOF(retrievedSBCSText), nativeCodePage[i], TRUE, fromNativeoffs )) 555 log_err("%s->u(SBCS) with Fallback did not match.\n", nativeCodePage[i]); 556 } 557 558 /*DBCS*/ 559 if(!testConvertFromUnicode(DBCSText, UPRV_LENGTHOF(DBCSText), 560 expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS), "ibm-1363", TRUE, toIBM1363Offs_DBCS )) 561 log_err("u-> ibm-1363(DBCS portion) with FallBack did not match.\n"); 562 563 if(!testConvertToUnicode(expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS), 564 retrievedDBCSText, UPRV_LENGTHOF(retrievedDBCSText),"ibm-1363", TRUE, fromIBM1363offs_DBCS )) 565 log_err("ibm-1363->u(DBCS portion) with Fallback did not match.\n"); 566 567 568 /*MBCS*/ 569 if(!testConvertFromUnicode(MBCSText, UPRV_LENGTHOF(MBCSText), 570 expectedIBM950, sizeof(expectedIBM950), "ibm-950", TRUE, toIBM950Offs )) 571 log_err("u-> ibm-950(MBCS) with FallBack did not match.\n"); 572 573 if(!testConvertToUnicode(expectedIBM950, sizeof(expectedIBM950), 574 retrievedMBCSText, UPRV_LENGTHOF(retrievedMBCSText),"ibm-950", TRUE, fromIBM950offs )) 575 log_err("ibm-950->u(MBCS) with Fallback did not match.\n"); 576 577 /*commented untill data table is available*/ 578 log_verbose("toUnicode fallback with fallback data for MBCS\n"); 579 { 580 const uint8_t IBM950input[] = { 581 0xf4, 0x87, 0xa4, 0x4a, 0xf4, 0x88, 0xa4, 0x4b, 582 0xf9, 0x92, 0xdc, 0xb0, }; 583 UChar expectedUnicodeText[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9}; 584 int32_t fromIBM950inputOffs [] = { 0, 2, 4, 6, 8, 10}; 585 /* for testing reverse fallback behavior */ 586 UChar expectedFallbackFalse[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9}; 587 588 if(!testConvertToUnicode(IBM950input, sizeof(IBM950input), 589 expectedUnicodeText, UPRV_LENGTHOF(expectedUnicodeText),"ibm-950", TRUE, fromIBM950inputOffs )) 590 log_err("ibm-950->u(MBCS) with Fallback did not match.\n"); 591 if(!testConvertToUnicode(IBM950input, sizeof(IBM950input), 592 expectedFallbackFalse, UPRV_LENGTHOF(expectedFallbackFalse),"ibm-950", FALSE, fromIBM950inputOffs )) 593 log_err("ibm-950->u(MBCS) with Fallback did not match.\n"); 594 595 } 596 log_verbose("toUnicode fallback with fallback data for euc-tw\n"); 597 { 598 const uint8_t euc_tw_input[] = { 599 0xA7, 0xCC, 0x8E, 0xA2, 0xA1, 0xAB, 600 0xA8, 0xC7, 0xC8, 0xDE, 601 0xA8, 0xCD, 0x8E, 0xA2, 0xA2, 0xEA,}; 602 UChar expectedUnicodeText[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278}; 603 int32_t from_euc_tw_offs [] = { 0, 2, 6, 8, 10, 12}; 604 /* for testing reverse fallback behavior */ 605 UChar expectedFallbackFalse[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278}; 606 607 if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input), 608 expectedUnicodeText, UPRV_LENGTHOF(expectedUnicodeText),"euc-tw", TRUE, from_euc_tw_offs )) 609 log_err("from euc-tw->u with Fallback did not match.\n"); 610 611 if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input), 612 expectedFallbackFalse, UPRV_LENGTHOF(expectedFallbackFalse),"euc-tw", FALSE, from_euc_tw_offs )) 613 log_err("from euc-tw->u with Fallback false did not match.\n"); 614 615 616 } 617 log_verbose("fromUnicode to euc-tw with fallback data euc-tw\n"); 618 { 619 UChar inputText[]= { 0x0001, 0x008e, 0x203e, 0x2223, 0xff5c, 0x5296, 620 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278, 0xEDEC}; 621 const uint8_t expected_euc_tw[] = { 622 0x01, 0x1a, 0xa2, 0xa3, 623 0xa2, 0xde, 0xa2, 0xde, 624 0x8e, 0xa2, 0xe5, 0xb9, 625 0x8e, 0xa2, 0xa1, 0xab, 0x8e, 0xa2, 0xa1, 0xab, 626 0xc8, 0xde, 0xc8, 0xde, 627 0x8e, 0xa2, 0xa2, 0xea, 0x8e, 0xa2, 0xa2, 0xea, 628 0x8e, 0xac, 0xc6, 0xf7}; 629 int32_t to_euc_tw_offs [] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 630 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12}; 631 632 if(!testConvertFromUnicode(inputText, UPRV_LENGTHOF(inputText), 633 expected_euc_tw, sizeof(expected_euc_tw), "euc-tw", TRUE, to_euc_tw_offs )) 634 log_err("u-> euc-tw with FallBack did not match.\n"); 635 636 } 637 638 /*MBCS 1363*/ 639 if(!testConvertFromUnicode(MBCSText1363, UPRV_LENGTHOF(MBCSText1363), 640 expectedIBM1363, sizeof(expectedIBM1363), "ibm-1363", TRUE, toIBM1363Offs )) 641 log_err("u-> ibm-1363(MBCS) with FallBack did not match.\n"); 642 643 if(!testConvertToUnicode(expectedIBM1363, sizeof(expectedIBM1363), 644 retrievedMBCSText1363, UPRV_LENGTHOF(retrievedMBCSText1363),"ibm-1363", TRUE, fromIBM1363offs )) 645 log_err("ibm-1363->u(MBCS) with Fallback did not match.\n"); 646 647 648 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 649 which is test file for MBCS conversion with single-byte codepage data.*/ 650 { 651 652 /* MBCS with single byte codepage data test1.ucm*/ 653 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x0003}; 654 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0x08, 0xff,}; 655 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, 7}; 656 657 const uint8_t test1input[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09}; 658 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd, 0xfffe}; 659 int32_t fromtest1Offs[] = { 0, 1, 2, 3, 3, 4,5}; 660 661 /*from Unicode*/ 662 if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput), 663 expectedtest1, sizeof(expectedtest1), "@test1", TRUE, totest1Offs )) 664 log_err("u-> test1(MBCS conversion with single-byte) did not match.\n"); 665 666 /*to Unicode*/ 667 if(!testConvertToUnicode(test1input, sizeof(test1input), 668 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test1", TRUE, fromtest1Offs )) 669 log_err("test1(MBCS conversion with single-byte) -> u did not match.\n"); 670 671 } 672 673 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm 674 which is test file for MBCS conversion with three-byte codepage data.*/ 675 { 676 677 /* MBCS with three byte codepage data test3.ucm*/ 678 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x000b, 0xd84d, 0xdc56, 0x000e, 0x0003, }; 679 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x07, 0xff, 0x01, 0x02, 0x0b, 0x01, 0x02, 0x0a, 0xff, 0xff,}; 680 int32_t totest3Offs[] = { 0, 1, 2, 3, 5, 7, 7, 7, 8, 8, 8, 10, 11}; 681 682 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 683 0x01, 0x02, 0x0e, 0x01, 0x02, 0x0d, 0x03, 0x01, 0x02, 0x0f,}; 684 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 685 0x000e, 0xd891, 0xdd67, 0xfffd, 0xfffd }; 686 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10, 13, 13, 16, 17}; 687 688 /*from Unicode*/ 689 if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput), 690 expectedtest3, sizeof(expectedtest3), "@test3", TRUE, totest3Offs )) 691 log_err("u-> test3(MBCS conversion with three-byte) did not match.\n"); 692 693 /*to Unicode*/ 694 if(!testConvertToUnicode(test3input, sizeof(test3input), 695 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", TRUE, fromtest3Offs )) 696 log_err("test3(MBCS conversion with three-byte) -> u did not match.\n"); 697 698 } 699 700 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm 701 which is test file for MBCS conversion with four-byte codepage data.*/ 702 { 703 704 /* MBCS with three byte codepage data test4.ucm*/ 705 const UChar unicodeInput[] = 706 { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 707 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x000f}; 708 const uint8_t expectedtest4[] = 709 { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0xff, 710 0x01, 0x02, 0x03, 0x0a, 0xff, 0xff, 0xff}; 711 int32_t totest4Offs[] = 712 { 0, 1, 2, 3, 3, 3, 3, 4, 6, 8, 8, 8, 8, 10, 11, 13}; 713 714 const uint8_t test4input[] = 715 { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x08, 716 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0e, 0x01, 0x02, 0x03, 0x0d, 0x03, 0x01, 0x02, 0x03, 0x0c,}; 717 const UChar expectedUnicode[] = 718 { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 719 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x1a, 0xfffd}; 720 int32_t fromtest4Offs[] = 721 { 0, 1, 2, 3, 7, 7, 8, 8, 9, 9, 13, 17, 17, 21, 22,}; 722 723 /*from Unicode*/ 724 if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput), 725 expectedtest4, sizeof(expectedtest4), "@test4", TRUE, totest4Offs )) 726 log_err("u-> test4(MBCS conversion with four-byte) did not match.\n"); 727 728 /*to Unicode*/ 729 if(!testConvertToUnicode(test4input, sizeof(test4input), 730 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", TRUE, fromtest4Offs )) 731 log_err("test4(MBCS conversion with four-byte) -> u did not match.\n"); 732 733 } 734 /* Test for jitterbug 509 EBCDIC_STATEFUL Converters*/ 735 { 736 const UChar unicodeInput[] = {0x00AF, 0x2013, 0x2223, 0x004C, 0x5F5D, 0xFF5E }; 737 const uint8_t expectedtest1[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1,0x0f }; 738 int32_t totest1Offs[] = {0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5 }; 739 const uint8_t test1input[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1 }; 740 const UChar expectedUnicode[] = {0x203e, 0x2014, 0xff5c, 0x004c, 0x5f5e, 0x223c }; 741 int32_t fromtest1Offs[] = {1, 3, 5, 8, 10, 12 }; 742 /*from Unicode*/ 743 if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput), 744 expectedtest1, sizeof(expectedtest1), "ibm-1371", TRUE, totest1Offs )) 745 log_err("u-> ibm-1371(MBCS conversion with single-byte) did not match.,\n"); 746 /*to Unicode*/ 747 if(!testConvertToUnicode(test1input, sizeof(test1input), 748 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "ibm-1371", TRUE, fromtest1Offs )) 749 log_err("ibm-1371(MBCS conversion with single-byte) -> u did not match.,\n"); 750 } 751 752} 753#endif 754