nccbtst.c revision 51cfa1a9a96cad34675a6415fe86dfdf3f525bb6
1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2006, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/* 7******************************************************************************** 8* File NCCBTST.C 9* 10* Modification History: 11* Name Description 12* Madhu Katragadda 7/21/1999 Testing error callback routines 13******************************************************************************** 14*/ 15#include <stdio.h> 16#include <stdlib.h> 17#include <string.h> 18#include <ctype.h> 19#include "cstring.h" 20#include "unicode/uloc.h" 21#include "unicode/ucnv.h" 22#include "unicode/ucnv_err.h" 23#include "cintltst.h" 24#include "unicode/utypes.h" 25#include "unicode/ustring.h" 26#include "nccbtst.h" 27#include "unicode/ucnv_cb.h" 28#define NEW_MAX_BUFFER 999 29 30#define nct_min(x,y) ((x<y) ? x : y) 31#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0])) 32 33static int32_t gInBufferSize = 0; 34static int32_t gOutBufferSize = 0; 35static char gNuConvTestName[1024]; 36 37static void printSeq(const uint8_t* a, int len) 38{ 39 int i=0; 40 log_verbose("\n{"); 41 while (i<len) 42 log_verbose("0x%02X, ", a[i++]); 43 log_verbose("}\n"); 44} 45 46static void printUSeq(const UChar* a, int len) 47{ 48 int i=0; 49 log_verbose("{"); 50 while (i<len) 51 log_verbose(" 0x%04x, ", a[i++]); 52 log_verbose("}\n"); 53} 54 55static void printSeqErr(const uint8_t* a, int len) 56{ 57 int i=0; 58 fprintf(stderr, "{"); 59 while (i<len) 60 fprintf(stderr, " 0x%02x, ", a[i++]); 61 fprintf(stderr, "}\n"); 62} 63 64static void printUSeqErr(const UChar* a, int len) 65{ 66 int i=0; 67 fprintf(stderr, "{"); 68 while (i<len) 69 fprintf(stderr, "0x%04x, ", a[i++]); 70 fprintf(stderr,"}\n"); 71} 72 73static void setNuConvTestName(const char *codepage, const char *direction) 74{ 75 sprintf(gNuConvTestName, "[testing %s %s 
Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 76 codepage, 77 direction, 78 (int)gInBufferSize, 79 (int)gOutBufferSize); 80} 81 82 83static void TestCallBackFailure(void); 84 85void addTestConvertErrorCallBack(TestNode** root); 86 87void addTestConvertErrorCallBack(TestNode** root) 88{ 89 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); 90 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); 91 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); 92 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack"); 93 94#if !UCONFIG_NO_LEGACY_CONVERSION 95 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack"); 96 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack"); 97#endif 98 99 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); 100} 101 102static void TestSkipCallBack() 103{ 104 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 105 TestSkip(1,NEW_MAX_BUFFER); 106 TestSkip(1,1); 107 TestSkip(NEW_MAX_BUFFER, 1); 108} 109 110static void TestStopCallBack() 111{ 112 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 113 TestStop(1,NEW_MAX_BUFFER); 114 TestStop(1,1); 115 TestStop(NEW_MAX_BUFFER, 1); 116} 117 118static void TestSubCallBack() 119{ 120 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 121 TestSub(1,NEW_MAX_BUFFER); 122 TestSub(1,1); 123 TestSub(NEW_MAX_BUFFER, 1); 124 125#if !UCONFIG_NO_LEGACY_CONVERSION 126 TestEBCDIC_STATEFUL_Sub(1, 1); 127 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); 128 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); 129 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 130#endif 131} 132 133static void TestSubWithValueCallBack() 134{ 135 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 136 TestSubWithValue(1,NEW_MAX_BUFFER); 137 TestSubWithValue(1,1); 138 TestSubWithValue(NEW_MAX_BUFFER, 1); 139} 140 141#if !UCONFIG_NO_LEGACY_CONVERSION 142static void TestLegalAndOtherCallBack() 143{ 144 
TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 145 TestLegalAndOthers(1,NEW_MAX_BUFFER); 146 TestLegalAndOthers(1,1); 147 TestLegalAndOthers(NEW_MAX_BUFFER, 1); 148} 149 150static void TestSingleByteCallBack() 151{ 152 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 153 TestSingleByte(1,NEW_MAX_BUFFER); 154 TestSingleByte(1,1); 155 TestSingleByte(NEW_MAX_BUFFER, 1); 156} 157#endif 158 159static void TestSkip(int32_t inputsize, int32_t outputsize) 160{ 161 static const uint8_t expskipIBM_949[]= { 162 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; 163 164 static const uint8_t expskipIBM_943[] = { 165 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; 166 167 static const uint8_t expskipIBM_930[] = { 168 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; 169 170 gInBufferSize = inputsize; 171 gOutBufferSize = outputsize; 172 173 /*From Unicode*/ 174 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); 175 176#if !UCONFIG_NO_LEGACY_CONVERSION 177 { 178 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 179 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 180 181 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; 182 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; 183 184 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 185 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949", 186 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) 187 log_err("u-> ibm-949 with skip did not match.\n"); 188 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 189 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943", 190 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) 191 log_err("u-> ibm-943 with skip did not match.\n"); 192 } 193 194 { 195 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 }; 196 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 
0x0e, 0x46, 0x6b, 0x0f }; 197 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 }; 198 199 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */ 200 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR, 201 fromUBytes, sizeof(fromUBytes), 202 "ibm-930", 203 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, 204 NULL, 0) 205 ) { 206 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n"); 207 } 208 } 209#endif 210 211 { 212 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 213 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; 214 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; 215 216 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 217 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; 218 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; 219 220 /* US-ASCII */ 221 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, 222 usasciiFromUBytes, sizeof(usasciiFromUBytes), 223 "US-ASCII", 224 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 225 NULL, 0) 226 ) { 227 log_err("u->US-ASCII with skip did not match.\n"); 228 } 229 230#if !UCONFIG_NO_LEGACY_CONVERSION 231 /* SBCS NLTC codepage 367 for US-ASCII */ 232 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, 233 usasciiFromUBytes, sizeof(usasciiFromUBytes), 234 "ibm-367", 235 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 236 NULL, 0) 237 ) { 238 log_err("u->ibm-367 with skip did not match.\n"); 239 } 240#endif 241 242 /* ISO-Latin-1 */ 243 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, 244 latin1FromUBytes, sizeof(latin1FromUBytes), 245 "LATIN_1", 246 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 247 NULL, 0) 248 ) { 249 log_err("u->LATIN_1 with skip did not match.\n"); 250 } 251 252#if !UCONFIG_NO_LEGACY_CONVERSION 
253 /* windows-1252 */ 254 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, 255 latin1FromUBytes, sizeof(latin1FromUBytes), 256 "windows-1252", 257 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 258 NULL, 0) 259 ) { 260 log_err("u->windows-1252 with skip did not match.\n"); 261 } 262 } 263 264 { 265 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 266 static const uint8_t toIBM943[]= { 0x61, 0x61 }; 267 static const int32_t offset[]= {0, 4}; 268 269 /* EUC_JP*/ 270 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 271 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 272 0x61, 0x8e, 0xe0, 273 }; 274 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; 275 276 /*EUC_TW*/ 277 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 278 static const uint8_t to_euc_tw[]={ 279 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 280 0x61, 0xe6, 0xca, 0x8a, 281 }; 282 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,}; 283 284 /*ISO-2022-JP*/ 285 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, }; 286 static const uint8_t to_iso_2022_jp[]={ 287 0x41, 288 0x42, 289 290 }; 291 static const int32_t from_iso_2022_jpOffs [] ={0,2}; 292 293 /*ISO-2022-JP*/ 294 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 295 static const uint8_t to_iso_2022_jp2[]={ 296 0x41, 297 0x43, 298 299 }; 300 static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; 301 302 /*ISO-2022-cn*/ 303 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; 304 static const uint8_t to_iso_2022_cn[]={ 305 0x41, 0x42 306 }; 307 static const int32_t from_iso_2022_cnOffs [] ={ 308 0, 2 309 }; 310 311 /*ISO-2022-CN*/ 312 static const UChar iso_2022_cn_inputText1[]={0x0041, 
0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 313 static const uint8_t to_iso_2022_cn1[]={ 314 0x41, 0x43 315 316 }; 317 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; 318 319 /*ISO-2022-kr*/ 320 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 321 static const uint8_t to_iso_2022_kr[]={ 322 0x1b, 0x24, 0x29, 0x43, 323 0x41, 324 0x0e, 0x25, 0x50, 325 0x25, 0x50, 326 0x0f, 0x42, 327 }; 328 static const int32_t from_iso_2022_krOffs [] ={ 329 -1,-1,-1,-1, 330 0, 331 1,1,1, 332 3,3, 333 4,4 334 }; 335 336 /*ISO-2022-kr*/ 337 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 338 static const uint8_t to_iso_2022_kr1[]={ 339 0x1b, 0x24, 0x29, 0x43, 340 0x41, 341 0x0e, 0x25, 0x50, 342 0x25, 0x50, 343 344 }; 345 static const int32_t from_iso_2022_krOffs1 [] ={ 346 -1,-1,-1,-1, 347 0, 348 1,1,1, 349 3,3, 350 351 }; 352 /* HZ encoding */ 353 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 354 355 static const uint8_t to_hz[]={ 356 0x7e, 0x7d, 0x41, 357 0x7e, 0x7b, 0x26, 0x30, 358 0x26, 0x30, 359 0x7e, 0x7d, 0x42, 360 361 }; 362 static const int32_t from_hzOffs [] ={ 363 0,0,0, 364 1,1,1,1, 365 3,3, 366 4,4,4,4 367 }; 368 369 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 370 371 static const uint8_t to_hz1[]={ 372 0x7e, 0x7d, 0x41, 373 0x7e, 0x7b, 0x26, 0x30, 374 0x26, 0x30, 375 376 377 }; 378 static const int32_t from_hzOffs1 [] ={ 379 0,0,0, 380 1,1,1,1, 381 3,3, 382 383 }; 384 385#endif 386 387 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 388 389 static const uint8_t to_SCSU[]={ 390 0x41, 391 0x42 392 393 394 }; 395 static const int32_t from_SCSUOffs [] ={ 396 0, 397 2, 398 399 }; 400 401#if !UCONFIG_NO_LEGACY_CONVERSION 402 /* ISCII */ 403 static const UChar iscii_inputText[]={ 0x0041, 
0x3712/*unassigned*/, 0x0042, }; 404 static const uint8_t to_iscii[]={ 405 0x41, 406 0x42, 407 }; 408 static const int32_t from_isciiOffs [] ={ 409 0,2, 410 411 }; 412 /*ISCII*/ 413 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 414 static const uint8_t to_iscii1[]={ 415 0x44, 416 0x43, 417 418 }; 419 static const int32_t from_isciiOffs1 [] ={0,2}; 420 421 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 422 toIBM943, sizeof(toIBM943), "ibm-943", 423 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) 424 log_err("u-> ibm-943 with skip did not match.\n"); 425 426 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 427 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 428 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) 429 log_err("u-> euc-jp with skip did not match.\n"); 430 431 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 432 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 433 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) 434 log_err("u-> euc-tw with skip did not match.\n"); 435 436 /*iso_2022_jp*/ 437 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 438 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 439 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) 440 log_err("u-> iso-2022-jp with skip did not match.\n"); 441 442 /* with context */ 443 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), 444 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", 445 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 446 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 447 448 /*iso_2022_cn*/ 449 if(!testConvertFromUnicode(iso_2022_cn_inputText, 
sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 450 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 451 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) 452 log_err("u-> iso-2022-cn with skip did not match.\n"); 453 /*with context*/ 454 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), 455 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", 456 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 457 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 458 459 /*iso_2022_kr*/ 460 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 461 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 462 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) 463 log_err("u-> iso-2022-kr with skip did not match.\n"); 464 /*with context*/ 465 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]), 466 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr", 467 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 468 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 469 470 /*hz*/ 471 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 472 to_hz, sizeof(to_hz), "HZ", 473 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) 474 log_err("u-> HZ with skip did not match.\n"); 475 /*with context*/ 476 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]), 477 to_hz1, sizeof(to_hz1), "hz", 478 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 479 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 480#endif 481 482 
/*SCSU*/ 483 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 484 to_SCSU, sizeof(to_SCSU), "SCSU", 485 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) 486 log_err("u-> SCSU with skip did not match.\n"); 487 488#if !UCONFIG_NO_LEGACY_CONVERSION 489 /*ISCII*/ 490 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 491 to_iscii, sizeof(to_iscii), "ISCII,version=0", 492 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) 493 log_err("u-> iscii with skip did not match.\n"); 494 /*with context*/ 495 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]), 496 to_iscii1, sizeof(to_iscii1), "ISCII,version=0", 497 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 498 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 499#endif 500 } 501 502 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 503 { 504 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 505 0xFB, 0xEE, 0x28, /* from source offset 0 */ 506 0x24, 0x1E, 0x52, 507 0xB2, 508 0x20, 509 0xB3, 510 0xB1, 511 0x0D, 512 0x0A, 513 514 0x20, /* from 8 */ 515 0x00, 516 0xD0, 0x6C, 517 0xB6, 518 0xD8, 0xA5, 519 0x20, 520 0x68, 521 0x59, 522 523 0xF9, 0x28, /* from 16 */ 524 0x6D, 525 0x20, 526 0x73, 527 0xE0, 0x2D, 528 0xDE, 0x43, 529 0xD0, 0x33, 530 0x20, 531 532 0xFA, 0x83, /* from 24 */ 533 0x25, 0x01, 534 0xFB, 0x16, 0x87, 535 0x4B, 0x16, 536 0x20, 537 0xE6, 0xBD, 538 0xEB, 0x5B, 539 0x4B, 0xCC, 540 541 0xF9, 0xA2, /* from 32 */ 542 0xFC, 0x10, 0x3E, 543 0xFE, 0x16, 0x3A, 0x8C, 544 0x20, 545 0xFC, 0x03, 0xAC, 546 547 0x01, /* from 41 */ 548 0xDE, 0x83, 549 0x20, 550 0x09 551 }; 552 static const UChar expected[]={ 553 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ 554 0x0063, 0x0061, 0x000D, 0x000A, 555 556 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ 
557 0x0930, 0x0020, 0x0918, 0x0909, 558 559 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ 560 0x4000, 0x4E00, 0x7777, 0x0020, 561 562 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ 563 0x0020, 0xD7A3, 0xDC00, 0xD800, 564 565 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ 566 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 567 568 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ 569 0x0009 570 }; 571 static const int32_t offsets[]={ 572 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, 573 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, 574 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 575 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, 576 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, 577 41, 42, 42, 43, 44 578 }; 579 580 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */ 581 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 582 sampleText, sizeof(sampleText), 583 "BOCU-1", 584 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 585 ) { 586 log_err("u->BOCU-1 with skip did not match.\n"); 587 } 588 } 589 590 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 591 { 592 const uint8_t sampleText[]={ 593 0x61, /* 'a' */ 594 0xc4, 0xb5, /* U+0135 */ 595 0xed, 0x80, 0xa0, /* Hangul U+d020 */ 596 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ 597 0xee, 0x80, 0x80, /* PUA U+e000 */ 598 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */ 599 0x62, /* 'b' */ 600 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */ 601 0xd0, 0x80 /* U+0400 */ 602 }; 603 UChar expected[]={ 604 0x0061, 605 0x0135, 606 0xd020, 607 0xd801, 0xdc01, 608 0xe000, 609 0xdc01, 610 0x0062, 611 0xd801, 612 0x0400 613 }; 614 int32_t offsets[]={ 615 0, 616 1, 1, 617 2, 2, 2, 618 3, 3, 3, 4, 4, 4, 619 5, 5, 5, 620 6, 6, 6, 621 7, 622 8, 8, 8, 623 9, 9 624 }; 625 626 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */ 627 628 /* without offsets */ 629 if(!testConvertFromUnicode(expected, 
ARRAY_LENGTH(expected), 630 sampleText, sizeof(sampleText), 631 "CESU-8", 632 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) 633 ) { 634 log_err("u->CESU-8 with skip did not match.\n"); 635 } 636 637 /* with offsets */ 638 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 639 sampleText, sizeof(sampleText), 640 "CESU-8", 641 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 642 ) { 643 log_err("u->CESU-8 with skip did not match.\n"); 644 } 645 } 646 647 /*to Unicode*/ 648 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); 649 650#if !UCONFIG_NO_LEGACY_CONVERSION 651 { 652 653 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 }; 654 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 655 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 656 657 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; 658 static const int32_t fromIBM943Offs [] = { 0, 2, 4}; 659 static const int32_t fromIBM930Offs [] = { 1, 3, 5}; 660 661 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949), 662 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949", 663 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) 664 log_err("ibm-949->u with skip did not match.\n"); 665 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943), 666 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943", 667 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) 668 log_err("ibm-943->u with skip did not match.\n"); 669 670 671 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930), 672 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", 673 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) 674 log_err("ibm-930->u with skip did not match.\n"); 675 676 677 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930), 678 IBM_930skiptoUnicode, 
sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", 679 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 680 log_err("ibm-930->u with skip did not match.\n"); 681 } 682#endif 683 684 { 685 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; 686 static const UChar usasciiToU[] = { 0x61, 0x31 }; 687 static const int32_t usasciiToUOffsets[] = { 0, 2 }; 688 689 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; 690 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; 691 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; 692 693 /* US-ASCII */ 694 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), 695 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, 696 "US-ASCII", 697 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 698 NULL, 0) 699 ) { 700 log_err("US-ASCII->u with skip did not match.\n"); 701 } 702 703#if !UCONFIG_NO_LEGACY_CONVERSION 704 /* SBCS NLTC codepage 367 for US-ASCII */ 705 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), 706 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, 707 "ibm-367", 708 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 709 NULL, 0) 710 ) { 711 log_err("ibm-367->u with skip did not match.\n"); 712 } 713#endif 714 715 /* ISO-Latin-1 */ 716 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), 717 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, 718 "LATIN_1", 719 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 720 NULL, 0) 721 ) { 722 log_err("LATIN_1->u with skip did not match.\n"); 723 } 724 725#if !UCONFIG_NO_LEGACY_CONVERSION 726 /* windows-1252 */ 727 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), 728 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, 729 "windows-1252", 730 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 731 NULL, 0) 732 ) { 733 log_err("windows-1252->u with skip did not match.\n"); 734 } 735#endif 736 } 737 738#if !UCONFIG_NO_LEGACY_CONVERSION 739 { 740 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 741 
0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 742 }; 743 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 744 }; 745 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; 746 747 748 /* euc-jp*/ 749 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 750 0x8f, 0xda, 0xa1, /*unassigned*/ 751 0x8e, 0xe0, 752 }; 753 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; 754 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; 755 756 /*EUC_TW*/ 757 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 758 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 759 0xe6, 0xca, 0x8a, 760 }; 761 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, }; 762 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; 763 /*iso-2022-jp*/ 764 static const uint8_t sampleTxt_iso_2022_jp[]={ 765 0x41, 766 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/ 767 0x1b, 0x28, 0x42, 0x42, 768 769 }; 770 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; 771 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; 772 773 /*iso-2022-cn*/ 774 static const uint8_t sampleTxt_iso_2022_cn[]={ 775 0x0f, 0x41, 0x44, 776 0x1B, 0x24, 0x29, 0x47, 777 0x0E, 0x40, 0x6f, /*unassigned*/ 778 0x0f, 0x42, 779 780 }; 781 782 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; 783 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; 784 785 /*iso-2022-kr*/ 786 static const uint8_t sampleTxt_iso_2022_kr[]={ 787 0x1b, 0x24, 0x29, 0x43, 788 0x41, 789 0x0E, 0x7f, 0x1E, 790 0x0e, 0x25, 0x50, 791 0x0f, 0x51, 792 0x42, 0x43, 793 794 }; 795 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43}; 796 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 }; 797 798 /*hz*/ 799 static const uint8_t sampleTxt_hz[]={ 800 0x41, 801 0x7e, 0x7b, 0x26, 0x30, 802 0x7f, 0x1E, /*unassigned*/ 803 0x26, 0x30, 804 0x7e, 0x7d, 0x42, 805 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 806 
0x7e, 0x7d, 0x42, 807 }; 808 static const UChar hztoUnicode[]={ 809 0x41, 810 0x03a0, 811 0x03A0, 812 0x42, 813 0x42,}; 814 815 static const int32_t from_hzOffs [] ={0,3,7,11,18, }; 816 817 /*ISCII*/ 818 static const uint8_t sampleTxt_iscii[]={ 819 0x41, 820 0xa1, 821 0xEB, /*unassigned*/ 822 0x26, 823 0x30, 824 0xa2, 825 0xEC, /*unassigned*/ 826 0x42, 827 }; 828 static const UChar isciitoUnicode[]={ 829 0x41, 830 0x0901, 831 0x26, 832 0x30, 833 0x0902, 834 0x42, 835 }; 836 837 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; 838 839 /*LMBCS*/ 840 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, 841 0x12, 0x92, 0xa0, /*unassigned*/ 842 0x12, 0x92, 0xA1, 843 }; 844 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; 845 static const int32_t fromLMBCS[] = {0, 6}; 846 847 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 848 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 849 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 850 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 851 852 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 853 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 854 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 855 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 856 857 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 858 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 859 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) 860 log_err("euc-jp->u with skip did not match.\n"); 861 862 863 864 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 865 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 866 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , 
NULL, 0)) 867 log_err("euc-tw->u with skip did not match.\n"); 868 869 870 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 871 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 872 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) 873 log_err("iso-2022-jp->u with skip did not match.\n"); 874 875 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), 876 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", 877 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) 878 log_err("iso-2022-cn->u with skip did not match.\n"); 879 880 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), 881 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", 882 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) 883 log_err("iso-2022-kr->u with skip did not match.\n"); 884 885 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), 886 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", 887 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) 888 log_err("HZ->u with skip did not match.\n"); 889 890 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), 891 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", 892 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) 893 log_err("iscii->u with skip did not match.\n"); 894 895 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS), 896 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1", 897 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) 898 log_err("LMBCS->u with skip did not match.\n"); 899 900 } 901#endif 902 903 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); 904 { 905 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 906 0xe0, 0x80, 0x61,}; 907 UChar expected1[] = { 0x0031, 0x4e8c, 
0x0061}; 908 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; 909 910 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 911 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 912 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 913 log_err("utf8->u with skip did not match.\n");; 914 } 915 916 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); 917 { 918 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 919 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe}; 920 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 921 922 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 923 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 924 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 925 log_err("scsu->u with skip did not match.\n"); 926 } 927 928 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 929 { 930 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 931 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ 932 0x24, 0x1E, 0x52, /* 3 */ 933 0xB2, /* 6 */ 934 0x20, /* 7 */ 935 0x40, 0x07, /* 8 - wrong trail byte */ 936 0xB3, /* 10 */ 937 0xB1, /* 11 */ 938 0xD0, 0x20, /* 12 - wrong trail byte */ 939 0x0D, /* 14 */ 940 0x0A, /* 15 */ 941 0x20, /* 16 */ 942 0x00, /* 17 */ 943 0xD0, 0x6C, /* 18 */ 944 0xB6, /* 20 */ 945 0xD8, 0xA5, /* 21 */ 946 0x20, /* 23 */ 947 0x68, /* 24 */ 948 0x59, /* 25 */ 949 0xF9, 0x28, /* 26 */ 950 0x6D, /* 28 */ 951 0x20, /* 29 */ 952 0x73, /* 30 */ 953 0xE0, 0x2D, /* 31 */ 954 0xDE, 0x43, /* 33 */ 955 0xD0, 0x33, /* 35 */ 956 0x20, /* 37 */ 957 0xFA, 0x83, /* 38 */ 958 0x25, 0x01, /* 40 */ 959 0xFB, 0x16, 0x87, /* 42 */ 960 0x4B, 0x16, /* 45 */ 961 0x20, /* 47 */ 962 0xE6, 0xBD, /* 48 */ 963 0xEB, 0x5B, /* 50 */ 964 0x4B, 0xCC, /* 52 */ 965 0xF9, 0xA2, /* 54 */ 966 0xFC, 0x10, 0x3E, /* 56 */ 967 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ 968 0x20, /* 63 */ 969 0xFC, 0x03, 0xAC, /* 64 */ 970 0xFF, 
/* 67 - FF just resets the state without encoding anything */ 971 0x01, /* 68 */ 972 0xDE, 0x83, /* 69 */ 973 0x20, /* 71 */ 974 0x09 /* 72 */ 975 }; 976 UChar expected[]={ 977 0xFEFF, 0x0061, 0x0062, 0x0020, 978 0x0063, 0x0061, 0x000D, 0x000A, 979 0x0020, 0x0000, 0x00DF, 0x00E6, 980 0x0930, 0x0020, 0x0918, 0x0909, 981 0x3086, 0x304D, 0x0020, 0x3053, 982 0x4000, 0x4E00, 0x7777, 0x0020, 983 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, 984 0x0020, 0xD7A3, 0xDC00, 0xD800, 985 0xD800, 0xDC00, 0xD845, 0xDDDD, 986 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 987 0xDFFF, 0x0001, 0x0E40, 0x0020, 988 0x0009 989 }; 990 int32_t offsets[]={ 991 0, 3, 6, 7, /* skip 8, */ 992 10, 11, /* skip 12, */ 993 14, 15, 16, 17, 18, 994 20, 21, 23, 24, 25, 26, 28, 29, 995 30, 31, 33, 35, 37, 38, 996 40, 42, 45, 47, 48, 997 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, 998 63, 64, /* trail */ 64, /* reset only 67, */ 999 68, 69, 1000 71, 72 1001 }; 1002 1003 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1004 expected, ARRAY_LENGTH(expected), "BOCU-1", 1005 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1006 ) { 1007 log_err("BOCU-1->u with skip did not match.\n"); 1008 } 1009 } 1010 1011 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 1012 { 1013 const uint8_t sampleText[]={ 1014 0x61, /* 0 'a' */ 1015 0xc0, 0x80, /* 1 non-shortest form */ 1016 0xc4, 0xb5, /* 3 U+0135 */ 1017 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ 1018 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */ 1019 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ 1020 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */ 1021 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */ 1022 0x62, /* 24 'b' */ 1023 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */ 1024 0xed, 0xa0, /* 28 incomplete sequence */ 1025 0xd0, 0x80 /* 30 U+0400 */ 1026 }; 1027 UChar expected[]={ 1028 0x0061, 1029 /* skip */ 1030 0x0135, 1031 0xd020, 1032 0xd801, 0xdc01, 1033 0xe000, 1034 0xdc01, 
1035 /* skip */ 1036 0x0062, 1037 0xd801, 1038 0x0400 1039 }; 1040 int32_t offsets[]={ 1041 0, 1042 /* skip 1, */ 1043 3, 1044 5, 1045 8, 11, 1046 14, 1047 17, 1048 /* skip 20, 20, */ 1049 24, 1050 25, 1051 /* skip 28 */ 1052 30 1053 }; 1054 1055 /* without offsets */ 1056 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1057 expected, ARRAY_LENGTH(expected), "CESU-8", 1058 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) 1059 ) { 1060 log_err("CESU-8->u with skip did not match.\n"); 1061 } 1062 1063 /* with offsets */ 1064 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1065 expected, ARRAY_LENGTH(expected), "CESU-8", 1066 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1067 ) { 1068 log_err("CESU-8->u with skip did not match.\n"); 1069 } 1070 } 1071} 1072 1073static void TestStop(int32_t inputsize, int32_t outputsize) 1074{ 1075 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1076 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1077 1078 static const uint8_t expstopIBM_949[]= { 1079 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; 1080 1081 static const uint8_t expstopIBM_943[] = { 1082 0x9f, 0xaf, 0x9f, 0xb1}; 1083 1084 static const uint8_t expstopIBM_930[] = { 1085 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; 1086 1087 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; 1088 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; 1089 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; 1090 1091 1092 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; 1093 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; 1094 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; 1095 1096 static const int32_t fromIBM949Offs [] = { 0, 1, 3}; 1097 static const int32_t fromIBM943Offs [] = { 0, 2}; 1098 static const int32_t fromIBM930Offs [] = { 1, 3}; 1099 1100 gInBufferSize = inputsize; 1101 gOutBufferSize = outputsize; 1102 1103 /*From Unicode*/ 1104 1105#if !UCONFIG_NO_LEGACY_CONVERSION 1106 
if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1107 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", 1108 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) 1109 log_err("u-> ibm-949 with stop did not match.\n"); 1110 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1111 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943", 1112 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) 1113 log_err("u-> ibm-943 with stop did not match.\n"); 1114 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1115 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930", 1116 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) 1117 log_err("u-> ibm-930 with stop did not match.\n"); 1118 1119 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); 1120 { 1121 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1122 static const uint8_t toIBM943[]= { 0x61,}; 1123 static const int32_t offset[]= {0,} ; 1124 1125 /*EUC_JP*/ 1126 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1127 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; 1128 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; 1129 1130 /*EUC_TW*/ 1131 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1132 static const uint8_t to_euc_tw[]={ 1133 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; 1134 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; 1135 1136 /*ISO-2022-JP*/ 1137 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; 1138 static const uint8_t to_iso_2022_jp[]={ 1139 0x41, 1140 1141 }; 1142 static const int32_t from_iso_2022_jpOffs [] ={0,}; 1143 1144 /*ISO-2022-cn*/ 1145 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1146 static const uint8_t to_iso_2022_cn[]={ 1147 
0x41, 1148 1149 }; 1150 static const int32_t from_iso_2022_cnOffs [] ={ 1151 0,0, 1152 2,2, 1153 }; 1154 1155 /*ISO-2022-kr*/ 1156 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 1157 static const uint8_t to_iso_2022_kr[]={ 1158 0x1b, 0x24, 0x29, 0x43, 1159 0x41, 1160 0x0e, 0x25, 0x50, 1161 }; 1162 static const int32_t from_iso_2022_krOffs [] ={ 1163 -1,-1,-1,-1, 1164 0, 1165 1,1,1, 1166 }; 1167 1168 /* HZ encoding */ 1169 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 1170 1171 static const uint8_t to_hz[]={ 1172 0x7e, 0x7d, 0x41, 1173 0x7e, 0x7b, 0x26, 0x30, 1174 1175 }; 1176 static const int32_t from_hzOffs [] ={ 1177 0, 0,0, 1178 1,1,1,1, 1179 }; 1180 1181 /*ISCII*/ 1182 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1183 static const uint8_t to_iscii[]={ 1184 0x41, 1185 }; 1186 static const int32_t from_isciiOffs [] ={ 1187 0, 1188 }; 1189 1190 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 1191 toIBM943, sizeof(toIBM943), "ibm-943", 1192 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) 1193 log_err("u-> ibm-943 with stop did not match.\n"); 1194 1195 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 1196 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 1197 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) 1198 log_err("u-> euc-jp with stop did not match.\n"); 1199 1200 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 1201 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 1202 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1203 log_err("u-> euc-tw with stop did not match.\n"); 1204 1205 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 1206 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 1207 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1208 
log_err("u-> iso-2022-jp with stop did not match.\n"); 1209 1210 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 1211 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 1212 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1213 log_err("u-> iso-2022-jp with stop did not match.\n"); 1214 1215 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 1216 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 1217 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) 1218 log_err("u-> iso-2022-cn with stop did not match.\n"); 1219 1220 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 1221 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 1222 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) 1223 log_err("u-> iso-2022-kr with stop did not match.\n"); 1224 1225 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 1226 to_hz, sizeof(to_hz), "HZ", 1227 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) 1228 log_err("u-> HZ with stop did not match.\n");\ 1229 1230 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 1231 to_iscii, sizeof(to_iscii), "ISCII,version=0", 1232 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) 1233 log_err("u-> iscii with stop did not match.\n"); 1234 1235 1236 } 1237#endif 1238 1239 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"); 1240 { 1241 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1242 1243 static const uint8_t to_SCSU[]={ 1244 0x41, 1245 1246 }; 1247 int32_t from_SCSUOffs [] ={ 1248 0, 1249 1250 }; 1251 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1252 to_SCSU, sizeof(to_SCSU), "SCSU", 1253 UCNV_FROM_U_CALLBACK_STOP, 
from_SCSUOffs, NULL, 0 )) 1254 log_err("u-> SCSU with skip did not match.\n"); 1255 1256 } 1257 1258 /*to Unicode*/ 1259 1260#if !UCONFIG_NO_LEGACY_CONVERSION 1261 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), 1262 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949", 1263 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) 1264 log_err("ibm-949->u with stop did not match.\n"); 1265 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943), 1266 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943", 1267 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) 1268 log_err("ibm-943->u with stop did not match.\n"); 1269 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930), 1270 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930", 1271 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) 1272 log_err("ibm-930->u with stop did not match.\n"); 1273 1274 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); 1275 { 1276 1277 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1278 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1279 }; 1280 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; 1281 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; 1282 1283 1284 /*EUC-JP*/ 1285 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1286 0x8f, 0xda, 0xa1, /*unassigned*/ 1287 0x8e, 0xe0, 1288 }; 1289 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; 1290 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; 1291 1292 /*EUC_TW*/ 1293 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1294 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1295 0xe6, 0xca, 0x8a, 1296 }; 1297 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; 1298 int32_t from_euc_twOffs [] ={ 0, 1, 3}; 1299 1300 1301 1302 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, 
sizeof(sampleTxtEBCIDIC_STATEFUL), 1303 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 1304 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1305 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); 1306 1307 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1308 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 1309 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) 1310 log_err("euc-jp->u with stop did not match.\n"); 1311 1312 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 1313 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 1314 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1315 log_err("euc-tw->u with stop did not match.\n"); 1316 } 1317#endif 1318 1319 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); 1320 { 1321 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1322 0xe0, 0x80, 0x61,}; 1323 static const UChar expected1[] = { 0x0031, 0x4e8c,}; 1324 static const int32_t offsets1[] = { 0x0000, 0x0001}; 1325 1326 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1327 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 1328 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1329 log_err("utf8->u with stop did not match.\n");; 1330 } 1331 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); 1332 { 1333 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04}; 1334 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061}; 1335 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; 1336 1337 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1338 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 1339 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1340 log_err("scsu->u with stop did not match.\n");; 1341 } 1342 1343} 1344 1345static void 
TestSub(int32_t inputsize, int32_t outputsize) 1346{ 1347 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1348 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1349 1350 static const uint8_t expsubIBM_949[] = 1351 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; 1352 1353 static const uint8_t expsubIBM_943[] = { 1354 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; 1355 1356 static const uint8_t expsubIBM_930[] = { 1357 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; 1358 1359 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 }; 1360 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1361 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1362 1363 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1364 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; 1365 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; 1366 1367 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; 1368 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; 1369 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; 1370 1371 gInBufferSize = inputsize; 1372 gOutBufferSize = outputsize; 1373 1374 /*from unicode*/ 1375 1376#if !UCONFIG_NO_LEGACY_CONVERSION 1377 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1378 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", 1379 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) 1380 log_err("u-> ibm-949 with subst did not match.\n"); 1381 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1382 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943", 1383 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) 1384 log_err("u-> ibm-943 with subst did not match.\n"); 1385 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1386 
expsubIBM_930, sizeof(expsubIBM_930), "ibm-930", 1387 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) 1388 log_err("u-> ibm-930 with subst did not match.\n"); 1389 1390 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1391 { 1392 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1393 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; 1394 static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; 1395 1396 1397 /* EUC_JP*/ 1398 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1399 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1400 0xf4, 0xfe, 0xf4, 0xfe, 1401 0x61, 0x8e, 0xe0, 1402 }; 1403 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7}; 1404 1405 /*EUC_TW*/ 1406 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1407 static const uint8_t to_euc_tw[]={ 1408 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1409 0xfd, 0xfe, 0xfd, 0xfe, 1410 0x61, 0xe6, 0xca, 0x8a, 1411 }; 1412 1413 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; 1414 1415 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 1416 toIBM943, sizeof(toIBM943), "ibm-943", 1417 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) 1418 log_err("u-> ibm-943 with substitute did not match.\n"); 1419 1420 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 1421 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 1422 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) 1423 log_err("u-> euc-jp with substitute did not match.\n"); 1424 1425 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 1426 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 1427 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1428 
log_err("u-> euc-tw with substitute did not match.\n"); 1429 } 1430#endif 1431 1432 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1433 { 1434 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1435 1436 const uint8_t to_SCSU[]={ 1437 0x41, 1438 0x0e, 0xff,0xfd, 1439 0x42 1440 1441 1442 }; 1443 int32_t from_SCSUOffs [] ={ 1444 0, 1445 1,1,1, 1446 2, 1447 1448 }; 1449 const uint8_t to_SCSU_1[]={ 1450 0x41, 1451 1452 }; 1453 int32_t from_SCSUOffs_1 [] ={ 1454 0, 1455 1456 }; 1457 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1458 to_SCSU, sizeof(to_SCSU), "SCSU", 1459 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) 1460 log_err("u-> SCSU with substitute did not match.\n"); 1461 1462 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1463 to_SCSU_1, sizeof(to_SCSU_1), "SCSU", 1464 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 1465 log_err("u-> SCSU with substitute did not match.\n"); 1466 } 1467 1468 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1469 { 1470 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,}; 1471 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, 1472 0xf0, 0x90, 0x90, 0x81, 1473 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 1474 0xef, 0xbf, 0xbf, 0x61, 1475 1476 }; 1477 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 }; 1478 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]), 1479 expectedUTF8, sizeof(expectedUTF8), "utf8", 1480 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { 1481 log_err("u-> utf8 with stop did not match.\n"); 1482 } 1483 } 1484 1485 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1486 { 1487 static const UChar in[]={ 0x0041, 0xfeff }; 1488 1489 static const 
uint8_t out[]={ 1490#if U_IS_BIG_ENDIAN 1491 0xfe, 0xff, 1492 0x00, 0x41, 1493 0xfe, 0xff 1494#else 1495 0xff, 0xfe, 1496 0x41, 0x00, 1497 0xff, 0xfe 1498#endif 1499 }; 1500 static const int32_t offsets[]={ 1501 -1, -1, 0, 0, 1, 1 1502 }; 1503 1504 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), 1505 out, sizeof(out), "UTF-16", 1506 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1507 ) { 1508 log_err("u->UTF-16 with substitute did not match.\n"); 1509 } 1510 } 1511 1512 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1513 { 1514 static const UChar in[]={ 0x0041, 0xfeff }; 1515 1516 static const uint8_t out[]={ 1517#if U_IS_BIG_ENDIAN 1518 0x00, 0x00, 0xfe, 0xff, 1519 0x00, 0x00, 0x00, 0x41, 1520 0x00, 0x00, 0xfe, 0xff 1521#else 1522 0xff, 0xfe, 0x00, 0x00, 1523 0x41, 0x00, 0x00, 0x00, 1524 0xff, 0xfe, 0x00, 0x00 1525#endif 1526 }; 1527 static const int32_t offsets[]={ 1528 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 1529 }; 1530 1531 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), 1532 out, sizeof(out), "UTF-32", 1533 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1534 ) { 1535 log_err("u->UTF-32 with substitute did not match.\n"); 1536 } 1537 } 1538 1539 /*to unicode*/ 1540 1541#if !UCONFIG_NO_LEGACY_CONVERSION 1542 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), 1543 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949", 1544 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) 1545 log_err("ibm-949->u with substitute did not match.\n"); 1546 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943), 1547 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943", 1548 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) 1549 log_err("ibm-943->u with substitute did not match.\n"); 1550 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930), 1551 IBM_930subtoUnicode, 
sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930", 1552 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) 1553 log_err("ibm-930->u with substitute did not match.\n"); 1554 1555 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1556 { 1557 1558 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1559 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1560 }; 1561 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 1562 }; 1563 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; 1564 1565 1566 /* EUC_JP*/ 1567 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1568 0x8f, 0xda, 0xa1, /*unassigned*/ 1569 0x8e, 0xe0, 0x8a 1570 }; 1571 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a }; 1572 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; 1573 1574 /*EUC_TW*/ 1575 const uint8_t sampleTxt_euc_tw[]={ 1576 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1577 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1578 0xe6, 0xca, 0x8a, 1579 }; 1580 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, }; 1581 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; 1582 1583 1584 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 1585 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 1586 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1587 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); 1588 1589 1590 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1591 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 1592 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) 1593 log_err("euc-jp->u with substitute did not match.\n"); 1594 1595 1596 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 1597 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 1598 
UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1599 log_err("euc-tw->u with substitute did not match.\n"); 1600 1601 1602 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1603 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 1604 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND)) 1605 log_err("euc-jp->u with substitute did not match.\n"); 1606 } 1607#endif 1608 1609 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1610 { 1611 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1612 0xe0, 0x80, 0x61,}; 1613 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 1614 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 1615 1616 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1617 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 1618 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1619 log_err("utf8->u with substitute did not match.\n");; 1620 } 1621 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1622 { 1623 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 1624 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd}; 1625 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 1626 1627 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1628 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 1629 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1630 log_err("scsu->u with stop did not match.\n");; 1631 } 1632 1633#if !UCONFIG_NO_LEGACY_CONVERSION 1634 log_verbose("Testing ibm-930 subchar/subchar1\n"); 1635 { 1636 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf }; 1637 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f }; 1638 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1639 1640 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 
0x1a }; 1641 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 }; 1642 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 }; 1643 1644 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930", 1645 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1646 ) { 1647 log_err("u->ibm-930 subchar/subchar1 did not match.\n"); 1648 } 1649 1650 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930", 1651 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1652 ) { 1653 log_err("ibm-930->u subchar/subchar1 did not match.\n"); 1654 } 1655 } 1656 1657 log_verbose("Testing GB 18030 with substitute callbacks\n"); 1658 { 1659 static const UChar u2[]={ 1660 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff }; 1661 static const uint8_t gb2[]={ 1662 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; 1663 static const int32_t offsets2[]={ 1664 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; 1665 1666 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030", 1667 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1668 ) { 1669 log_err("gb18030->u with substitute did not match.\n"); 1670 } 1671 } 1672#endif 1673 1674 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); 1675 { 1676 static const uint8_t utf7[]={ 1677 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. 
*/ 1678 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 1679 }; 1680 static const UChar unicode[]={ 1681 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd 1682 }; 1683 static const int32_t offsets[]={ 1684 0, 1, 2, 4, 7, 9, 12, 14, 17, 19, 22, 23 1685 }; 1686 1687 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7", 1688 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1689 ) { 1690 log_err("UTF-7->u with substitute did not match.\n"); 1691 } 1692 } 1693 1694 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); 1695 { 1696 static const uint8_t 1697 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, 1698 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, 1699 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; 1700 1701 static const UChar 1702 out1[]={ 0x4e00, 0xfeff }, 1703 out2[]={ 0x004e, 0xfffe }, 1704 out3[]={ 0xfefd, 0x4e00, 0xfeff }; 1705 1706 static const int32_t 1707 offsets1[]={ 2, 4 }, 1708 offsets2[]={ 2, 4 }, 1709 offsets3[]={ 0, 2, 4 }; 1710 1711 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16", 1712 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1713 ) { 1714 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); 1715 } 1716 1717 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16", 1718 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1719 ) { 1720 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); 1721 } 1722 1723 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16", 1724 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1725 ) { 1726 log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); 1727 } 1728 } 1729 1730 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); 1731 { 1732 static const uint8_t 1733 in1[]={ 0x00, 0x00, 0xfe, 0xff, 
0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }, 1734 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }, 1735 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, 1736 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 }; 1737 1738 static const UChar 1739 out1[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff }, 1740 out2[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe }, 1741 out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd, 0xfffd }, 1742 out4[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 }; 1743 1744 static const int32_t 1745 offsets1[]={ 4, 4, 8 }, 1746 offsets2[]={ 4, 4, 8 }, 1747 offsets3[]={ 0, 4, 4, 8, 12 }, 1748 offsets4[]={ 0, 0, 4, 8 }; 1749 1750 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32", 1751 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1752 ) { 1753 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); 1754 } 1755 1756 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32", 1757 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1758 ) { 1759 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); 1760 } 1761 1762 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32", 1763 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1764 ) { 1765 log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); 1766 } 1767 1768 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32", 1769 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0) 1770 ) { 1771 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n"); 1772 } 1773 } 1774} 1775 1776static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 1777{ 1778 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1779 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 
1780 1781 const uint8_t expsubwvalIBM_949[]= { 1782 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 1783 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; 1784 1785 const uint8_t expsubwvalIBM_943[]= { 1786 0x9f, 0xaf, 0x9f, 0xb1, 1787 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; 1788 1789 const uint8_t expsubwvalIBM_930[] = { 1790 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f }; 1791 1792 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; 1793 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; 1794 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ 1795 1796 gInBufferSize = inputsize; 1797 gOutBufferSize = outputsize; 1798 1799 /*from Unicode*/ 1800 1801#if !UCONFIG_NO_LEGACY_CONVERSION 1802 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1803 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", 1804 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) 1805 log_err("u-> ibm-949 with subst with value did not match.\n"); 1806 1807 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1808 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943", 1809 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) 1810 log_err("u-> ibm-943 with sub with value did not match.\n"); 1811 1812 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1813 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930", 1814 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) 1815 log_err("u-> ibm-930 with subst with value did not match.\n"); 1816 1817 1818 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 1819 { 1820 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1821 static const uint8_t toIBM943[]= { 0x61, 1822 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1823 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1824 0x25, 
0x55, 0x44, 0x38, 0x30, 0x31, 1825 0x61 }; 1826 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 1827 1828 1829 /* EUC_JP*/ 1830 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, }; 1831 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1832 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1833 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1834 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1835 0x61, 0x8e, 0xe0, 1836 }; 1837 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 1838 3, 3, 3, 3, 3, 3, 1839 3, 3, 3, 3, 3, 3, 1840 5, 5, 5, 5, 5, 5, 1841 6, 7, 7, 1842 }; 1843 1844 /*EUC_TW*/ 1845 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1846 static const uint8_t to_euc_tw[]={ 1847 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1848 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1849 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1851 0x61, 0xe6, 0xca, 0x8a, 1852 }; 1853 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 1854 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 1855 6, 7, 7, 8, 1856 }; 1857 /*ISO-2022-JP*/ 1858 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; 1859 static const uint8_t to_iso_2022_jp1[]={ 1860 0x1b, 0x24, 0x42, 0x21, 0x21, 1861 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1862 0x1b, 0x24, 0x42, 0x21, 0x22, 1863 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1864 0x42, 1865 }; 1866 1867 static const int32_t from_iso_2022_jpOffs1 [] ={ 1868 0,0,0,0,0, 1869 1,1,1,1,1,1,1,1,1, 1870 2,2,2,2,2, 1871 3,3,3,3,3,3,3,3,3, 1872 4, 1873 }; 1874 /* surrogate pair*/ 1875 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ; 1876 static const uint8_t to_iso_2022_jp2[]={ 1877 0x1b, 0x24, 0x42, 0x21, 0x21, 1878 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 
1879 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1880 0x1b, 0x24, 0x42, 0x21, 0x22, 1881 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1882 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1883 0x42, 1884 }; 1885 static const int32_t from_iso_2022_jpOffs2 [] ={ 1886 0,0,0,0,0, 1887 1,1,1,1,1,1,1,1,1, 1888 1,1,1,1,1,1, 1889 3,3,3,3,3, 1890 4,4,4,4,4,4,4,4,4, 1891 4,4,4,4,4,4, 1892 6, 1893 }; 1894 1895 /*ISO-2022-cn*/ 1896 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1897 static const uint8_t to_iso_2022_cn[]={ 1898 0x41, 1899 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, 1900 0x42, 1901 }; 1902 static const int32_t from_iso_2022_cnOffs [] ={ 1903 0, 1904 1,1,1,1,1,1, 1905 2, 1906 }; 1907 1908 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; 1909 1910 static const uint8_t to_iso_2022_cn4[]={ 1911 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 1912 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1913 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1914 0x0e, 0x21, 0x22, 1915 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1916 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1917 0x42, 1918 }; 1919 static const int32_t from_iso_2022_cnOffs4 [] ={ 1920 0,0,0,0,0,0,0, 1921 1,1,1,1,1,1,1, 1922 1,1,1,1,1,1, 1923 3,3,3, 1924 4,4,4,4,4,4,4, 1925 4,4,4,4,4,4, 1926 6 1927 1928 }; 1929 1930 /*ISO-2022-kr*/ 1931 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 1932 static const uint8_t to_iso_2022_kr2[]={ 1933 0x1b, 0x24, 0x29, 0x43, 1934 0x41, 1935 0x0e, 0x25, 0x50, 1936 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1937 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1938 0x0e, 0x25, 0x50, 1939 0x0f, 0x42, 1940 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1941 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1942 0x43 1943 }; 1944 static const int32_t from_iso_2022_krOffs2 [] ={ 1945 -1,-1,-1,-1, 1946 0, 1947 1,1,1, 1948 2,2,2,2,2,2,2, 1949 2,2,2,2,2,2, 1950 4,4,4, 1951 5,5, 1952 6,6,6,6,6,6, 1953 
6,6,6,6,6,6, 1954 8, 1955 }; 1956 1957 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; 1958 static const uint8_t to_iso_2022_kr[]={ 1959 0x1b, 0x24, 0x29, 0x43, 1960 0x41, 1961 0x0e, 0x25, 0x50, 1962 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1963 0x0e, 0x25, 0x50, 1964 0x0f, 0x42, 1965 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1966 0x43 1967 }; 1968 1969 1970 static const int32_t from_iso_2022_krOffs [] ={ 1971 -1,-1,-1,-1, 1972 0, 1973 1,1,1, 1974 2,2,2,2,2,2,2, 1975 3,3,3, 1976 4,4, 1977 5,5,5,5,5,5, 1978 6, 1979 }; 1980 /* HZ encoding */ 1981 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 1982 1983 static const uint8_t to_hz[]={ 1984 0x7e, 0x7d, 0x41, 1985 0x7e, 0x7b, 0x26, 0x30, 1986 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/ 1987 0x7e, 0x7b, 0x26, 0x30, 1988 0x7e, 0x7d, 0x42, 1989 1990 }; 1991 static const int32_t from_hzOffs [] ={ 1992 0,0,0, 1993 1,1,1,1, 1994 2,2,2,2,2,2,2,2, 1995 3,3,3,3, 1996 4,4,4 1997 }; 1998 1999 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 2000 static const uint8_t to_hz2[]={ 2001 0x7e, 0x7d, 0x41, 2002 0x7e, 0x7b, 0x26, 0x30, 2003 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2004 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2005 0x7e, 0x7b, 0x26, 0x30, 2006 0x7e, 0x7d, 0x42, 2007 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2008 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2009 0x43 2010 }; 2011 static const int32_t from_hzOffs2 [] ={ 2012 0,0,0, 2013 1,1,1,1, 2014 2,2,2,2,2,2,2,2, 2015 2,2,2,2,2,2, 2016 4,4,4,4, 2017 5,5,5, 2018 6,6,6,6,6,6, 2019 6,6,6,6,6,6, 2020 8, 2021 }; 2022 2023 /*ISCII*/ 2024 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; 2025 static const uint8_t to_iscii[]={ 2026 0x41, 2027 0xef, 0x42, 0xa1, 2028 0x25, 0x55, 
0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2029 0xa2, 2030 0x42, 2031 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2032 0x43 2033 }; 2034 2035 2036 static const int32_t from_isciiOffs [] ={ 2037 0, 2038 1,1,1, 2039 2,2,2,2,2,2, 2040 3, 2041 4, 2042 5,5,5,5,5,5, 2043 6, 2044 }; 2045 2046 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 2047 toIBM943, sizeof(toIBM943), "ibm-943", 2048 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) 2049 log_err("u-> ibm-943 with subst with value did not match.\n"); 2050 2051 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 2052 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 2053 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) 2054 log_err("u-> euc-jp with subst with value did not match.\n"); 2055 2056 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 2057 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 2058 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) 2059 log_err("u-> euc-tw with subst with value did not match.\n"); 2060 2061 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), 2062 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", 2063 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2064 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2065 2066 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), 2067 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", 2068 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2069 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2070 2071 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), 2072 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", 2073 UCNV_FROM_U_CALLBACK_ESCAPE, 
from_iso_2022_jpOffs2, NULL, 0 )) 2074 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2075 /*ESCAPE OPTIONS*/ 2076 { 2077 /* surrogate pair*/ 2078 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; 2079 static const uint8_t to_iso_2022_jp3_v2[]={ 2080 0x1b, 0x24, 0x42, 0x21, 0x21, 2081 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2082 2083 0x1b, 0x24, 0x42, 0x21, 0x22, 2084 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2085 2086 0x42, 2087 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b, 2088 }; 2089 2090 static const int32_t from_iso_2022_jpOffs3_v2 [] ={ 2091 0,0,0,0,0, 2092 1,1,1,1,1,1,1,1,1,1,1,1, 2093 2094 3,3,3,3,3, 2095 4,4,4,4,4,4,4,4,4,4,4,4, 2096 2097 6, 2098 7,7,7,7,7,7,7,7,7 2099 }; 2100 2101 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]), 2102 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp", 2103 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2104 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n"); 2105 } 2106 { 2107 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2108 static const uint8_t to_iso_2022_cn5_v2[]={ 2109 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2110 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2111 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2112 0x0e, 0x21, 0x22, 2113 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2114 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2115 0x42, 2116 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, 2117 }; 2118 static const int32_t from_iso_2022_cnOffs5_v2 [] ={ 2119 0,0,0,0,0,0,0, 2120 1,1,1,1,1,1,1, 2121 1,1,1,1,1,1, 2122 3,3,3, 2123 4,4,4,4,4,4,4, 2124 4,4,4,4,4,4, 2125 6, 2126 7,7,7,7,7,7 2127 }; 2128 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, 
sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), 2129 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", 2130 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR )) 2131 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n"); 2132 2133 } 2134 { 2135 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2136 static const uint8_t to_iso_2022_cn6_v2[]={ 2137 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2138 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2139 0x0e, 0x21, 0x22, 2140 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2141 0x42, 2142 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d 2143 }; 2144 static const int32_t from_iso_2022_cnOffs6_v2 [] ={ 2145 0, 0, 0, 0, 0, 0, 0, 2146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2147 3, 3, 3, 2148 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2149 6, 2150 7, 7, 7, 7, 7, 7, 7, 7, 2151 }; 2152 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), 2153 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", 2154 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR )) 2155 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n"); 2156 2157 } 2158 { 2159 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2160 static const uint8_t to_iso_2022_cn7_v2[]={ 2161 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2162 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2163 0x0e, 0x21, 0x22, 2164 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2165 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, 2166 }; 2167 static const int32_t from_iso_2022_cnOffs7_v2 [] ={ 2168 0, 0, 0, 0, 0, 0, 0, 2169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2170 3, 3, 3, 2171 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 2172 6, 2173 7, 7, 7, 7, 7, 7, 2174 }; 2175 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), 2176 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", 2177 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR )) 2178 log_err("u-> iso-2022-cn with sub & K did not match.\n"); 2179 2180 } 2181 { 2182 static const uint8_t to_iso_2022_cn4_v3[]={ 2183 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2184 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2185 0x0e, 0x21, 0x22, 2186 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2187 0x42 2188 }; 2189 2190 2191 static const int32_t from_iso_2022_cnOffs4_v3 [] ={ 2192 0,0,0,0,0,0,0, 2193 1,1,1,1,1,1,1,1,1,1,1, 2194 2195 3,3,3, 2196 4,4,4,4,4,4,4,4,4,4,4, 2197 2198 6 2199 2200 }; 2201 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), 2202 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn", 2203 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2204 { 2205 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n"); 2206 } 2207 } 2208 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 2209 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 2210 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) 2211 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2212 2213 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), 2214 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", 2215 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) 2216 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2217 if(!testConvertFromUnicode(iso_2022_kr_inputText, 
sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 2218 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 2219 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) 2220 log_err("u-> iso_2022_kr with subst with value did not match.\n"); 2221 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]), 2222 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr", 2223 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) 2224 log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); 2225 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 2226 to_hz, sizeof(to_hz), "HZ", 2227 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) 2228 log_err("u-> hz with subst with value did not match.\n"); 2229 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]), 2230 to_hz2, sizeof(to_hz2), "HZ", 2231 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) 2232 log_err("u-> hz with subst with value did not match.\n"); 2233 2234 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 2235 to_iscii, sizeof(to_iscii), "ISCII,version=0", 2236 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) 2237 log_err("u-> iscii with subst with value did not match.\n"); 2238 } 2239#endif 2240 2241 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 2242 /*to Unicode*/ 2243 { 2244#if !UCONFIG_NO_LEGACY_CONVERSION 2245 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 2246 0x81, 0xad, /*unassigned*/ 2247 0x89, 0xd3 }; 2248 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 2249 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 2250 0x7B87}; 2251 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 2252 2253 /* EUC_JP*/ 2254 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 2255 0x8f, 0xda, 0xa1, /*unassigned*/ 
2256 0x8e, 0xe0, 2257 }; 2258 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 2259 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31, 2260 0x00a2 }; 2261 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, 2262 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2263 9, 2264 }; 2265 2266 /*EUC_TW*/ 2267 static const uint8_t sampleTxt_euc_tw[]={ 2268 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 2269 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 2270 0xe6, 0xca, 0x8a, 2271 }; 2272 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 2273 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43, 2274 0x8706, 0x8a, }; 2275 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 2276 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2277 11, 13}; 2278 2279 /*iso-2022-jp*/ 2280 static const uint8_t sampleTxt_iso_2022_jp[]={ 2281 0x1b, 0x28, 0x42, 0x41, 2282 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/ 2283 0x1b, 0x28, 0x42, 0x42, 2284 2285 }; 2286 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x32,0x41,0x25,0x58,0x34,0x34, 0x42 }; 2287 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 }; 2288 2289 /*iso-2022-cn*/ 2290 static const uint8_t sampleTxt_iso_2022_cn[]={ 2291 0x0f, 0x41, 0x44, 2292 0x1B, 0x24, 0x29, 0x47, 2293 0x0E, 0x40, 0x6c, /*unassigned*/ 2294 0x0f, 0x42, 2295 2296 }; 2297 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 }; 2298 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 }; 2299 2300 /*iso-2022-kr*/ 2301 static const uint8_t sampleTxt_iso_2022_kr[]={ 2302 0x1b, 0x24, 0x29, 0x43, 2303 0x41, 2304 0x0E, 0x7f, 0x1E, 2305 0x0e, 0x25, 0x50, 2306 0x0f, 0x51, 2307 0x42, 0x43, 2308 2309 }; 2310 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; 2311 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 
12, 13 , 14 }; 2312 2313 /*hz*/ 2314 static const uint8_t sampleTxt_hz[]={ 2315 0x41, 2316 0x7e, 0x7b, 0x26, 0x30, 2317 0x7f, 0x1E, /*unassigned*/ 2318 0x26, 0x30, 2319 0x7e, 0x7d, 0x42, 2320 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 2321 0x7e, 0x7d, 0x42, 2322 }; 2323 static const UChar hztoUnicode[]={ 2324 0x41, 2325 0x03a0, 2326 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2327 0x03A0, 2328 0x42, 2329 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2330 0x42,}; 2331 2332 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, }; 2333 2334 2335 /*iscii*/ 2336 static const uint8_t sampleTxt_iscii[]={ 2337 0x41, 2338 0x30, 2339 0xEB, /*unassigned*/ 2340 0xa3, 2341 0x42, 2342 0xEC, /*unassigned*/ 2343 0x42, 2344 }; 2345 static const UChar isciitoUnicode[]={ 2346 0x41, 2347 0x30, 2348 0x25, 0x58, 0x45, 0x42, 2349 0x0903, 2350 0x42, 2351 0x25, 0x58, 0x45, 0x43, 2352 0x42,}; 2353 2354 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; 2355#endif 2356 2357 /*UTF8*/ 2358 static const uint8_t sampleTxtUTF8[]={ 2359 0x20, 0x64, 0x50, 2360 0xC2, 0x7E, /* truncated char */ 2361 0x20, 2362 0xE0, 0xB5, 0x7E, /* truncated char */ 2363 0x40, 2364 }; 2365 static const UChar UTF8ToUnicode[]={ 2366 0x0020, 0x0064, 0x0050, 2367 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ 2368 0x0020, 2369 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E, 2370 0x0040 2371 }; 2372 static const int32_t fromUTF8[] = { 2373 0, 1, 2, 2374 3, 3, 3, 3, 4, 2375 5, 2376 6, 6, 6, 6, 6, 6, 6, 6, 8, 2377 9 2378 }; 2379 static const UChar UTF8ToUnicodeXML_DEC[]={ 2380 0x0020, 0x0064, 0x0050, 2381 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */ 2382 0x0020, 2383 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E, 2384 0x0040 2385 }; 2386 static const int32_t fromUTF8XML_DEC[] = { 2387 0, 1, 2, 2388 3, 3, 3, 3, 3, 3, 4, 2389 5, 2390 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 2391 9 
2392 }; 2393 2394 2395#if !UCONFIG_NO_LEGACY_CONVERSION 2396 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), 2397 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", 2398 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) 2399 log_err("ibm-943->u with substitute with value did not match.\n"); 2400 2401 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP), 2402 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp", 2403 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) 2404 log_err("euc-jp->u with substitute with value did not match.\n"); 2405 2406 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 2407 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 2408 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) 2409 log_err("euc-tw->u with substitute with value did not match.\n"); 2410 2411 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2412 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2413 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) 2414 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2415 2416 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2417 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2418 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR)) 2419 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2420 2421 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ 2422 { 2423 static const UChar iso_2022_jptoUnicodeDec[]={ 2424 0x0041, 2425 0x0026, 0x0023, 0x0034, 0x0032, 0x003b, 2426 0x0026, 0x0023, 0x0036, 0x0038, 0x003b, 2427 0x0042 }; 2428 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, }; 2429 
if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2430 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2431 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2432 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n"); 2433 } 2434 { 2435 static const UChar iso_2022_jptoUnicodeHex[]={ 2436 0x0041, 2437 0x0026, 0x0023, 0x0078, 0x0032, 0x0041, 0x003b, 2438 0x0026, 0x0023, 0x0078, 0x0034, 0x0034, 0x003b, 2439 0x0042 }; 2440 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 }; 2441 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2442 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2443 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) 2444 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n"); 2445 } 2446 { 2447 static const UChar iso_2022_jptoUnicodeC[]={ 2448 0x0041, 2449 0x005C, 0x0078, 0x0032, 0x0041, 2450 0x005C, 0x0078, 0x0034, 0x0034, 2451 0x0042 }; 2452 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; 2453 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2454 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2455 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2456 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n"); 2457 } 2458 } 2459 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), 2460 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", 2461 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) 2462 
log_err("iso-2022-cn->u with substitute with value did not match.\n"); 2463 2464 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), 2465 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", 2466 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) 2467 log_err("iso-2022-kr->u with substitute with value did not match.\n"); 2468 2469 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), 2470 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", 2471 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) 2472 log_err("hz->u with substitute with value did not match.\n"); 2473 2474 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), 2475 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", 2476 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) 2477 log_err("ISCII ->u with substitute with value did not match.\n"); 2478#endif 2479 2480 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), 2481 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8", 2482 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) 2483 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2484 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8), 2485 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8", 2486 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR)) 2487 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2488 } 2489}

#if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Runs one from-Unicode check on text that ibm-949 can convert completely,
 * then feeds the same ibm-943 byte sequence (containing the pair 0x82 0x20,
 * which the expected tables treat as one illegal unit at offset 2) through
 * the substitute, skip and stop to-Unicode callbacks.
 *
 * inputsize / outputsize are stored in gInBufferSize / gOutBufferSize and
 * control the chunk sizes used by the conversion helpers.
 */
static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
{
    /* from-Unicode data: every code unit is expected to convert */
    static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
    static const uint8_t templegal949[] = { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
    static const int32_t to949legal[] = { 0, 1, 1, 2, 2, 3, 3 };

    /* to-Unicode data shared by the three callback variants below */
    static const uint8_t text943[] = {
        0x82, 0xa9, 0x82, 0x20, /*0xc8,*/ 0x61, 0x8a, 0xbf, 0x8e, 0x9a };

    static const UChar toUnicode943sub[] = { 0x304b, 0xfffd, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57 };
    static const int32_t fromIBM943Offssub[] = { 0, 2, 4, 5, 7 };

    static const UChar toUnicode943skip[] = { 0x304b, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57 };
    static const int32_t fromIBM943Offsskip[] = { 0, 4, 5, 7 };

    static const UChar toUnicode943stop[] = { 0x304b };
    static const int32_t fromIBM943Offsstop[] = { 0 };

    gInBufferSize = inputsize;
    gOutBufferSize = outputsize;

    /* checking with a legal value */
    if (!testConvertFromUnicode(legalText, ARRAY_LENGTH(legalText),
            templegal949, sizeof(templegal949), "ibm-949",
            UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0)) {
        log_err("u-> ibm-949 with skip did not match.\n");
    }

    /* checking illegal value for ibm-943 with substitute */
    if (!testConvertToUnicode(text943, sizeof(text943),
            toUnicode943sub, ARRAY_LENGTH(toUnicode943sub), "ibm-943",
            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0)) {
        log_err("ibm-943->u with subst did not match.\n");
    }

    /* checking illegal value for ibm-943 with skip */
    if (!testConvertToUnicode(text943, sizeof(text943),
            toUnicode943skip, ARRAY_LENGTH(toUnicode943skip), "ibm-943",
            UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0)) {
        log_err("ibm-943->u with skip did not match.\n");
    }

    /* checking illegal value for ibm-943 with stop */
    if (!testConvertToUnicode(text943, sizeof(text943),
            toUnicode943stop, ARRAY_LENGTH(toUnicode943stop), "ibm-943",
            UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0)) {
        log_err("ibm-943->u with stop did not match.\n");
    }
}

/*
 * Converts an ibm-943 byte sequence to Unicode with the substitute callback.
 * The expected output has a single U+FFFD at source offset 5, i.e. for the
 * bytes 0x82 0xff, with ordinary characters on both sides.
 *
 * inputsize / outputsize are stored in gInBufferSize / gOutBufferSize.
 */
static void TestSingleByte(int32_t inputsize, int32_t outputsize)
{
    static const uint8_t sampleText[] = {
        0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
        0xff, /*0x82, 0xa9,*/ 0x32, 0x33 };
    static const UChar toUnicode943sub[] = {
        0x304b, 0x0061, 0x0062, 0x0063, 0xfffd, /*0x304b,*/ 0x0032, 0x0033 };
    static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7, 8 };

    gInBufferSize = inputsize;
    gOutBufferSize = outputsize;

    /* checking illegal value for ibm-943 with substitute */
    if (!testConvertToUnicode(sampleText, sizeof(sampleText),
            toUnicode943sub, ARRAY_LENGTH(toUnicode943sub), "ibm-943",
            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0)) {
        log_err("ibm-943->u with subst did not match.\n");
    }
}

/*
 * Converts the same Unicode input to ibm-930 twice with the substitute
 * callback: once with the converter's own substitution bytes (expected
 * 0x0e 0xfe 0xfe 0x0f in the output table) and once after overriding the
 * substitution character with the single byte 0x3f.
 *
 * inputsize / outputsize are stored in gInBufferSize / gOutBufferSize.
 */
static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
{
    /*EBCDIC_STATEFUL*/
    static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
    static const uint8_t toIBM930[] = {
        0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
    static const int32_t offset_930[] = { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
/* s SO doubl SI sng s SO fe fe SI s */

    /*EBCDIC_STATEFUL with subChar=3f*/
    static const uint8_t toIBM930_subvaried[] = {
        0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
    static const int32_t offset_930_subvaried[] = { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
    static const char mySubChar[] = { 0x3f };

    gInBufferSize = inputsize;
    gOutBufferSize = outputsize;

    if (!testConvertFromUnicode(ebcdic_inputTest, ARRAY_LENGTH(ebcdic_inputTest),
            toIBM930, sizeof(toIBM930), "ibm-930",
            UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0)) {
        log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
    }

    if (!testConvertFromUnicode(ebcdic_inputTest, ARRAY_LENGTH(ebcdic_inputTest),
            toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930",
            UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1)) {
        log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
    }
}
#endif

/*
 * Converts `source` (sourceLen UChars) to `codepage` with `callback`
 * installed as the from-Unicode error callback, and compares the produced
 * bytes and offsets against the expected tables.
 *
 *   source, sourceLen   input UTF-16 text and its length in UChars
 *   expect, expectLen   expected output bytes and their count
 *   codepage            converter name passed to ucnv_open()
 *   callback            from-Unicode callback to install (context is NULL)
 *   expectOffsets       expected per-output-byte source offsets, or NULL to
 *                       skip the offset comparison
 *   mySubChar, len      optional substitution bytes for ucnv_setSubstChars();
 *                       ignored when mySubChar is NULL
 *
 * The conversion is driven in chunks of gInBufferSize UChars in and
 * gOutBufferSize bytes out. Offsets are only requested and compared when
 * both globals equal NEW_MAX_BUFFER, i.e. when a single call does all the
 * work.
 *
 * Returns TRUE when the output (and, if checked, the offsets) match, and
 * also when the converter cannot be opened (reported through
 * log_data_err()). Returns FALSE on any mismatch or unexpected error.
 * The converter is closed on every path after a successful open.
 */
UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
                const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
                const char *mySubChar, int8_t len)
{
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = 0;
    char junkout[NEW_MAX_BUFFER]; /* FIX */
    int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
    const UChar *src;
    char *end;
    char *targ;
    int32_t *offs;
    int i;
    int outLen;
    int32_t realBufferSize;
    char *realBufferEnd;
    const UChar *realSourceEnd;
    const UChar *sourceLimit;
    UBool checkOffsets = TRUE;
    UBool doFlush;
    char junk[9999];
    char offset_str[9999];
    char *p;
    UConverterFromUCallback oldAction = NULL;
    const void* oldContext = NULL;

    /* Pre-fill both buffers with recognisable junk. */
    for(i=0;i<NEW_MAX_BUFFER;i++)
        junkout[i] = (char)0xF0;
    for(i=0;i<NEW_MAX_BUFFER;i++)
        junokout[i] = 0xFF;
    setNuConvTestName(codepage, "FROM");

    log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
            gOutBufferSize);

    conv = ucnv_open(codepage, &status);
    if(U_FAILURE(status))
    {
        /* Reported as a data error; the test itself is not counted as failed. */
        log_data_err("Couldn't open converter %s\n",codepage);
        return TRUE;
    }

    log_verbose("Converter opened..\n");

    /*----setting the callback routine----*/
    ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
    if (U_FAILURE(status))
    {
        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
    }
    /*------------------------*/
    /*setting the subChar*/
    if(mySubChar != NULL){
        ucnv_setSubstChars(conv, mySubChar, len, &status);
        if (U_FAILURE(status)) {
            log_err("FAILURE in setting the substitution chars! %s\n", myErrorName(status));
        }
    }
    /*------------*/

    src = source;
    targ = junkout;
    offs = junokout;

    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
    realBufferEnd = junkout + realBufferSize;
    realSourceEnd = source + sourceLen;

    /* Offsets are only meaningful when one call converts everything. */
    if ( gOutBufferSize != realBufferSize )
        checkOffsets = FALSE;

    if( gInBufferSize != NEW_MAX_BUFFER )
        checkOffsets = FALSE;

    do
    {
        end = nct_min(targ + gOutBufferSize, realBufferEnd);
        sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);

        doFlush = (UBool)(sourceLimit == realSourceEnd);

        if(targ == realBufferEnd)
        {
            log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%p %s", (void *)targ, gNuConvTestName);
            ucnv_close(conv);
            return FALSE;
        }
        log_verbose("calling fromUnicode @ SOURCE:%p to %p TARGET: %p to %p, flush=%s\n",
                (const void *)src, (const void *)sourceLimit, (void *)targ, (void *)end, doFlush?"TRUE":"FALSE");

        status = U_ZERO_ERROR;

        ucnv_fromUnicode (conv,
                  (char **)&targ,
                  (const char *)end,
                  &src,
                  sourceLimit,
                  checkOffsets ? offs : NULL,
                  doFlush, /* flush if we're at the end of the input data */
                  &status);
    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );

    outLen = (int)(targ - junkout);

    if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
        UChar errChars[50]; /* should be sufficient */
        int8_t errLen = 50;
        UErrorCode err = U_ZERO_ERROR;
        const UChar* start= NULL;
        ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
        if(U_FAILURE(err)){
            log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
        }
        /* src points to limit of invalid chars; */
        /* length of in invalid chars should be equal to returned length*/
        start = src - errLen;
        if(u_strncmp(errChars,start,errLen)!=0){
            log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
        }
    }
    /* allow failure codes for the stop callback */
    if(U_FAILURE(status) &&
       (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
    {
        log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
        ucnv_close(conv);
        return FALSE;
    }

    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
        sourceLen, outLen);
    if(VERBOSITY)
    {
        /* Each entry is exactly 6 characters, so NEW_MAX_BUFFER entries fit. */
        char *junkPos = junk;
        char *offsetPos = offset_str;

        junk[0] = 0;
        offset_str[0] = 0;
        for(p = junkout;p<targ;p++)
        {
            junkPos += sprintf(junkPos, "0x%02x, ", (0xFF) & (unsigned int)*p);
            offsetPos += sprintf(offsetPos, "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
        }

        log_verbose("%s", junk);
        printSeq(expect, expectLen);
        if ( checkOffsets )
        {
            log_verbose("\nOffsets:");
            log_verbose("%s", offset_str);
        }
        log_verbose("\n");
    }
    ucnv_close(conv);

    if(expectLen != outLen)
    {
        log_err("Expected %d chars out, got %d %s\n", expectLen, outLen, gNuConvTestName);
        log_verbose("Expected %d chars out, got %d %s\n", expectLen, outLen, gNuConvTestName);
        printSeqErr((const uint8_t *)junkout, (int32_t)outLen);
        printSeqErr(expect, expectLen);
        return FALSE;
    }

    if (checkOffsets && (expectOffsets != 0) )
    {
        log_verbose("comparing %d offsets..\n", outLen);
        if(memcmp(junokout,expectOffsets,outLen * sizeof(int32_t) )){
            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
            log_err("Got Output : ");
            printSeqErr((const uint8_t *)junkout, (int32_t)outLen);
            log_err("Got Offsets: ");
            for(i=0; i<outLen; i++)
                log_err("%d,", junokout[i]);
            log_err("\n");
            log_err("Expected Offsets: ");
            for(i=0; i<outLen; i++)
                log_err("%d,", expectOffsets[i]);
            log_err("\n");
            return FALSE;
        }
    }

    if(!memcmp(junkout, expect, expectLen))
    {
        log_verbose("String matches! %s\n", gNuConvTestName);
        return TRUE;
    }
    else
    {
        log_err("String does not match. %s\n", gNuConvTestName);
        log_err("source: ");
        printUSeqErr(source, sourceLen);
        log_err("Got: ");
        printSeqErr((const uint8_t *)junkout, expectLen);
        log_err("Expected: ");
        printSeqErr(expect, expectLen);
        return FALSE;
    }
}

2781 2782UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 2783 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, 2784 const char *mySubChar, int8_t len) 2785{ 2786 UErrorCode status = U_ZERO_ERROR; 2787 UConverter *conv = 0; 2788 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 2789 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 2790 const char *src; 2791 const char *realSourceEnd; 2792 const char *srcLimit; 2793 UChar *targ; 2794 UChar *end; 2795 int32_t *offs; 2796 int i; 2797 UBool checkOffsets = TRUE; 2798 char junk[9999]; 2799 char offset_str[9999]; 2800 UChar *p; 2801 UConverterToUCallback oldAction = NULL; 2802 const void* oldContext = NULL; 2803 2804 int32_t realBufferSize; 2805 UChar *realBufferEnd; 2806 2807 2808 for(i=0;i<NEW_MAX_BUFFER;i++) 2809 junkout[i] = 0xFFFE; 2810 2811 for(i=0;i<NEW_MAX_BUFFER;i++) 2812 junokout[i] = -1; 2813 2814 setNuConvTestName(codepage, "TO"); 2815 2816 log_verbose("\n========= %s\n", gNuConvTestName); 2817 2818 conv = ucnv_open(codepage, &status); 2819 if(U_FAILURE(status)) 2820 { 2821 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 2822 return TRUE; 2823 } 2824 2825 log_verbose("Converter opened..\n"); 2826 2827 src = (const char *)source; 2828 targ = junkout; 2829 offs = junokout; 2830 2831 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 2832 realBufferEnd = junkout + realBufferSize; 2833 realSourceEnd = src + sourcelen; 2834 /*----setting the callback routine----*/ 2835 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 2836 if (U_FAILURE(status)) 2837 { 2838 log_err("FAILURE in setting the callback Function! 
%s\n", myErrorName(status)); 2839 } 2840 /*-------------------------------------*/ 2841 /*setting the subChar*/ 2842 if(mySubChar != NULL){ 2843 ucnv_setSubstChars(conv, mySubChar, len, &status); 2844 if (U_FAILURE(status)) { 2845 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2846 } 2847 } 2848 /*------------*/ 2849 2850 2851 if ( gOutBufferSize != realBufferSize ) 2852 checkOffsets = FALSE; 2853 2854 if( gInBufferSize != NEW_MAX_BUFFER ) 2855 checkOffsets = FALSE; 2856 2857 do 2858 { 2859 end = nct_min( targ + gOutBufferSize, realBufferEnd); 2860 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 2861 2862 if(targ == realBufferEnd) 2863 { 2864 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 2865 return FALSE; 2866 } 2867 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 2868 2869 2870 2871 status = U_ZERO_ERROR; 2872 2873 ucnv_toUnicode (conv, 2874 &targ, 2875 end, 2876 (const char **)&src, 2877 (const char *)srcLimit, 2878 checkOffsets ? 
offs : NULL, 2879 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ 2880 &status); 2881 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 2882 2883 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ 2884 char errChars[50]; /* should be sufficient */ 2885 int8_t errLen = 50; 2886 UErrorCode err = U_ZERO_ERROR; 2887 const char* limit= NULL; 2888 const char* start= NULL; 2889 ucnv_getInvalidChars(conv,errChars, &errLen, &err); 2890 if(U_FAILURE(err)){ 2891 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err)); 2892 } 2893 /* src points to limit of invalid chars */ 2894 limit = src; 2895 /* length of in invalid chars should be equal to returned length*/ 2896 start = src - errLen; 2897 if(uprv_strncmp(errChars,start,errLen)!=0){ 2898 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); 2899 } 2900 } 2901 /* allow failure codes for the stop callback */ 2902 if(U_FAILURE(status) && 2903 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) 2904 { 2905 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 2906 return FALSE; 2907 } 2908 2909 log_verbose("\nConversion done. 
%d bytes -> %d chars.\nResult :", 2910 sourcelen, targ-junkout); 2911 if(VERBOSITY) 2912 { 2913 2914 junk[0] = 0; 2915 offset_str[0] = 0; 2916 2917 for(p = junkout;p<targ;p++) 2918 { 2919 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 2920 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 2921 } 2922 2923 log_verbose(junk); 2924 printUSeq(expect, expectlen); 2925 if ( checkOffsets ) 2926 { 2927 log_verbose("\nOffsets:"); 2928 log_verbose(offset_str); 2929 } 2930 log_verbose("\n"); 2931 } 2932 ucnv_close(conv); 2933 2934 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 2935 2936 if (checkOffsets && (expectOffsets != 0)) 2937 { 2938 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 2939 { 2940 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 2941 log_err("Got offsets: "); 2942 for(p=junkout;p<targ;p++) 2943 log_err(" %2d,", junokout[p-junkout]); 2944 log_err("\n"); 2945 log_err("Expected offsets: "); 2946 for(i=0; i<(targ-junkout); i++) 2947 log_err(" %2d,", expectOffsets[i]); 2948 log_err("\n"); 2949 log_err("Got output: "); 2950 for(i=0; i<(targ-junkout); i++) 2951 log_err("0x%04x,", junkout[i]); 2952 log_err("\n"); 2953 log_err("From source: "); 2954 for(i=0; i<(src-(const char *)source); i++) 2955 log_err(" 0x%02x,", (unsigned char)source[i]); 2956 log_err("\n"); 2957 } 2958 } 2959 2960 if(!memcmp(junkout, expect, expectlen*2)) 2961 { 2962 log_verbose("Matches!\n"); 2963 return TRUE; 2964 } 2965 else 2966 { 2967 log_err("String does not match. %s\n", gNuConvTestName); 2968 log_verbose("String does not match. 
%s\n", gNuConvTestName); 2969 log_err("Got: "); 2970 printUSeqErr(junkout, expectlen); 2971 log_err("Expected: "); 2972 printUSeqErr(expect, expectlen); 2973 log_err("\n"); 2974 return FALSE; 2975 } 2976} 2977 2978UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 2979 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, 2980 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) 2981{ 2982 2983 2984 UErrorCode status = U_ZERO_ERROR; 2985 UConverter *conv = 0; 2986 char junkout[NEW_MAX_BUFFER]; /* FIX */ 2987 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 2988 const UChar *src; 2989 char *end; 2990 char *targ; 2991 int32_t *offs; 2992 int i; 2993 int32_t realBufferSize; 2994 char *realBufferEnd; 2995 const UChar *realSourceEnd; 2996 const UChar *sourceLimit; 2997 UBool checkOffsets = TRUE; 2998 UBool doFlush; 2999 char junk[9999]; 3000 char offset_str[9999]; 3001 char *p; 3002 UConverterFromUCallback oldAction = NULL; 3003 const void* oldContext = NULL; 3004 3005 3006 for(i=0;i<NEW_MAX_BUFFER;i++) 3007 junkout[i] = (char)0xF0; 3008 for(i=0;i<NEW_MAX_BUFFER;i++) 3009 junokout[i] = 0xFF; 3010 setNuConvTestName(codepage, "FROM"); 3011 3012 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 3013 gOutBufferSize); 3014 3015 conv = ucnv_open(codepage, &status); 3016 if(U_FAILURE(status)) 3017 { 3018 log_data_err("Couldn't open converter %s\n",codepage); 3019 return TRUE; /* Because the err has already been logged. */ 3020 } 3021 3022 log_verbose("Converter opened..\n"); 3023 3024 /*----setting the callback routine----*/ 3025 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status); 3026 if (U_FAILURE(status)) 3027 { 3028 log_err("FAILURE in setting the callback Function! 
%s\n", myErrorName(status)); 3029 } 3030 /*------------------------*/ 3031 /*setting the subChar*/ 3032 if(mySubChar != NULL){ 3033 ucnv_setSubstChars(conv, mySubChar, len, &status); 3034 if (U_FAILURE(status)) { 3035 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status)); 3036 } 3037 } 3038 /*------------*/ 3039 3040 src = source; 3041 targ = junkout; 3042 offs = junokout; 3043 3044 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 3045 realBufferEnd = junkout + realBufferSize; 3046 realSourceEnd = source + sourceLen; 3047 3048 if ( gOutBufferSize != realBufferSize ) 3049 checkOffsets = FALSE; 3050 3051 if( gInBufferSize != NEW_MAX_BUFFER ) 3052 checkOffsets = FALSE; 3053 3054 do 3055 { 3056 end = nct_min(targ + gOutBufferSize, realBufferEnd); 3057 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 3058 3059 doFlush = (UBool)(sourceLimit == realSourceEnd); 3060 3061 if(targ == realBufferEnd) 3062 { 3063 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 3064 return FALSE; 3065 } 3066 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 3067 3068 3069 status = U_ZERO_ERROR; 3070 3071 ucnv_fromUnicode (conv, 3072 (char **)&targ, 3073 (const char *)end, 3074 &src, 3075 sourceLimit, 3076 checkOffsets ? offs : NULL, 3077 doFlush, /* flush if we're at the end of the input data */ 3078 &status); 3079 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); 3080 3081 /* allow failure codes for the stop callback */ 3082 if(U_FAILURE(status) && status != expectedError) 3083 { 3084 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 3085 return FALSE; 3086 } 3087 3088 log_verbose("\nConversion done [%d uchars in -> %d chars out]. 
\nResult :", 3089 sourceLen, targ-junkout); 3090 if(VERBOSITY) 3091 { 3092 3093 junk[0] = 0; 3094 offset_str[0] = 0; 3095 for(p = junkout;p<targ;p++) 3096 { 3097 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 3098 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 3099 } 3100 3101 log_verbose(junk); 3102 printSeq(expect, expectLen); 3103 if ( checkOffsets ) 3104 { 3105 log_verbose("\nOffsets:"); 3106 log_verbose(offset_str); 3107 } 3108 log_verbose("\n"); 3109 } 3110 ucnv_close(conv); 3111 3112 3113 if(expectLen != targ-junkout) 3114 { 3115 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 3116 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 3117 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 3118 printSeqErr(expect, expectLen); 3119 return FALSE; 3120 } 3121 3122 if (checkOffsets && (expectOffsets != 0) ) 3123 { 3124 log_verbose("comparing %d offsets..\n", targ-junkout); 3125 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 3126 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3127 log_err("Got Output : "); 3128 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 3129 log_err("Got Offsets: "); 3130 for(p=junkout;p<targ;p++) 3131 log_err("%d,", junokout[p-junkout]); 3132 log_err("\n"); 3133 log_err("Expected Offsets: "); 3134 for(i=0; i<(targ-junkout); i++) 3135 log_err("%d,", expectOffsets[i]); 3136 log_err("\n"); 3137 return FALSE; 3138 } 3139 } 3140 3141 if(!memcmp(junkout, expect, expectLen)) 3142 { 3143 log_verbose("String matches! %s\n", gNuConvTestName); 3144 return TRUE; 3145 } 3146 else 3147 { 3148 log_err("String does not match. 
%s\n", gNuConvTestName); 3149 log_err("source: "); 3150 printUSeqErr(source, sourceLen); 3151 log_err("Got: "); 3152 printSeqErr((const uint8_t *)junkout, expectLen); 3153 log_err("Expected: "); 3154 printSeqErr(expect, expectLen); 3155 return FALSE; 3156 } 3157} 3158UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 3159 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, 3160 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) 3161{ 3162 UErrorCode status = U_ZERO_ERROR; 3163 UConverter *conv = 0; 3164 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 3165 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 3166 const char *src; 3167 const char *realSourceEnd; 3168 const char *srcLimit; 3169 UChar *targ; 3170 UChar *end; 3171 int32_t *offs; 3172 int i; 3173 UBool checkOffsets = TRUE; 3174 char junk[9999]; 3175 char offset_str[9999]; 3176 UChar *p; 3177 UConverterToUCallback oldAction = NULL; 3178 const void* oldContext = NULL; 3179 3180 int32_t realBufferSize; 3181 UChar *realBufferEnd; 3182 3183 3184 for(i=0;i<NEW_MAX_BUFFER;i++) 3185 junkout[i] = 0xFFFE; 3186 3187 for(i=0;i<NEW_MAX_BUFFER;i++) 3188 junokout[i] = -1; 3189 3190 setNuConvTestName(codepage, "TO"); 3191 3192 log_verbose("\n========= %s\n", gNuConvTestName); 3193 3194 conv = ucnv_open(codepage, &status); 3195 if(U_FAILURE(status)) 3196 { 3197 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 3198 return TRUE; 3199 } 3200 3201 log_verbose("Converter opened..\n"); 3202 3203 src = (const char *)source; 3204 targ = junkout; 3205 offs = junokout; 3206 3207 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 3208 realBufferEnd = junkout + realBufferSize; 3209 realSourceEnd = src + sourcelen; 3210 /*----setting the callback routine----*/ 3211 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status); 3212 if (U_FAILURE(status)) 3213 { 3214 log_err("FAILURE in 
setting the callback Function! %s\n", myErrorName(status)); 3215 } 3216 /*-------------------------------------*/ 3217 /*setting the subChar*/ 3218 if(mySubChar != NULL){ 3219 ucnv_setSubstChars(conv, mySubChar, len, &status); 3220 if (U_FAILURE(status)) { 3221 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3222 } 3223 } 3224 /*------------*/ 3225 3226 3227 if ( gOutBufferSize != realBufferSize ) 3228 checkOffsets = FALSE; 3229 3230 if( gInBufferSize != NEW_MAX_BUFFER ) 3231 checkOffsets = FALSE; 3232 3233 do 3234 { 3235 end = nct_min( targ + gOutBufferSize, realBufferEnd); 3236 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 3237 3238 if(targ == realBufferEnd) 3239 { 3240 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 3241 return FALSE; 3242 } 3243 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 3244 3245 3246 3247 status = U_ZERO_ERROR; 3248 3249 ucnv_toUnicode (conv, 3250 &targ, 3251 end, 3252 (const char **)&src, 3253 (const char *)srcLimit, 3254 checkOffsets ? offs : NULL, 3255 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ 3256 &status); 3257 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 3258 3259 /* allow failure codes for the stop callback */ 3260 if(U_FAILURE(status) && status!=expectedError) 3261 { 3262 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 3263 return FALSE; 3264 } 3265 3266 log_verbose("\nConversion done. 
%d bytes -> %d chars.\nResult :", 3267 sourcelen, targ-junkout); 3268 if(VERBOSITY) 3269 { 3270 3271 junk[0] = 0; 3272 offset_str[0] = 0; 3273 3274 for(p = junkout;p<targ;p++) 3275 { 3276 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 3277 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 3278 } 3279 3280 log_verbose(junk); 3281 printUSeq(expect, expectlen); 3282 if ( checkOffsets ) 3283 { 3284 log_verbose("\nOffsets:"); 3285 log_verbose(offset_str); 3286 } 3287 log_verbose("\n"); 3288 } 3289 ucnv_close(conv); 3290 3291 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 3292 3293 if (checkOffsets && (expectOffsets != 0)) 3294 { 3295 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 3296 { 3297 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3298 log_err("Got offsets: "); 3299 for(p=junkout;p<targ;p++) 3300 log_err(" %2d,", junokout[p-junkout]); 3301 log_err("\n"); 3302 log_err("Expected offsets: "); 3303 for(i=0; i<(targ-junkout); i++) 3304 log_err(" %2d,", expectOffsets[i]); 3305 log_err("\n"); 3306 log_err("Got output: "); 3307 for(i=0; i<(targ-junkout); i++) 3308 log_err("0x%04x,", junkout[i]); 3309 log_err("\n"); 3310 log_err("From source: "); 3311 for(i=0; i<(src-(const char *)source); i++) 3312 log_err(" 0x%02x,", (unsigned char)source[i]); 3313 log_err("\n"); 3314 } 3315 } 3316 3317 if(!memcmp(junkout, expect, expectlen*2)) 3318 { 3319 log_verbose("Matches!\n"); 3320 return TRUE; 3321 } 3322 else 3323 { 3324 log_err("String does not match. %s\n", gNuConvTestName); 3325 log_verbose("String does not match. 
%s\n", gNuConvTestName); 3326 log_err("Got: "); 3327 printUSeqErr(junkout, expectlen); 3328 log_err("Expected: "); 3329 printUSeqErr(expect, expectlen); 3330 log_err("\n"); 3331 return FALSE; 3332 } 3333} 3334 3335static void TestCallBackFailure(void) { 3336 UErrorCode status = U_USELESS_COLLATOR_ERROR; 3337 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); 3338 if (status != U_USELESS_COLLATOR_ERROR) { 3339 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n"); 3340 } 3341 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); 3342 if (status != U_USELESS_COLLATOR_ERROR) { 3343 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n"); 3344 } 3345 ucnv_cbFromUWriteSub(NULL, -1, &status); 3346 if (status != U_USELESS_COLLATOR_ERROR) { 3347 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n"); 3348 } 3349 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); 3350 if (status != U_USELESS_COLLATOR_ERROR) { 3351 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n"); 3352 } 3353} 3354 3355