1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2013, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/***************************************************************************** 7* 8* File CCONVTST.C 9* 10* Modification History: 11* Name Description 12* Madhu Katragadda 7/7/2000 Converter Tests for extended code coverage 13****************************************************************************** 14*/ 15#include <stdio.h> 16#include <stdlib.h> 17#include <string.h> 18#include "unicode/uloc.h" 19#include "unicode/ucnv.h" 20#include "unicode/utypes.h" 21#include "unicode/ustring.h" 22#include "unicode/uset.h" 23#include "cintltst.h" 24 25#define MAX_LENGTH 999 26 27#define UNICODE_LIMIT 0x10FFFF 28#define SURROGATE_HIGH_START 0xD800 29#define SURROGATE_LOW_END 0xDFFF 30 31static int32_t gInBufferSize = 0; 32static int32_t gOutBufferSize = 0; 33static char gNuConvTestName[1024]; 34 35#define nct_min(x,y) ((x<y) ? x : y) 36#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 37 38static void printSeq(const unsigned char* a, int len); 39static void printSeqErr(const unsigned char* a, int len); 40static void printUSeq(const UChar* a, int len); 41static void printUSeqErr(const UChar* a, int len); 42static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 43 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 44static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 45 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 46 47static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 48 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset); 49static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 50 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset); 51 52static void setNuConvTestName(const char *codepage, const char *direction) 53{ 54 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 55 codepage, 56 direction, 57 (int)gInBufferSize, 58 (int)gOutBufferSize); 59} 60 61 62static void TestSurrogateBehaviour(void); 63static void TestErrorBehaviour(void); 64 65#if !UCONFIG_NO_LEGACY_CONVERSION 66static void TestToUnicodeErrorBehaviour(void); 67static void TestGetNextErrorBehaviour(void); 68#endif 69 70static void TestRegressionUTF8(void); 71static void TestRegressionUTF32(void); 72static void TestAvailableConverters(void); 73static void TestFlushInternalBuffer(void); /*for improved code coverage in ucnv_cnv.c*/ 74static void TestResetBehaviour(void); 75static void TestTruncated(void); 76static void TestUnicodeSet(void); 77 78static void TestWithBufferSize(int32_t osize, int32_t isize); 79 80 81static void printSeq(const unsigned char* a, int len) 82{ 83 int i=0; 84 log_verbose("\n{"); 85 while (i<len) 86 log_verbose("0x%02X ", a[i++]); 87 log_verbose("}\n"); 88} 89 90static void printUSeq(const UChar* a, int len) 91{ 92 int i=0; 93 log_verbose("\n{"); 94 while (i<len) 95 log_verbose("%0x04X ", a[i++]); 96 log_verbose("}\n"); 97} 98 99static void printSeqErr(const unsigned char* a, int len) 100{ 101 int i=0; 102 fprintf(stderr, "\n{"); 103 while (i<len) fprintf(stderr, "0x%02X ", a[i++]); 104 fprintf(stderr, "}\n"); 105} 106 107static void printUSeqErr(const UChar* a, int len) 108{ 109 int i=0; 110 fprintf(stderr, "\n{"); 111 while (i<len) 112 fprintf(stderr, "0x%04X ", a[i++]); 113 fprintf(stderr,"}\n"); 114} 115 116void addExtraTests(TestNode** root); 117 118void addExtraTests(TestNode** root) 119{ 120 addTest(root, &TestSurrogateBehaviour, "tsconv/ncnvtst/TestSurrogateBehaviour"); 121 addTest(root, &TestErrorBehaviour, "tsconv/ncnvtst/TestErrorBehaviour"); 122 123#if !UCONFIG_NO_LEGACY_CONVERSION 124 addTest(root, &TestToUnicodeErrorBehaviour, "tsconv/ncnvtst/ToUnicodeErrorBehaviour"); 125 addTest(root, &TestGetNextErrorBehaviour, "tsconv/ncnvtst/TestGetNextErrorBehaviour"); 126#endif 127 128 addTest(root, &TestAvailableConverters, "tsconv/ncnvtst/TestAvailableConverters"); 129 addTest(root, &TestFlushInternalBuffer, "tsconv/ncnvtst/TestFlushInternalBuffer"); 130 addTest(root, &TestResetBehaviour, "tsconv/ncnvtst/TestResetBehaviour"); 131 addTest(root, &TestRegressionUTF8, "tsconv/ncnvtst/TestRegressionUTF8"); 132 addTest(root, &TestRegressionUTF32, "tsconv/ncnvtst/TestRegressionUTF32"); 133 addTest(root, &TestTruncated, "tsconv/ncnvtst/TestTruncated"); 134 addTest(root, &TestUnicodeSet, "tsconv/ncnvtst/TestUnicodeSet"); 135} 136 137/*test surrogate behaviour*/ 138static void TestSurrogateBehaviour(){ 139 log_verbose("Testing for SBCS and LATIN_1\n"); 140 { 141 UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032}; 142 const uint8_t expected[] = {0x31, 0x1a, 0x32}; 143 144#if !UCONFIG_NO_LEGACY_CONVERSION 145 /*SBCS*/ 146 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 147 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR)) 148 log_err("u-> ibm-920 [UCNV_SBCS] not match.\n"); 149#endif 150 151 /*LATIN_1*/ 152 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 153 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR )) 154 log_err("u-> LATIN_1 not match.\n"); 155 156 } 157 158#if !UCONFIG_NO_LEGACY_CONVERSION 159 log_verbose("Testing for DBCS and MBCS\n"); 160 { 161 UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 162 const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 163 int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 164 165 /*DBCS*/ 166 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 167 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 168 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 169 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 170 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR)) 171 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 172 /*MBCS*/ 173 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 174 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 175 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 176 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 177 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR)) 178 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 179 } 180 181 log_verbose("Testing for ISO-2022-jp\n"); 182 { 183 UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 184 185 const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 186 0x31,0x1A, 0x32}; 187 188 189 int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 190 191 /*iso-2022-jp*/ 192 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 193 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR)) 194 log_err("u-> not match.\n"); 195 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 196 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR)) 197 log_err("u-> not match.\n"); 198 } 199 200 log_verbose("Testing for ISO-2022-cn\n"); 201 { 202 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 203 204 static const uint8_t expected[] = { 205 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 206 0x36, 0x21, 207 0x0F, 0x31, 208 0x1A, 209 0x32 210 }; 211 212 213 214 static const int32_t offsets[] = { 215 0, 0, 0, 0, 0, 0, 0, 216 1, 1, 217 2, 2, 218 3, 219 5, }; 220 221 /*iso-2022-CN*/ 222 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 223 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR)) 224 log_err("u-> not match.\n"); 225 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 226 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR)) 227 log_err("u-> not match.\n"); 228 } 229 230 log_verbose("Testing for ISO-2022-kr\n"); 231 { 232 static const UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 233 234 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 235 0x0E, 0x6C, 0x69, 236 0x0f, 0x1A, 237 0x0e, 0x6F, 0x4B, 238 0x0F, 0x31, 239 0x1A, 240 0x32 }; 241 242 static const int32_t offsets[] = {-1, -1, -1, -1, 243 0, 0, 0, 244 1, 1, 245 3, 3, 3, 246 4, 4, 247 5, 248 7, 249 }; 250 251 /*iso-2022-kr*/ 252 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 253 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR)) 254 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 255 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 256 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR)) 257 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 258 } 259 260 log_verbose("Testing for HZ\n"); 261 { 262 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 263 264 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 265 0x7E, 0x7D, 0x1A, 266 0x7E, 0x7B, 0x36, 0x21, 267 0x7E, 0x7D, 0x31, 268 0x1A, 269 0x32 }; 270 271 272 static const int32_t offsets[] = {0,0,0,0, 273 1,1,1, 274 3,3,3,3, 275 4,4,4, 276 5, 277 7,}; 278 279 /*hz*/ 280 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 281 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR)) 282 log_err("u-> HZ not match.\n"); 283 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 284 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR)) 285 log_err("u-> HZ not match.\n"); 286 } 287#endif 288 289 /*UTF-8*/ 290 log_verbose("Testing for UTF8\n"); 291 { 292 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 293 static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 294 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 295 0x04, 0x06 }; 296 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 297 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 298 299 300 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 301 /*UTF-8*/ 302 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 303 expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR )) 304 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 305 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 306 expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR )) 307 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 308 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 309 expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR )) 310 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 311 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 312 expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR )) 313 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 314 315 if(!convertToU(expected, sizeof(expected), 316 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR )) 317 log_err("UTF8 -> u did not match.\n"); 318 if(!convertToU(expected, sizeof(expected), 319 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR )) 320 log_err("UTF8 -> u did not match.\n"); 321 if(!convertToU(expected, sizeof(expected), 322 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR )) 323 log_err("UTF8 ->u did not match.\n"); 324 if(!convertToU(expected, sizeof(expected), 325 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR )) 326 log_err("UTF8 -> u did not match.\n"); 327 328 } 329} 330 331/*test various error behaviours*/ 332static void TestErrorBehaviour(){ 333 log_verbose("Testing for SBCS and LATIN_1\n"); 334 { 335 static const UChar sampleText[] = { 0x0031, 0xd801}; 336 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 337 static const uint8_t expected0[] = { 0x31}; 338 static const uint8_t expected[] = { 0x31, 0x1a}; 339 static const uint8_t expected2[] = { 0x31, 0x1a, 0x32}; 340 341#if !UCONFIG_NO_LEGACY_CONVERSION 342 /*SBCS*/ 343 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 344 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 345 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 346 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 347 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR)) 348 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 349 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 350 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 351 log_err("u-> ibm-920 [UCNV_SBCS] did not match\n"); 352#endif 353 354 /*LATIN_1*/ 355 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 356 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 357 log_err("u-> LATIN_1 is supposed to fail\n"); 358 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 359 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR)) 360 log_err("u-> LATIN_1 is supposed to fail\n"); 361 362 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 363 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 364 log_err("u-> LATIN_1 did not match\n"); 365 } 366 367#if !UCONFIG_NO_LEGACY_CONVERSION 368 log_verbose("Testing for DBCS and MBCS\n"); 369 { 370 static const UChar sampleText[] = { 0x00a1, 0xd801}; 371 static const uint8_t expected[] = { 0xa2, 0xae}; 372 static const int32_t offsets[] = { 0x00, 0x00}; 373 static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0}; 374 static const int32_t offsetsSUB[] = { 0x00, 0x00, 0x01, 0x01}; 375 376 static const UChar sampleText2[] = { 0x00a1, 0xd801, 0x00a4}; 377 static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 378 static const int32_t offsets2[] = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02}; 379 380 static const UChar sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01}; 381 static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0}; 382 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x01, 0x02, 0x02}; 383 384 static const UChar sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01}; 385 static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe}; 386 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 }; 387 388 /*DBCS*/ 389 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 390 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 391 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 392 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 393 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 394 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 395 396 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 397 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR)) 398 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 399 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 400 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 401 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 402 403 404 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 405 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 406 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 407 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 408 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR)) 409 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 410 411 /*MBCS*/ 412 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 413 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 414 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 415 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 416 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 417 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 418 419 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 420 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 421 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 422 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 423 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 424 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 425 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 426 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR)) 427 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 428 429 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 430 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR)) 431 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 432 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 433 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR)) 434 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 435 436 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 437 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR)) 438 log_err("u-> euc-jp [UCNV_MBCS] \n"); 439 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 440 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR)) 441 log_err("u-> euc-jp [UCNV_MBCS] \n"); 442 } 443 444 /*iso-2022-jp*/ 445 log_verbose("Testing for iso-2022-jp\n"); 446 { 447 static const UChar sampleText[] = { 0x0031, 0xd801}; 448 static const uint8_t expected[] = { 0x31}; 449 static const uint8_t expectedSUB[] = { 0x31, 0x1a}; 450 static const int32_t offsets[] = { 0x00, 1}; 451 452 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 453 static const uint8_t expected2[] = { 0x31,0x1A,0x32}; 454 static const int32_t offsets2[] = { 0x00,0x01,0x02}; 455 456 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 457 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a}; 458 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 }; 459 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 460 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR)) 461 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 462 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 463 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 464 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 465 466 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 467 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR)) 468 log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n"); 469 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 470 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 471 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 472 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 473 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 474 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 475 476 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 477 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) 478 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 479 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 480 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) 481 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 482 } 483 484 /*iso-2022-cn*/ 485 log_verbose("Testing for iso-2022-cn\n"); 486 { 487 static const UChar sampleText[] = { 0x0031, 0xd801}; 488 static const uint8_t expected[] = { 0x31}; 489 static const uint8_t expectedSUB[] = { 0x31, 0x1A}; 490 static const int32_t offsets[] = { 0x00, 1}; 491 492 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 493 static const uint8_t expected2[] = { 0x31, 0x1A,0x32}; 494 static const int32_t offsets2[] = { 0x00, 0x01,0x02}; 495 496 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 497 static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A}; 498 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x02 }; 499 500 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 501 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a }; 502 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 }; 503 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 504 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR)) 505 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 506 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 507 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR)) 508 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 509 510 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 511 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR)) 512 log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n"); 513 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 514 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 515 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 516 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 517 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 518 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 519 520 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 521 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR)) 522 log_err("u->iso-2022-cn [UCNV_MBCS] \n"); 523 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 524 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR)) 525 log_err("u-> iso-2022-cn[UCNV_MBCS] \n"); 526 527 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 528 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR)) 529 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 530 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 531 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR)) 532 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 533 } 534 535 /*iso-2022-kr*/ 536 log_verbose("Testing for iso-2022-kr\n"); 537 { 538 static const UChar sampleText[] = { 0x0031, 0xd801}; 539 static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31}; 540 static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A}; 541 static const int32_t offsets[] = { -1, -1, -1, -1, 0x00, 1}; 542 543 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 544 static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32}; 545 static const int32_t offsets2[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02}; 546 547 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 548 static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A }; 549 static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 }; 550 551 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 552 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR)) 553 log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); 554 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 555 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR)) 556 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 557 558 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 559 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR)) 560 log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n"); 561 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 562 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 563 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 564 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 565 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 566 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 567 568 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 569 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR)) 570 log_err("u->iso-2022-kr [UCNV_MBCS] \n"); 571 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 572 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR)) 573 log_err("u-> iso-2022-kr[UCNV_MBCS] \n"); 574 } 575 576 /*HZ*/ 577 log_verbose("Testing for HZ\n"); 578 { 579 static const UChar sampleText[] = { 0x0031, 0xd801}; 580 static const uint8_t expected[] = { 0x7e, 0x7d, 0x31}; 581 static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A}; 582 static const int32_t offsets[] = { 0x00, 0x00, 0x00, 1}; 583 584 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 585 static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31, 0x1A, 0x32 }; 586 static const int32_t offsets2[] = { 0x00, 0x00, 0x00, 0x01, 0x02 }; 587 588 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 589 static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50, 0x1A }; 590 static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x02}; 591 592 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 593 static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a }; 594 static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 }; 595 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 596 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR)) 597 log_err("u-> HZ [UCNV_MBCS] \n"); 598 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 599 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR)) 600 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 601 602 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 603 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR)) 604 log_err("u->HZ[UCNV_DBCS] did not match\n"); 605 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 606 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 607 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 608 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 609 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 610 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 611 612 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 613 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR)) 614 log_err("u->HZ [UCNV_MBCS] \n"); 615 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 616 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR)) 617 log_err("u-> HZ[UCNV_MBCS] \n"); 618 619 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 620 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR)) 621 log_err("u-> HZ [UCNV_MBCS] \n"); 622 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 623 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR)) 624 log_err("u-> HZ [UCNV_MBCS] \n"); 625 } 626#endif 627} 628 629#if !UCONFIG_NO_LEGACY_CONVERSION 630/*test different convertToUnicode error behaviours*/ 631static void TestToUnicodeErrorBehaviour() 632{ 633 log_verbose("Testing error conditions for DBCS\n"); 634 { 635 uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04}; 636 const UChar expected[] = { 0x00a1 }; 637 638 if(!convertToU(sampleText, sizeof(sampleText), 639 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_AMBIGUOUS_ALIAS_WARNING )) 640 log_err("DBCS (ibm-1363)->Unicode did not match.\n"); 641 if(!convertToU(sampleText, sizeof(sampleText), 642 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING )) 643 log_err("DBCS (ibm-1363)->Unicode with flush = false did not match.\n"); 644 } 645 log_verbose("Testing error conditions for SBCS\n"); 646 { 647 uint8_t sampleText[] = { 0xa2, 0xFF}; 648 const UChar expected[] = { 0x00c2 }; 649 650 /* uint8_t sampleText2[] = { 0xa2, 0x70 }; 651 const UChar expected2[] = { 0x0073 };*/ 652 653 if(!convertToU(sampleText, sizeof(sampleText), 654 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR )) 655 log_err("SBCS (ibm-1051)->Unicode did not match.\n"); 656 if(!convertToU(sampleText, sizeof(sampleText), 657 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR )) 658 log_err("SBCS (ibm-1051)->Unicode with flush = false did not match.\n"); 659 660 } 661} 662 663static void TestGetNextErrorBehaviour(){ 664 /*Test for unassigned character*/ 665#define INPUT_SIZE 1 666 static const char input1[INPUT_SIZE]={ 0x70 }; 667 const char* source=(const char*)input1; 668 UErrorCode err=U_ZERO_ERROR; 669 UChar32 c=0; 670 UConverter *cnv=ucnv_open("ibm-424", &err); 671 if(U_FAILURE(err)) { 672 log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err)); 673 return; 674 } 675 c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err); 676 if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){ 677 log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n", myErrorName(err), c); 678 } 679 ucnv_close(cnv); 680} 681#endif 682 683#define MAX_UTF16_LEN 2 684#define MAX_UTF8_LEN 4 685 686/*Regression test for utf8 converter*/ 687static void TestRegressionUTF8(){ 688 UChar32 currCh = 0; 689 int32_t offset8; 690 int32_t offset16; 691 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 692 uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH); 693 694 while (currCh <= UNICODE_LIMIT) { 695 offset16 = 0; 696 offset8 = 0; 697 while(currCh <= UNICODE_LIMIT 698 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 699 && offset8 < (MAX_LENGTH - MAX_UTF8_LEN)) 700 { 701 if (currCh == SURROGATE_HIGH_START) { 702 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 703 } 704 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 705 UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh); 706 currCh++; 707 } 708 if(!convertFromU(standardForm, offset16, 709 utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 710 log_err("Unicode->UTF8 did not match.\n"); 711 } 712 if(!convertToU(utf8, offset8, 713 standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 714 log_err("UTF8->Unicode did not match.\n"); 715 } 716 } 717 718 free(standardForm); 719 free(utf8); 720 721 { 722 static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 }; 723 static const UChar expected[] = { 0x0301, 0x0300 }; 724 UConverter *conv8; 725 UErrorCode err = U_ZERO_ERROR; 726 UChar pivotBuffer[100]; 727 const UChar* const pivEnd = pivotBuffer + 100; 728 const char* srcBeg; 729 const char* srcEnd; 730 UChar* pivBeg; 731 732 conv8 = ucnv_open("UTF-8", &err); 733 734 srcBeg = src8; 735 pivBeg = pivotBuffer; 736 srcEnd = src8 + 3; 737 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 738 if (srcBeg != srcEnd) { 739 log_err("Did not consume whole buffer on first call.\n"); 740 } 741 742 srcEnd = src8 + 4; 743 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 744 if (srcBeg != srcEnd) { 745 log_err("Did not consume whole buffer on second call.\n"); 746 } 747 748 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 749 log_err("Did not get expected results for UTF-8.\n"); 750 } 751 ucnv_close(conv8); 752 } 753} 754 755#define MAX_UTF32_LEN 1 756 757static void TestRegressionUTF32(){ 758 UChar32 currCh = 0; 759 int32_t offset32; 760 int32_t offset16; 761 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 762 UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32)); 763 764 while (currCh <= UNICODE_LIMIT) { 765 offset16 = 0; 766 offset32 = 0; 767 while(currCh <= UNICODE_LIMIT 768 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 769 && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN)) 770 { 771 if (currCh == SURROGATE_HIGH_START) { 772 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 773 } 774 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 775 UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh); 776 currCh++; 777 } 778 if(!convertFromU(standardForm, offset16, 779 (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 780 log_err("Unicode->UTF32 did not match.\n"); 781 } 782 if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32), 783 standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 784 log_err("UTF32->Unicode did not match.\n"); 785 } 786 } 787 free(standardForm); 788 free(utf32); 789 790 { 791 /* Check for lone surrogate error handling. */ 792 static const UChar sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 }; 793 static const UChar sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 }; 794 static const uint8_t expectedUTF32BE[] = { 795 0x00, 0x00, 0x00, 0x31, 796 0x00, 0x00, 0xff, 0xfd, 797 0x00, 0x00, 0x00, 0x32 798 }; 799 static const uint8_t expectedUTF32LE[] = { 800 0x31, 0x00, 0x00, 0x00, 801 0xfd, 0xff, 0x00, 0x00, 802 0x32, 0x00, 0x00, 0x00 803 }; 804 static const int32_t offsetsUTF32[] = { 805 0x00, 0x00, 0x00, 0x00, 806 0x01, 0x01, 0x01, 0x01, 807 0x02, 0x02, 0x02, 0x02 808 }; 809 810 if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]), 811 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 812 log_err("u->UTF-32BE\n"); 813 if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]), 814 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 815 log_err("u->UTF-32BE\n"); 816 817 if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]), 818 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 819 log_err("u->UTF-32LE\n"); 820 if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]), 821 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 822 log_err("u->UTF-32LE\n"); 823 } 824 825 { 826 static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 }; 827 static const UChar expected[] = { 0x0031, 0x0030 }; 828 UConverter *convBE; 829 UErrorCode err = U_ZERO_ERROR; 830 UChar pivotBuffer[100]; 831 const UChar* const pivEnd = pivotBuffer + 100; 832 const char* srcBeg; 833 const char* srcEnd; 834 UChar* pivBeg; 835 836 convBE = ucnv_open("UTF-32BE", &err); 837 838 srcBeg = srcBE; 839 pivBeg = pivotBuffer; 840 srcEnd = srcBE + 5; 841 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 842 if (srcBeg != srcEnd) { 843 log_err("Did not consume whole buffer on first call.\n"); 844 } 845 846 srcEnd = srcBE + 8; 847 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 848 if (srcBeg != srcEnd) { 849 log_err("Did not consume whole buffer on second call.\n"); 850 } 851 852 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 853 log_err("Did not get expected results for UTF-32BE.\n"); 854 } 855 ucnv_close(convBE); 856 } 857 { 858 static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 }; 859 static const UChar expected[] = { 0x0031, 0x0030 }; 860 UConverter *convLE; 861 UErrorCode err = U_ZERO_ERROR; 862 UChar pivotBuffer[100]; 863 const UChar* const pivEnd = pivotBuffer + 100; 864 const char* srcBeg; 865 const char* srcEnd; 866 UChar* pivBeg; 867 868 convLE = ucnv_open("UTF-32LE", &err); 869 870 srcBeg = srcLE; 871 pivBeg = pivotBuffer; 872 srcEnd = srcLE + 5; 873 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 874 if (srcBeg != srcEnd) { 875 log_err("Did not consume whole buffer on first call.\n"); 876 } 877 878 srcEnd = srcLE + 8; 879 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 880 if (srcBeg != srcEnd) { 881 log_err("Did not consume whole buffer on second call.\n"); 882 } 883 884 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 885 log_err("Did not get expected results for UTF-32LE.\n"); 886 } 887 ucnv_close(convLE); 888 } 889} 890 891/*Walk through the available converters*/ 892static void TestAvailableConverters(){ 893 UErrorCode status=U_ZERO_ERROR; 894 UConverter *conv=NULL; 895 int32_t i=0; 896 for(i=0; i < ucnv_countAvailable(); i++){ 897 status=U_ZERO_ERROR; 898 conv=ucnv_open(ucnv_getAvailableName(i), &status); 899 if(U_FAILURE(status)){ 900 log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n", 901 ucnv_getAvailableName(i), myErrorName(status)); 902 continue; 903 } 904 ucnv_close(conv); 905 } 906 907} 908 909static void TestFlushInternalBuffer(){ 910 TestWithBufferSize(MAX_LENGTH, 1); 911 TestWithBufferSize(1, 1); 912 TestWithBufferSize(1, MAX_LENGTH); 913 TestWithBufferSize(MAX_LENGTH, MAX_LENGTH); 914} 915 916static void TestWithBufferSize(int32_t insize, int32_t outsize){ 917 918 gInBufferSize =insize; 919 gOutBufferSize = outsize; 920 921 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 922 { 923 UChar sampleText[] = 924 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E }; 925 const uint8_t expectedUTF8[] = 926 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 927 int32_t toUTF8Offs[] = 928 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07}; 929 /* int32_t fmUTF8Offs[] = 930 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/ 931 932 /*UTF-8*/ 933 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 934 expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE)) 935 log_err("u-> UTF8 did not match.\n"); 936 } 937 938#if !UCONFIG_NO_LEGACY_CONVERSION 939 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 940 { 941 UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 942 const uint8_t toIBM943[]= { 0x61, 943 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 944 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 945 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 946 0x61 }; 947 int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 948 949 if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 950 toIBM943, sizeof(toIBM943), "ibm-943", 951 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE)) 952 log_err("u-> ibm-943 with subst with value did not match.\n"); 953 } 954#endif 955 956 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 957 { 958 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 959 0xe0, 0x80, 0x61}; 960 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 961 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 962 963 if(!testConvertToU(sampleText1, sizeof(sampleText1), 964 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE)) 965 log_err("utf8->u with substitute did not match.\n");; 966 } 967 968#if !UCONFIG_NO_LEGACY_CONVERSION 969 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 970 /*to Unicode*/ 971 { 972 const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 973 0x81, 0xad, /*unassigned*/ 974 0x89, 0xd3 }; 975 UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 976 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 977 0x7B87}; 978 int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 979 980 if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU), 981 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", 982 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE)) 983 log_err("ibm-943->u with substitute with value did not match.\n"); 984 985 } 986#endif 987} 988 989static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 990 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 991{ 992 993 int32_t i=0; 994 char *p=0; 995 const UChar *src; 996 char buffer[MAX_LENGTH]; 997 int32_t offsetBuffer[MAX_LENGTH]; 998 int32_t *offs=0; 999 char *targ; 1000 char *targetLimit; 1001 UChar *sourceLimit=0; 1002 UErrorCode status = U_ZERO_ERROR; 1003 UConverter *conv = 0; 1004 conv = ucnv_open(codepage, &status); 1005 if(U_FAILURE(status)) 1006 { 1007 log_data_err("Couldn't open converter %s\n",codepage); 1008 return TRUE; 1009 } 1010 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1011 1012 for(i=0; i<MAX_LENGTH; i++){ 1013 buffer[i]=(char)0xF0; 1014 offsetBuffer[i]=0xFF; 1015 } 1016 1017 src=source; 1018 sourceLimit=(UChar*)src+(sourceLen); 1019 targ=buffer; 1020 targetLimit=targ+MAX_LENGTH; 1021 offs=offsetBuffer; 1022 ucnv_fromUnicode (conv, 1023 (char **)&targ, 1024 (const char *)targetLimit, 1025 &src, 1026 sourceLimit, 1027 expectOffsets ? offs : NULL, 1028 doFlush, 1029 &status); 1030 ucnv_close(conv); 1031 if(status != expectedStatus){ 1032 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1033 return FALSE; 1034 } 1035 1036 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1037 sourceLen, targ-buffer); 1038 1039 if(expectLen != targ-buffer) 1040 { 1041 log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1042 log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1043 printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer)); 1044 printSeqErr((const unsigned char*)expect, expectLen); 1045 return FALSE; 1046 } 1047 1048 if(memcmp(buffer, expect, expectLen)){ 1049 log_err("String does not match. FROM Unicode to codePage%s\n", codepage); 1050 log_info("\nGot:"); 1051 printSeqErr((const unsigned char *)buffer, expectLen); 1052 log_info("\nExpected:"); 1053 printSeqErr((const unsigned char *)expect, expectLen); 1054 return FALSE; 1055 } 1056 else { 1057 log_verbose("Matches!\n"); 1058 } 1059 1060 if (expectOffsets != 0){ 1061 log_verbose("comparing %d offsets..\n", targ-buffer); 1062 if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){ 1063 log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage); 1064 log_info("\nGot : "); 1065 printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer)); 1066 for(p=buffer;p<targ;p++) 1067 log_info("%d, ", offsetBuffer[p-buffer]); 1068 log_info("\nExpected: "); 1069 for(i=0; i< (targ-buffer); i++) 1070 log_info("%d,", expectOffsets[i]); 1071 } 1072 } 1073 1074 return TRUE; 1075} 1076 1077 1078static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 1079 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1080{ 1081 UErrorCode status = U_ZERO_ERROR; 1082 UConverter *conv = 0; 1083 int32_t i=0; 1084 UChar *p=0; 1085 const char* src; 1086 UChar buffer[MAX_LENGTH]; 1087 int32_t offsetBuffer[MAX_LENGTH]; 1088 int32_t *offs=0; 1089 UChar *targ; 1090 UChar *targetLimit; 1091 uint8_t *sourceLimit=0; 1092 1093 1094 1095 conv = ucnv_open(codepage, &status); 1096 if(U_FAILURE(status)) 1097 { 1098 log_data_err("Couldn't open converter %s\n",codepage); 1099 return TRUE; 1100 } 1101 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1102 1103 1104 1105 for(i=0; i<MAX_LENGTH; i++){ 1106 buffer[i]=0xFFFE; 1107 offsetBuffer[i]=-1; 1108 } 1109 1110 src=(const char *)source; 1111 sourceLimit=(uint8_t*)(src+(sourceLen)); 1112 targ=buffer; 1113 targetLimit=targ+MAX_LENGTH; 1114 offs=offsetBuffer; 1115 1116 1117 1118 ucnv_toUnicode (conv, 1119 &targ, 1120 targetLimit, 1121 (const char **)&src, 1122 (const char *)sourceLimit, 1123 expectOffsets ? offs : NULL, 1124 doFlush, 1125 &status); 1126 1127 ucnv_close(conv); 1128 if(status != expectedStatus){ 1129 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1130 return FALSE; 1131 } 1132 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1133 sourceLen, targ-buffer); 1134 1135 1136 1137 1138 log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2); 1139 1140 if (expectOffsets != 0) { 1141 if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){ 1142 1143 log_err("did not get the expected offsets from %s To UNICODE\n", codepage); 1144 log_info("\nGot : "); 1145 for(p=buffer;p<targ;p++) 1146 log_info("%d, ", offsetBuffer[p-buffer]); 1147 log_info("\nExpected: "); 1148 for(i=0; i<(targ-buffer); i++) 1149 log_info("%d, ", expectOffsets[i]); 1150 log_info("\nGot result:"); 1151 for(i=0; i<(targ-buffer); i++) 1152 log_info("0x%04X,", buffer[i]); 1153 log_info("\nFrom Input:"); 1154 for(i=0; i<(src-(const char *)source); i++) 1155 log_info("0x%02X,", (unsigned char)source[i]); 1156 log_info("\n"); 1157 } 1158 } 1159 if(memcmp(buffer, expect, expectLen*2)){ 1160 log_err("String does not match. from codePage %s TO Unicode\n", codepage); 1161 log_info("\nGot:"); 1162 printUSeqErr(buffer, expectLen); 1163 log_info("\nExpected:"); 1164 printUSeqErr(expect, expectLen); 1165 return FALSE; 1166 } 1167 else { 1168 log_verbose("Matches!\n"); 1169 } 1170 1171 return TRUE; 1172} 1173 1174 1175static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1176 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset) 1177{ 1178 UErrorCode status = U_ZERO_ERROR; 1179 UConverter *conv = 0; 1180 char junkout[MAX_LENGTH]; /* FIX */ 1181 int32_t junokout[MAX_LENGTH]; /* FIX */ 1182 char *p; 1183 const UChar *src; 1184 char *end; 1185 char *targ; 1186 int32_t *offs; 1187 int i; 1188 int32_t realBufferSize; 1189 char *realBufferEnd; 1190 const UChar *realSourceEnd; 1191 const UChar *sourceLimit; 1192 UBool checkOffsets = TRUE; 1193 UBool doFlush; 1194 1195 UConverterFromUCallback oldAction = NULL; 1196 const void* oldContext = NULL; 1197 1198 for(i=0;i<MAX_LENGTH;i++) 1199 junkout[i] = (char)0xF0; 1200 for(i=0;i<MAX_LENGTH;i++) 1201 junokout[i] = 0xFF; 1202 1203 setNuConvTestName(codepage, "FROM"); 1204 1205 log_verbose("\n========= %s\n", gNuConvTestName); 1206 1207 conv = ucnv_open(codepage, &status); 1208 if(U_FAILURE(status)) 1209 { 1210 log_data_err("Couldn't open converter %s\n",codepage); 1211 return TRUE; 1212 } 1213 1214 log_verbose("Converter opened..\n"); 1215 /*----setting the callback routine----*/ 1216 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1217 if (U_FAILURE(status)) { 1218 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1219 } 1220 /*------------------------*/ 1221 1222 src = source; 1223 targ = junkout; 1224 offs = junokout; 1225 1226 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 1227 realBufferEnd = junkout + realBufferSize; 1228 realSourceEnd = source + sourceLen; 1229 1230 if ( gOutBufferSize != realBufferSize ) 1231 checkOffsets = FALSE; 1232 1233 if( gInBufferSize != MAX_LENGTH ) 1234 checkOffsets = FALSE; 1235 1236 do 1237 { 1238 end = nct_min(targ + gOutBufferSize, realBufferEnd); 1239 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 1240 1241 doFlush = (UBool)(sourceLimit == realSourceEnd); 1242 1243 if(targ == realBufferEnd) 1244 { 1245 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 1246 return FALSE; 1247 } 1248 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 1249 1250 1251 status = U_ZERO_ERROR; 1252 if(gInBufferSize ==999 && gOutBufferSize==999) 1253 doFlush = FALSE; 1254 ucnv_fromUnicode (conv, 1255 (char **)&targ, 1256 (const char *)end, 1257 &src, 1258 sourceLimit, 1259 offs, 1260 doFlush, /* flush if we're at the end of the input data */ 1261 &status); 1262 if(testReset) 1263 ucnv_resetToUnicode(conv); 1264 if(gInBufferSize ==999 && gOutBufferSize==999) 1265 ucnv_resetToUnicode(conv); 1266 1267 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 1268 1269 if(U_FAILURE(status)) { 1270 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1271 return FALSE; 1272 } 1273 1274 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1275 sourceLen, targ-junkout); 1276 if(getTestOption(VERBOSITY_OPTION)) 1277 { 1278 char junk[999]; 1279 char offset_str[999]; 1280 char *ptr; 1281 1282 junk[0] = 0; 1283 offset_str[0] = 0; 1284 for(ptr = junkout;ptr<targ;ptr++) 1285 { 1286 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr); 1287 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]); 1288 } 1289 1290 log_verbose(junk); 1291 printSeq((const unsigned char *)expect, expectLen); 1292 if ( checkOffsets ) 1293 { 1294 log_verbose("\nOffsets:"); 1295 log_verbose(offset_str); 1296 } 1297 log_verbose("\n"); 1298 } 1299 ucnv_close(conv); 1300 1301 1302 if(expectLen != targ-junkout) 1303 { 1304 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1305 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1306 log_info("\nGot:"); 1307 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1308 log_info("\nExpected:"); 1309 printSeqErr((const unsigned char*)expect, expectLen); 1310 return FALSE; 1311 } 1312 1313 if (checkOffsets && (expectOffsets != 0) ) 1314 { 1315 log_verbose("comparing %d offsets..\n", targ-junkout); 1316 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 1317 log_err("did not get the expected offsets. %s", gNuConvTestName); 1318 log_err("Got : "); 1319 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1320 for(p=junkout;p<targ;p++) 1321 log_err("%d, ", junokout[p-junkout]); 1322 log_err("\nExpected: "); 1323 for(i=0; i<(targ-junkout); i++) 1324 log_err("%d,", expectOffsets[i]); 1325 } 1326 } 1327 1328 log_verbose("comparing..\n"); 1329 if(!memcmp(junkout, expect, expectLen)) 1330 { 1331 log_verbose("Matches!\n"); 1332 return TRUE; 1333 } 1334 else 1335 { 1336 log_err("String does not match. %s\n", gNuConvTestName); 1337 printUSeqErr(source, sourceLen); 1338 log_info("\nGot:"); 1339 printSeqErr((const unsigned char *)junkout, expectLen); 1340 log_info("\nExpected:"); 1341 printSeqErr((const unsigned char *)expect, expectLen); 1342 1343 return FALSE; 1344 } 1345} 1346 1347static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 1348 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset) 1349{ 1350 UErrorCode status = U_ZERO_ERROR; 1351 UConverter *conv = 0; 1352 UChar junkout[MAX_LENGTH]; /* FIX */ 1353 int32_t junokout[MAX_LENGTH]; /* FIX */ 1354 const char *src; 1355 const char *realSourceEnd; 1356 const char *srcLimit; 1357 UChar *p; 1358 UChar *targ; 1359 UChar *end; 1360 int32_t *offs; 1361 int i; 1362 UBool checkOffsets = TRUE; 1363 int32_t realBufferSize; 1364 UChar *realBufferEnd; 1365 UBool doFlush; 1366 1367 UConverterToUCallback oldAction = NULL; 1368 const void* oldContext = NULL; 1369 1370 1371 for(i=0;i<MAX_LENGTH;i++) 1372 junkout[i] = 0xFFFE; 1373 1374 for(i=0;i<MAX_LENGTH;i++) 1375 junokout[i] = -1; 1376 1377 setNuConvTestName(codepage, "TO"); 1378 1379 log_verbose("\n========= %s\n", gNuConvTestName); 1380 1381 conv = ucnv_open(codepage, &status); 1382 if(U_FAILURE(status)) 1383 { 1384 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 1385 return TRUE; 1386 } 1387 1388 log_verbose("Converter opened..\n"); 1389 /*----setting the callback routine----*/ 1390 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1391 if (U_FAILURE(status)) { 1392 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1393 } 1394 /*-------------------------------------*/ 1395 1396 src = (const char *)source; 1397 targ = junkout; 1398 offs = junokout; 1399 1400 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 1401 realBufferEnd = junkout + realBufferSize; 1402 realSourceEnd = src + sourcelen; 1403 1404 if ( gOutBufferSize != realBufferSize ) 1405 checkOffsets = FALSE; 1406 1407 if( gInBufferSize != MAX_LENGTH ) 1408 checkOffsets = FALSE; 1409 1410 do 1411 { 1412 end = nct_min( targ + gOutBufferSize, realBufferEnd); 1413 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 1414 1415 if(targ == realBufferEnd) 1416 { 1417 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 1418 return FALSE; 1419 } 1420 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 1421 1422 /* oldTarg = targ; */ 1423 1424 status = U_ZERO_ERROR; 1425 doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE); 1426 1427 ucnv_toUnicode (conv, 1428 &targ, 1429 end, 1430 (const char **)&src, 1431 (const char *)srcLimit, 1432 offs, 1433 doFlush, /* flush if we're at the end of hte source data */ 1434 &status); 1435 if(testReset) 1436 ucnv_resetFromUnicode(conv); 1437 if(gInBufferSize ==999 && gOutBufferSize==999) 1438 ucnv_resetToUnicode(conv); 1439 /* offs += (targ-oldTarg); */ 1440 1441 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 1442 1443 if(U_FAILURE(status)) 1444 { 1445 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1446 return FALSE; 1447 } 1448 1449 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 1450 sourcelen, targ-junkout); 1451 if(getTestOption(VERBOSITY_OPTION)) 1452 { 1453 char junk[999]; 1454 char offset_str[999]; 1455 1456 UChar *ptr; 1457 1458 junk[0] = 0; 1459 offset_str[0] = 0; 1460 1461 for(ptr = junkout;ptr<targ;ptr++) 1462 { 1463 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 1464 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 1465 } 1466 1467 log_verbose(junk); 1468 1469 if ( checkOffsets ) 1470 { 1471 log_verbose("\nOffsets:"); 1472 log_verbose(offset_str); 1473 } 1474 log_verbose("\n"); 1475 } 1476 ucnv_close(conv); 1477 1478 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 1479 1480 if (checkOffsets && (expectOffsets != 0)) 1481 { 1482 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 1483 1484 log_err("did not get the expected offsets. %s",gNuConvTestName); 1485 for(p=junkout;p<targ;p++) 1486 log_err("%d, ", junokout[p-junkout]); 1487 log_err("\nExpected: "); 1488 for(i=0; i<(targ-junkout); i++) 1489 log_err("%d,", expectOffsets[i]); 1490 log_err(""); 1491 for(i=0; i<(targ-junkout); i++) 1492 log_err("%X,", junkout[i]); 1493 log_err(""); 1494 for(i=0; i<(src-(const char *)source); i++) 1495 log_err("%X,", (unsigned char)source[i]); 1496 } 1497 } 1498 1499 if(!memcmp(junkout, expect, expectlen*2)) 1500 { 1501 log_verbose("Matches!\n"); 1502 return TRUE; 1503 } 1504 else 1505 { 1506 log_err("String does not match. %s\n", gNuConvTestName); 1507 log_verbose("String does not match. %s\n", gNuConvTestName); 1508 log_info("\nGot:"); 1509 printUSeq(junkout, expectlen); 1510 log_info("\nExpected:"); 1511 printUSeq(expect, expectlen); 1512 return FALSE; 1513 } 1514} 1515 1516 1517static void TestResetBehaviour(void){ 1518#if !UCONFIG_NO_LEGACY_CONVERSION 1519 log_verbose("Testing Reset for DBCS and MBCS\n"); 1520 { 1521 static const UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 1522 static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 1523 static const int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 1524 1525 1526 static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8}; 1527 static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7}; 1528 static const int32_t offsets1[] = { 0,2,4,6}; 1529 1530 /*DBCS*/ 1531 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1532 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1533 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1534 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1535 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1536 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1537 1538 if(!testConvertToU(expected1, sizeof(expected1), 1539 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1540 offsets1, TRUE)) 1541 log_err("ibm-1363 -> did not match.\n"); 1542 /*MBCS*/ 1543 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1544 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1545 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1546 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1547 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1548 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1549 1550 if(!testConvertToU(expected1, sizeof(expected1), 1551 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1552 offsets1, TRUE)) 1553 log_err("ibm-1363 -> did not match.\n"); 1554 1555 } 1556 1557 log_verbose("Testing Reset for ISO-2022-jp\n"); 1558 { 1559 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1560 1561 static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1562 0x31,0x1A, 0x32}; 1563 1564 1565 static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 1566 1567 1568 static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1569 static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1570 0x31,0x1A, 0x32}; 1571 static const int32_t offsets1[] = { 3,5,10,11,12}; 1572 1573 /*iso-2022-jp*/ 1574 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1575 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1576 log_err("u-> not match.\n"); 1577 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1578 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1579 log_err("u-> not match.\n"); 1580 1581 if(!testConvertToU(expected1, sizeof(expected1), 1582 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1583 offsets1, TRUE)) 1584 log_err("iso-2022-jp -> did not match.\n"); 1585 1586 } 1587 1588 log_verbose("Testing Reset for ISO-2022-cn\n"); 1589 { 1590 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1591 1592 static const uint8_t expected[] = { 1593 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1594 0x36, 0x21, 1595 0x0f, 0x31, 1596 0x1A, 1597 0x32 1598 }; 1599 1600 1601 static const int32_t offsets[] = { 1602 0, 0, 0, 0, 0, 0, 0, 1603 1, 1, 1604 2, 2, 1605 3, 1606 5, }; 1607 1608 UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1609 static const uint8_t expected1[] = { 1610 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1611 0x36, 0x21, 1612 0x1B, 0x24, 0x29, 0x47, 0x24, 0x22, 1613 0x0f, 0x1A, 1614 0x32 1615 }; 1616 static const int32_t offsets1[] = { 5,7,13,16,17}; 1617 1618 /*iso-2022-CN*/ 1619 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1620 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1621 log_err("u-> not match.\n"); 1622 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1623 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1624 log_err("u-> not match.\n"); 1625 1626 if(!testConvertToU(expected1, sizeof(expected1), 1627 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1628 offsets1, TRUE)) 1629 log_err("iso-2022-cn -> did not match.\n"); 1630 } 1631 1632 log_verbose("Testing Reset for ISO-2022-kr\n"); 1633 { 1634 UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1635 1636 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 1637 0x0E, 0x6C, 0x69, 1638 0x0f, 0x1A, 1639 0x0e, 0x6F, 0x4B, 1640 0x0F, 0x31, 1641 0x1A, 1642 0x32 }; 1643 1644 static const int32_t offsets[] = {-1, -1, -1, -1, 1645 0, 0, 0, 1646 1, 1, 1647 3, 3, 3, 1648 4, 4, 1649 5, 1650 7, 1651 }; 1652 static const UChar sampleText1[] = { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032}; 1653 1654 static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43, 1655 0x0E, 0x6C, 0x69, 1656 0x0f, 0x41, 1657 0x0e, 0x6F, 0x4B, 1658 0x0F, 0x31, 1659 0x42, 1660 0x32 }; 1661 1662 static const int32_t offsets1[] = { 1663 5, 8, 10, 1664 13, 14, 15 1665 1666 }; 1667 /*iso-2022-kr*/ 1668 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1669 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1670 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1671 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1672 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1673 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1674 if(!testConvertToU(expected1, sizeof(expected1), 1675 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1676 offsets1, TRUE)) 1677 log_err("iso-2022-kr -> did not match.\n"); 1678 } 1679 1680 log_verbose("Testing Reset for HZ\n"); 1681 { 1682 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1683 1684 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 1685 0x7E, 0x7D, 0x1A, 1686 0x7E, 0x7B, 0x36, 0x21, 1687 0x7E, 0x7D, 0x31, 1688 0x1A, 1689 0x32 }; 1690 1691 1692 static const int32_t offsets[] = {0,0,0,0, 1693 1,1,1, 1694 3,3,3,3, 1695 4,4,4, 1696 5, 1697 7,}; 1698 static const UChar sampleText1[] = { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032}; 1699 1700 static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B, 1701 0x7E, 0x7D, 0x35, 1702 0x7E, 0x7B, 0x36, 0x21, 1703 0x7E, 0x7D, 0x31, 1704 0x41, 1705 0x32 }; 1706 1707 1708 static const int32_t offsets1[] = {2,6,9,13,14,15 1709 }; 1710 1711 /*hz*/ 1712 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1713 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1714 log_err("u-> not match.\n"); 1715 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1716 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1717 log_err("u-> not match.\n"); 1718 if(!testConvertToU(expected1, sizeof(expected1), 1719 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1720 offsets1, TRUE)) 1721 log_err("hz -> did not match.\n"); 1722 } 1723#endif 1724 1725 /*UTF-8*/ 1726 log_verbose("Testing for UTF8\n"); 1727 { 1728 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 1729 int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 1730 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 1731 0x04, 0x06 }; 1732 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 1733 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 1734 1735 1736 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 1737 /*UTF-8*/ 1738 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1739 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1740 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1741 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1742 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1743 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1744 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1745 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1746 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1747 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1748 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1749 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1750 if(!testConvertToU(expected, sizeof(expected), 1751 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1752 log_err("UTF8 -> did not match.\n"); 1753 if(!testConvertToU(expected, sizeof(expected), 1754 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1755 log_err("UTF8 -> did not match.\n"); 1756 if(!testConvertToU(expected, sizeof(expected), 1757 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1758 log_err("UTF8 -> did not match.\n"); 1759 if(!testConvertToU(expected, sizeof(expected), 1760 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1761 log_err("UTF8 -> did not match.\n"); 1762 1763 } 1764 1765} 1766 1767/* Test that U_TRUNCATED_CHAR_FOUND is set. */ 1768static void 1769doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) { 1770 UConverter *cnv; 1771 1772 UChar buffer[2]; 1773 UChar *target, *targetLimit; 1774 const char *source, *sourceLimit; 1775 1776 UErrorCode errorCode; 1777 1778 errorCode=U_ZERO_ERROR; 1779 cnv=ucnv_open(cnvName, &errorCode); 1780 if(U_FAILURE(errorCode)) { 1781 log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode)); 1782 return; 1783 } 1784 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 1785 if(U_FAILURE(errorCode)) { 1786 log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n", 1787 cnvName, u_errorName(errorCode)); 1788 ucnv_close(cnv); 1789 return; 1790 } 1791 1792 source=(const char *)bytes; 1793 sourceLimit=source+length; 1794 target=buffer; 1795 targetLimit=buffer+LENGTHOF(buffer); 1796 1797 /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */ 1798 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode); 1799 if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) { 1800 log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n", 1801 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1802 } 1803 1804 errorCode=U_ZERO_ERROR; 1805 source=sourceLimit; 1806 target=buffer; 1807 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1808 if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) { 1809 log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n", 1810 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer)); 1811 } 1812 1813 /* 2. input bytes with flush=TRUE */ 1814 ucnv_resetToUnicode(cnv); 1815 1816 errorCode=U_ZERO_ERROR; 1817 source=(const char *)bytes; 1818 target=buffer; 1819 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1820 if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) { 1821 log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n", 1822 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1823 } 1824 1825 1826 ucnv_close(cnv); 1827} 1828 1829static void 1830TestTruncated() { 1831 static const struct { 1832 const char *cnvName; 1833 uint8_t bytes[8]; /* partial input bytes resulting in no output */ 1834 int32_t length; 1835 } testCases[]={ 1836 { "IMAP-mailbox-name", { 0x26 }, 1 }, /* & */ 1837 { "IMAP-mailbox-name", { 0x26, 0x42 }, 2 }, /* &B */ 1838 { "IMAP-mailbox-name", { 0x26, 0x42, 0x42 }, 3 }, /* &BB */ 1839 { "IMAP-mailbox-name", { 0x26, 0x41, 0x41 }, 3 }, /* &AA */ 1840 1841 { "UTF-7", { 0x2b, 0x42 }, 2 }, /* +B */ 1842 { "UTF-8", { 0xd1 }, 1 }, 1843 1844 { "UTF-16BE", { 0x4e }, 1 }, 1845 { "UTF-16LE", { 0x4e }, 1 }, 1846 { "UTF-16", { 0x4e }, 1 }, 1847 { "UTF-16", { 0xff }, 1 }, 1848 { "UTF-16", { 0xfe, 0xff, 0x4e }, 3 }, 1849 1850 { "UTF-32BE", { 0, 0, 0x4e }, 3 }, 1851 { "UTF-32LE", { 0x4e }, 1 }, 1852 { "UTF-32", { 0, 0, 0x4e }, 3 }, 1853 { "UTF-32", { 0xff }, 1 }, 1854 { "UTF-32", { 0, 0, 0xfe, 0xff, 0 }, 5 }, 1855 { "SCSU", { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */ 1856 1857#if !UCONFIG_NO_LEGACY_CONVERSION 1858 { "BOCU-1", { 0xd5 }, 1 }, 1859 1860 { "Shift-JIS", { 0xe0 }, 1 }, 1861 1862 { "ibm-939", { 0x0e, 0x41 }, 2 } /* SO 0x41 */ 1863#else 1864 { "BOCU-1", { 0xd5 }, 1 ,} 1865#endif 1866 }; 1867 int32_t i; 1868 1869 for(i=0; i<LENGTHOF(testCases); ++i) { 1870 doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length); 1871 } 1872} 1873 1874typedef struct NameRange { 1875 const char *name; 1876 UChar32 start, end, start2, end2, notStart, notEnd; 1877} NameRange; 1878 1879static void 1880TestUnicodeSet() { 1881 UErrorCode errorCode; 1882 UConverter *cnv; 1883 USet *set; 1884 const char *name; 1885 int32_t i, count; 1886 1887 static const char *const completeSetNames[]={ 1888 "UTF-7", 1889 "UTF-8", 1890 "UTF-16", 1891 "UTF-16BE", 1892 "UTF-16LE", 1893 "UTF-32", 1894 "UTF-32BE", 1895 "UTF-32LE", 1896 "SCSU", 1897 "BOCU-1", 1898 "CESU-8", 1899#if !UCONFIG_NO_LEGACY_CONVERSION 1900 "gb18030", 1901#endif 1902 "IMAP-mailbox-name" 1903 }; 1904#if !UCONFIG_NO_LEGACY_CONVERSION 1905 static const char *const lmbcsNames[]={ 1906 "LMBCS-1", 1907 "LMBCS-2", 1908 "LMBCS-3", 1909 "LMBCS-4", 1910 "LMBCS-5", 1911 "LMBCS-6", 1912 "LMBCS-8", 1913 "LMBCS-11", 1914 "LMBCS-16", 1915 "LMBCS-17", 1916 "LMBCS-18", 1917 "LMBCS-19" 1918 }; 1919#endif 1920 1921 static const NameRange nameRanges[]={ 1922 { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1923#if !UCONFIG_NO_LEGACY_CONVERSION 1924 { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1925#endif 1926 { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff }, 1927#if !UCONFIG_NO_LEGACY_CONVERSION 1928 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }, 1929 { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff }, 1930 /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */ 1931 { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff } 1932#else 1933 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff } 1934#endif 1935 }; 1936 1937 /* open an empty set */ 1938 set=uset_open(1, 0); 1939 1940 count=ucnv_countAvailable(); 1941 for(i=0; i<count; ++i) { 1942 errorCode=U_ZERO_ERROR; 1943 name=ucnv_getAvailableName(i); 1944 cnv=ucnv_open(name, &errorCode); 1945 if(U_FAILURE(errorCode)) { 1946 log_data_err("error: unable to open converter %s - %s\n", 1947 name, u_errorName(errorCode)); 1948 continue; 1949 } 1950 1951 uset_clear(set); 1952 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 1953 if(U_FAILURE(errorCode)) { 1954 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 1955 name, u_errorName(errorCode)); 1956 } else if(uset_size(set)==0) { 1957 log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name); 1958 } 1959 1960 ucnv_close(cnv); 1961 } 1962 1963 /* test converters that are known to convert all of Unicode (except maybe for surrogates) */ 1964 for(i=0; i<LENGTHOF(completeSetNames); ++i) { 1965 errorCode=U_ZERO_ERROR; 1966 name=completeSetNames[i]; 1967 cnv=ucnv_open(name, &errorCode); 1968 if(U_FAILURE(errorCode)) { 1969 log_data_err("error: unable to open converter %s - %s\n", 1970 name, u_errorName(errorCode)); 1971 continue; 1972 } 1973 1974 uset_clear(set); 1975 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 1976 if(U_FAILURE(errorCode)) { 1977 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 1978 name, u_errorName(errorCode)); 1979 } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) { 1980 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name); 1981 } 1982 1983 ucnv_close(cnv); 1984 } 1985 1986#if !UCONFIG_NO_LEGACY_CONVERSION 1987 /* test LMBCS variants which convert all of Unicode except for U+F6xx */ 1988 for(i=0; i<LENGTHOF(lmbcsNames); ++i) { 1989 errorCode=U_ZERO_ERROR; 1990 name=lmbcsNames[i]; 1991 cnv=ucnv_open(name, &errorCode); 1992 if(U_FAILURE(errorCode)) { 1993 log_data_err("error: unable to open converter %s - %s\n", 1994 name, u_errorName(errorCode)); 1995 continue; 1996 } 1997 1998 uset_clear(set); 1999 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2000 if(U_FAILURE(errorCode)) { 2001 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2002 name, u_errorName(errorCode)); 2003 } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) { 2004 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name); 2005 } 2006 2007 ucnv_close(cnv); 2008 } 2009#endif 2010 2011 /* test specific sets */ 2012 for(i=0; i<LENGTHOF(nameRanges); ++i) { 2013 errorCode=U_ZERO_ERROR; 2014 name=nameRanges[i].name; 2015 cnv=ucnv_open(name, &errorCode); 2016 if(U_FAILURE(errorCode)) { 2017 log_data_err("error: unable to open converter %s - %s\n", 2018 name, u_errorName(errorCode)); 2019 continue; 2020 } 2021 2022 uset_clear(set); 2023 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2024 if(U_FAILURE(errorCode)) { 2025 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2026 name, u_errorName(errorCode)); 2027 } else if( 2028 !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) || 2029 (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2)) 2030 ) { 2031 log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name); 2032 } else if(nameRanges[i].notStart>=0) { 2033 /* simulate containsAny() with the C API */ 2034 uset_complement(set); 2035 if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) { 2036 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name); 2037 } 2038 } 2039 2040 ucnv_close(cnv); 2041 } 2042 2043 errorCode = U_ZERO_ERROR; 2044 ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode); 2045 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 2046 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2047 } 2048 errorCode = U_PARSE_ERROR; 2049 /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */ 2050 ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode); 2051 if (errorCode != U_PARSE_ERROR) { 2052 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2053 } 2054 2055 uset_close(set); 2056} 2057