1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/***************************************************************************** 7* 8* File CCONVTST.C 9* 10* Modification History: 11* Name Description 12* Madhu Katragadda 7/7/2000 Converter Tests for extended code coverage 13****************************************************************************** 14*/ 15#include <stdio.h> 16#include <stdlib.h> 17#include <string.h> 18#include "unicode/uloc.h" 19#include "unicode/ucnv.h" 20#include "unicode/utypes.h" 21#include "unicode/ustring.h" 22#include "unicode/uset.h" 23#include "cintltst.h" 24 25#define MAX_LENGTH 999 26 27#define UNICODE_LIMIT 0x10FFFF 28#define SURROGATE_HIGH_START 0xD800 29#define SURROGATE_LOW_END 0xDFFF 30 31static int32_t gInBufferSize = 0; 32static int32_t gOutBufferSize = 0; 33static char gNuConvTestName[1024]; 34 35#define nct_min(x,y) ((x<y) ? x : y) 36#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 37 38static void printSeq(const unsigned char* a, int len); 39static void printSeqErr(const unsigned char* a, int len); 40static void printUSeq(const UChar* a, int len); 41static void printUSeqErr(const UChar* a, int len); 42static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 43 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 44static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 45 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 46 47static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 48 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset); 49static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 50 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset); 51 52static void setNuConvTestName(const char *codepage, const char *direction) 53{ 54 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 55 codepage, 56 direction, 57 (int)gInBufferSize, 58 (int)gOutBufferSize); 59} 60 61 62static void TestSurrogateBehaviour(void); 63static void TestErrorBehaviour(void); 64 65#if !UCONFIG_NO_LEGACY_CONVERSION 66static void TestToUnicodeErrorBehaviour(void); 67static void TestGetNextErrorBehaviour(void); 68#endif 69 70static void TestRegressionUTF8(void); 71static void TestRegressionUTF32(void); 72static void TestAvailableConverters(void); 73static void TestFlushInternalBuffer(void); /*for improved code coverage in ucnv_cnv.c*/ 74static void TestResetBehaviour(void); 75static void TestTruncated(void); 76static void TestUnicodeSet(void); 77 78static void TestWithBufferSize(int32_t osize, int32_t isize); 79 80 81static void printSeq(const unsigned char* a, int len) 82{ 83 int i=0; 84 log_verbose("\n{"); 85 while (i<len) 86 log_verbose("0x%02X ", a[i++]); 87 log_verbose("}\n"); 88} 89 90static void printUSeq(const UChar* a, int len) 91{ 92 int i=0; 93 log_verbose("\n{"); 94 while (i<len) 95 log_verbose("%0x04X ", a[i++]); 96 log_verbose("}\n"); 97} 98 99static void printSeqErr(const unsigned char* a, int len) 100{ 101 int i=0; 102 fprintf(stderr, "\n{"); 103 while (i<len) fprintf(stderr, "0x%02X ", a[i++]); 104 fprintf(stderr, "}\n"); 105} 106 107static void printUSeqErr(const UChar* a, int len) 108{ 109 int i=0; 110 fprintf(stderr, "\n{"); 111 while (i<len) 112 fprintf(stderr, "0x%04X ", a[i++]); 113 fprintf(stderr,"}\n"); 114} 115 116void addExtraTests(TestNode** root); 117 118void addExtraTests(TestNode** root) 119{ 120 addTest(root, &TestSurrogateBehaviour, "tsconv/ncnvtst/TestSurrogateBehaviour"); 121 addTest(root, &TestErrorBehaviour, "tsconv/ncnvtst/TestErrorBehaviour"); 122 123#if !UCONFIG_NO_LEGACY_CONVERSION 124 addTest(root, &TestToUnicodeErrorBehaviour, "tsconv/ncnvtst/ToUnicodeErrorBehaviour"); 125 addTest(root, &TestGetNextErrorBehaviour, "tsconv/ncnvtst/TestGetNextErrorBehaviour"); 126#endif 127 128 addTest(root, &TestAvailableConverters, "tsconv/ncnvtst/TestAvailableConverters"); 129 addTest(root, &TestFlushInternalBuffer, "tsconv/ncnvtst/TestFlushInternalBuffer"); 130 addTest(root, &TestResetBehaviour, "tsconv/ncnvtst/TestResetBehaviour"); 131 addTest(root, &TestRegressionUTF8, "tsconv/ncnvtst/TestRegressionUTF8"); 132 addTest(root, &TestRegressionUTF32, "tsconv/ncnvtst/TestRegressionUTF32"); 133 addTest(root, &TestTruncated, "tsconv/ncnvtst/TestTruncated"); 134 addTest(root, &TestUnicodeSet, "tsconv/ncnvtst/TestUnicodeSet"); 135} 136 137/*test surrogate behaviour*/ 138static void TestSurrogateBehaviour(){ 139 log_verbose("Testing for SBCS and LATIN_1\n"); 140 { 141 UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032}; 142 const uint8_t expected[] = {0x31, 0x1a, 0x32}; 143 144#if !UCONFIG_NO_LEGACY_CONVERSION 145 /*SBCS*/ 146 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 147 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR)) 148 log_err("u-> ibm-920 [UCNV_SBCS] not match.\n"); 149#endif 150 151 /*LATIN_1*/ 152 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 153 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR )) 154 log_err("u-> LATIN_1 not match.\n"); 155 156 } 157 158#if !UCONFIG_NO_LEGACY_CONVERSION 159 log_verbose("Testing for DBCS and MBCS\n"); 160 { 161 UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 162 const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 163 int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 164 165 /*DBCS*/ 166 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 167 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 168 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 169 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 170 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR)) 171 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 172 /*MBCS*/ 173 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 174 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 175 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 176 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 177 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR)) 178 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 179 } 180 181 /* BEGIN android-removed */ 182 /* To save space, Android does not build full ISO2022 CJK tables. 183 We skip the tests for ISO-2022. */ 184 /* 185 log_verbose("Testing for ISO-2022-jp\n"); 186 { 187 UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 188 189 const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 190 0x31,0x1A, 0x32}; 191 192 193 int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 194 195 // iso-2022-jp 196 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 197 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR)) 198 log_err("u-> not match.\n"); 199 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 200 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR)) 201 log_err("u-> not match.\n"); 202 } 203 204 log_verbose("Testing for ISO-2022-cn\n"); 205 { 206 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 207 208 static const uint8_t expected[] = { 209 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 210 0x36, 0x21, 211 0x0F, 0x31, 212 0x1A, 213 0x32 214 }; 215 216 217 218 static const int32_t offsets[] = { 219 0, 0, 0, 0, 0, 0, 0, 220 1, 1, 221 2, 2, 222 3, 223 5, }; 224 225 // iso-2022-CN 226 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 227 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR)) 228 log_err("u-> not match.\n"); 229 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 230 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR)) 231 log_err("u-> not match.\n"); 232 } 233 234 log_verbose("Testing for ISO-2022-kr\n"); 235 { 236 static const UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 237 238 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 239 0x0E, 0x6C, 0x69, 240 0x0f, 0x1A, 241 0x0e, 0x6F, 0x4B, 242 0x0F, 0x31, 243 0x1A, 244 0x32 }; 245 246 static const int32_t offsets[] = {-1, -1, -1, -1, 247 0, 0, 0, 248 1, 1, 249 3, 3, 3, 250 4, 4, 251 5, 252 7, 253 }; 254 255 // iso-2022-kr 256 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 257 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR)) 258 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 259 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 260 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR)) 261 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 262 } 263 */ 264 /* END android-removed */ 265 266 log_verbose("Testing for HZ\n"); 267 { 268 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 269 270 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 271 0x7E, 0x7D, 0x1A, 272 0x7E, 0x7B, 0x36, 0x21, 273 0x7E, 0x7D, 0x31, 274 0x1A, 275 0x32 }; 276 277 278 static const int32_t offsets[] = {0,0,0,0, 279 1,1,1, 280 3,3,3,3, 281 4,4,4, 282 5, 283 7,}; 284 285 /*hz*/ 286 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 287 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR)) 288 log_err("u-> HZ not match.\n"); 289 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 290 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR)) 291 log_err("u-> HZ not match.\n"); 292 } 293#endif 294 295 /*UTF-8*/ 296 log_verbose("Testing for UTF8\n"); 297 { 298 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 299 static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 300 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 301 0x04, 0x06 }; 302 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 303 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 304 305 306 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 307 /*UTF-8*/ 308 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 309 expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR )) 310 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 311 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 312 expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR )) 313 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 314 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 315 expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR )) 316 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 317 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 318 expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR )) 319 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 320 321 if(!convertToU(expected, sizeof(expected), 322 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR )) 323 log_err("UTF8 -> u did not match.\n"); 324 if(!convertToU(expected, sizeof(expected), 325 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR )) 326 log_err("UTF8 -> u did not match.\n"); 327 if(!convertToU(expected, sizeof(expected), 328 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR )) 329 log_err("UTF8 ->u did not match.\n"); 330 if(!convertToU(expected, sizeof(expected), 331 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR )) 332 log_err("UTF8 -> u did not match.\n"); 333 334 } 335} 336 337/*test various error behaviours*/ 338static void TestErrorBehaviour(){ 339 log_verbose("Testing for SBCS and LATIN_1\n"); 340 { 341 static const UChar sampleText[] = { 0x0031, 0xd801}; 342 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 343 static const uint8_t expected0[] = { 0x31}; 344 static const uint8_t expected[] = { 0x31, 0x1a}; 345 static const uint8_t expected2[] = { 0x31, 0x1a, 0x32}; 346 347#if !UCONFIG_NO_LEGACY_CONVERSION 348 /*SBCS*/ 349 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 350 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 351 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 352 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 353 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR)) 354 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 355 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 356 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 357 log_err("u-> ibm-920 [UCNV_SBCS] did not match\n"); 358#endif 359 360 /*LATIN_1*/ 361 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 362 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 363 log_err("u-> LATIN_1 is supposed to fail\n"); 364 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 365 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR)) 366 log_err("u-> LATIN_1 is supposed to fail\n"); 367 368 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 369 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 370 log_err("u-> LATIN_1 did not match\n"); 371 } 372 373#if !UCONFIG_NO_LEGACY_CONVERSION 374 log_verbose("Testing for DBCS and MBCS\n"); 375 { 376 static const UChar sampleText[] = { 0x00a1, 0xd801}; 377 static const uint8_t expected[] = { 0xa2, 0xae}; 378 static const int32_t offsets[] = { 0x00, 0x00}; 379 static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0}; 380 static const int32_t offsetsSUB[] = { 0x00, 0x00, 0x01, 0x01}; 381 382 static const UChar sampleText2[] = { 0x00a1, 0xd801, 0x00a4}; 383 static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 384 static const int32_t offsets2[] = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02}; 385 386 static const UChar sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01}; 387 static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0}; 388 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x01, 0x02, 0x02}; 389 390 /* BEGIN android-changed */ 391 /* Android uses a different EUC-JP table. We change one character, 392 * choosing a mapping that is common to both tables. */ 393 static const UChar sampleText4MBCS[] = { 0x0061, 0x9ED1, 0xdc01}; 394 static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xf4, 0xf8, 0xf4, 0xfe}; 395 /* static const UChar sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01}; */ 396 /* static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe}; */ 397 /* END android-changed */ 398 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 }; 399 400 401 /*DBCS*/ 402 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 403 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 404 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 405 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 406 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 407 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 408 409 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 410 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR)) 411 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 412 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 413 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_ZERO_ERROR)) 414 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 415 416 417 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 418 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 419 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 420 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 421 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR)) 422 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 423 424 /*MBCS*/ 425 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 426 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 427 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 428 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 429 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 430 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 431 432 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 433 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 434 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 435 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 436 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 437 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 438 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 439 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR)) 440 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 441 442 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 443 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR)) 444 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 445 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 446 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR)) 447 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 448 449 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 450 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) 451 log_err("u-> euc-jp [UCNV_MBCS] \n"); 452 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 453 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) 454 log_err("u-> euc-jp [UCNV_MBCS] \n"); 455 } 456 457 /* BEGIN android-removed */ 458 /* To save space, Android does not build full ISO2022 CJK tables. 459 We skip the tests for ISO-2022. */ 460 /* 461 // iso-2022-jp 462 log_verbose("Testing for iso-2022-jp\n"); 463 { 464 static const UChar sampleText[] = { 0x0031, 0xd801}; 465 static const uint8_t expected[] = { 0x31}; 466 static const uint8_t expectedSUB[] = { 0x31, 0x1a}; 467 static const int32_t offsets[] = { 0x00, 1}; 468 469 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 470 static const uint8_t expected2[] = { 0x31,0x1A,0x32}; 471 static const int32_t offsets2[] = { 0x00,0x01,0x02}; 472 473 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 474 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a}; 475 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 }; 476 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 477 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR)) 478 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 479 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 480 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR)) 481 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 482 483 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 484 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR)) 485 log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n"); 486 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 487 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 488 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 489 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 490 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 491 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 492 493 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 494 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) 495 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 496 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 497 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) 498 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 499 } 500 501 // iso-2022-cn 502 log_verbose("Testing for iso-2022-cn\n"); 503 { 504 static const UChar sampleText[] = { 0x0031, 0xd801}; 505 static const uint8_t expected[] = { 0x31}; 506 static const uint8_t expectedSUB[] = { 0x31, 0x1A}; 507 static const int32_t offsets[] = { 0x00, 1}; 508 509 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 510 static const uint8_t expected2[] = { 0x31, 0x1A,0x32}; 511 static const int32_t offsets2[] = { 0x00, 0x01,0x02}; 512 513 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 514 static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A}; 515 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x02 }; 516 517 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 518 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a }; 519 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 }; 520 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 521 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR)) 522 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 523 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 524 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR)) 525 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 526 527 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 528 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR)) 529 log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n"); 530 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 531 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 532 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 533 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 534 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 535 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 536 537 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 538 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR)) 539 log_err("u->iso-2022-cn [UCNV_MBCS] \n"); 540 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 541 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR)) 542 log_err("u-> iso-2022-cn[UCNV_MBCS] \n"); 543 544 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 545 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR)) 546 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 547 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 548 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR)) 549 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 550 } 551 552 // iso-2022-kr 553 log_verbose("Testing for iso-2022-kr\n"); 554 { 555 static const UChar sampleText[] = { 0x0031, 0xd801}; 556 static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31}; 557 static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A}; 558 static const int32_t offsets[] = { -1, -1, -1, -1, 0x00, 1}; 559 560 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 561 static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32}; 562 static const int32_t offsets2[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02}; 563 564 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 565 static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A }; 566 static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 }; 567 568 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 569 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR)) 570 log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); 571 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 572 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR)) 573 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 574 575 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 576 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR)) 577 log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n"); 578 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 579 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 580 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 581 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 582 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 583 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 584 585 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 586 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR)) 587 log_err("u->iso-2022-kr [UCNV_MBCS] \n"); 588 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 589 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR)) 590 log_err("u-> iso-2022-kr[UCNV_MBCS] \n"); 591 } 592 */ 593 /* END android-removed */ 594 595 /*HZ*/ 596 log_verbose("Testing for HZ\n"); 597 { 598 static const UChar sampleText[] = { 0x0031, 0xd801}; 599 static const uint8_t expected[] = { 0x7e, 0x7d, 0x31}; 600 static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A}; 601 static const int32_t offsets[] = { 0x00, 0x00, 0x00, 1}; 602 603 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 604 static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31, 0x1A, 0x32 }; 605 static const int32_t offsets2[] = { 0x00, 0x00, 0x00, 0x01, 0x02 }; 606 607 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 608 static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50, 0x1A }; 609 static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x02}; 610 611 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 612 static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a }; 613 static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 }; 614 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 615 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR)) 616 log_err("u-> HZ [UCNV_MBCS] \n"); 617 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 618 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR)) 619 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 620 621 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 622 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR)) 623 log_err("u->HZ[UCNV_DBCS] did not match\n"); 624 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 625 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 626 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 627 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 628 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 629 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 630 631 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 632 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR)) 633 log_err("u->HZ [UCNV_MBCS] \n"); 634 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 635 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR)) 636 log_err("u-> HZ[UCNV_MBCS] \n"); 637 638 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 639 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR)) 640 log_err("u-> HZ [UCNV_MBCS] \n"); 641 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 642 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR)) 643 log_err("u-> HZ [UCNV_MBCS] \n"); 644 } 645#endif 646} 647 648#if !UCONFIG_NO_LEGACY_CONVERSION 649/*test different convertToUnicode error behaviours*/ 650static void TestToUnicodeErrorBehaviour() 651{ 652 log_verbose("Testing error conditions for DBCS\n"); 653 { 654 uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04}; 655 const UChar expected[] = { 0x00a1 }; 656 657 if(!convertToU(sampleText, sizeof(sampleText), 658 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_ZERO_ERROR )) 659 log_err("DBCS (ibm-1363)->Unicode did not match.\n"); 660 if(!convertToU(sampleText, sizeof(sampleText), 661 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_ZERO_ERROR )) 662 log_err("DBCS (ibm-1363)->Unicode with flush = false did not match.\n"); 663 } 664 log_verbose("Testing error conditions for SBCS\n"); 665 { 666 uint8_t sampleText[] = { 0xa2, 0xFF}; 667 const UChar expected[] = { 0x00c2 }; 668 669 /* uint8_t sampleText2[] = { 0xa2, 0x70 }; 670 const UChar expected2[] = { 0x0073 };*/ 671 672 if(!convertToU(sampleText, sizeof(sampleText), 673 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR )) 674 log_err("SBCS (ibm-1051)->Unicode did not match.\n"); 675 if(!convertToU(sampleText, sizeof(sampleText), 676 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR )) 677 log_err("SBCS (ibm-1051)->Unicode with flush = false did not match.\n"); 678 679 } 680} 681 682static void TestGetNextErrorBehaviour(){ 683 /*Test for unassigned character*/ 684#define INPUT_SIZE 1 685 static const char input1[INPUT_SIZE]={ 0x70 }; 686 const char* source=(const char*)input1; 687 UErrorCode err=U_ZERO_ERROR; 688 UChar32 c=0; 689 UConverter *cnv=ucnv_open("ibm-424", &err); 690 if(U_FAILURE(err)) { 691 log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err)); 692 return; 693 } 694 c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err); 695 if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){ 696 log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n", myErrorName(err), c); 697 } 698 ucnv_close(cnv); 699} 700#endif 701 702#define MAX_UTF16_LEN 2 703#define MAX_UTF8_LEN 4 704 705/*Regression test for utf8 converter*/ 706static void TestRegressionUTF8(){ 707 UChar32 currCh = 0; 708 int32_t offset8; 709 int32_t offset16; 710 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 711 uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH); 712 713 while (currCh <= UNICODE_LIMIT) { 714 offset16 = 0; 715 offset8 = 0; 716 while(currCh <= UNICODE_LIMIT 717 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 718 && offset8 < (MAX_LENGTH - MAX_UTF8_LEN)) 719 { 720 if (currCh == SURROGATE_HIGH_START) { 721 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 722 } 723 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 724 UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh); 725 currCh++; 726 } 727 if(!convertFromU(standardForm, offset16, 728 utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 729 log_err("Unicode->UTF8 did not match.\n"); 730 } 731 if(!convertToU(utf8, offset8, 732 standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 733 log_err("UTF8->Unicode did not match.\n"); 734 } 735 } 736 737 free(standardForm); 738 free(utf8); 739 740 { 741 static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 }; 742 static const UChar expected[] = { 0x0301, 0x0300 }; 743 UConverter *conv8; 744 UErrorCode err = U_ZERO_ERROR; 745 UChar pivotBuffer[100]; 746 const UChar* const pivEnd = pivotBuffer + 100; 747 const char* srcBeg; 748 const char* srcEnd; 749 UChar* pivBeg; 750 751 conv8 = ucnv_open("UTF-8", &err); 752 753 srcBeg = src8; 754 pivBeg = pivotBuffer; 755 srcEnd = src8 + 3; 756 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 757 if (srcBeg != srcEnd) { 758 log_err("Did not consume whole buffer on first call.\n"); 759 } 760 761 srcEnd = src8 + 4; 762 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 763 if (srcBeg != srcEnd) { 764 log_err("Did not consume whole buffer on second call.\n"); 765 } 766 767 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 768 log_err("Did not get expected results for UTF-8.\n"); 769 } 770 ucnv_close(conv8); 771 } 772} 773 774#define MAX_UTF32_LEN 1 775 776static void TestRegressionUTF32(){ 777 UChar32 currCh = 0; 778 int32_t offset32; 779 int32_t offset16; 780 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 781 UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32)); 782 783 while (currCh <= UNICODE_LIMIT) { 784 offset16 = 0; 785 offset32 = 0; 786 while(currCh <= UNICODE_LIMIT 787 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 788 && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN)) 789 { 790 if (currCh == SURROGATE_HIGH_START) { 791 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 792 } 793 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 794 UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh); 795 currCh++; 796 } 797 if(!convertFromU(standardForm, offset16, 798 (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 799 log_err("Unicode->UTF32 did not match.\n"); 800 } 801 if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32), 802 standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 803 log_err("UTF32->Unicode did not match.\n"); 804 } 805 } 806 free(standardForm); 807 free(utf32); 808 809 { 810 /* Check for lone surrogate error handling. */ 811 static const UChar sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 }; 812 static const UChar sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 }; 813 static const uint8_t expectedUTF32BE[] = { 814 0x00, 0x00, 0x00, 0x31, 815 0x00, 0x00, 0xff, 0xfd, 816 0x00, 0x00, 0x00, 0x32 817 }; 818 static const uint8_t expectedUTF32LE[] = { 819 0x31, 0x00, 0x00, 0x00, 820 0xfd, 0xff, 0x00, 0x00, 821 0x32, 0x00, 0x00, 0x00 822 }; 823 static const int32_t offsetsUTF32[] = { 824 0x00, 0x00, 0x00, 0x00, 825 0x01, 0x01, 0x01, 0x01, 826 0x02, 0x02, 0x02, 0x02 827 }; 828 829 if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]), 830 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 831 log_err("u->UTF-32BE\n"); 832 if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]), 833 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 834 log_err("u->UTF-32BE\n"); 835 836 if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]), 837 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 838 log_err("u->UTF-32LE\n"); 839 if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]), 840 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 841 log_err("u->UTF-32LE\n"); 842 } 843 844 { 845 static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 }; 846 static const UChar expected[] = { 0x0031, 0x0030 }; 847 UConverter *convBE; 848 UErrorCode err = U_ZERO_ERROR; 849 UChar pivotBuffer[100]; 850 const UChar* const pivEnd = pivotBuffer + 100; 851 const char* srcBeg; 852 const char* srcEnd; 853 UChar* pivBeg; 854 855 convBE = ucnv_open("UTF-32BE", &err); 856 857 srcBeg = srcBE; 858 pivBeg = pivotBuffer; 859 srcEnd = srcBE + 5; 860 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 861 if (srcBeg != srcEnd) { 862 log_err("Did not consume whole buffer on first call.\n"); 863 } 864 865 srcEnd = srcBE + 8; 866 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 867 if (srcBeg != srcEnd) { 868 log_err("Did not consume whole buffer on second call.\n"); 869 } 870 871 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 872 log_err("Did not get expected results for UTF-32BE.\n"); 873 } 874 ucnv_close(convBE); 875 } 876 { 877 static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 }; 878 static const UChar expected[] = { 0x0031, 0x0030 }; 879 UConverter *convLE; 880 UErrorCode err = U_ZERO_ERROR; 881 UChar pivotBuffer[100]; 882 const UChar* const pivEnd = pivotBuffer + 100; 883 const char* srcBeg; 884 const char* srcEnd; 885 UChar* pivBeg; 886 887 convLE = ucnv_open("UTF-32LE", &err); 888 889 srcBeg = srcLE; 890 pivBeg = pivotBuffer; 891 srcEnd = srcLE + 5; 892 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 893 if (srcBeg != srcEnd) { 894 log_err("Did not consume whole buffer on first call.\n"); 895 } 896 897 srcEnd = srcLE + 8; 898 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 899 if (srcBeg != srcEnd) { 900 log_err("Did not consume whole buffer on second call.\n"); 901 } 902 903 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 904 log_err("Did not get expected results for UTF-32LE.\n"); 905 } 906 ucnv_close(convLE); 907 } 908} 909 910/*Walk through the available converters*/ 911static void TestAvailableConverters(){ 912 UErrorCode status=U_ZERO_ERROR; 913 UConverter *conv=NULL; 914 int32_t i=0; 915 for(i=0; i < ucnv_countAvailable(); i++){ 916 status=U_ZERO_ERROR; 917 conv=ucnv_open(ucnv_getAvailableName(i), &status); 918 if(U_FAILURE(status)){ 919 log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n", 920 ucnv_getAvailableName(i), myErrorName(status)); 921 continue; 922 } 923 ucnv_close(conv); 924 } 925 926} 927 928static void TestFlushInternalBuffer(){ 929 TestWithBufferSize(MAX_LENGTH, 1); 930 TestWithBufferSize(1, 1); 931 TestWithBufferSize(1, MAX_LENGTH); 932 TestWithBufferSize(MAX_LENGTH, MAX_LENGTH); 933} 934 935static void TestWithBufferSize(int32_t insize, int32_t outsize){ 936 937 gInBufferSize =insize; 938 gOutBufferSize = outsize; 939 940 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 941 { 942 UChar sampleText[] = 943 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E }; 944 const uint8_t expectedUTF8[] = 945 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 946 int32_t toUTF8Offs[] = 947 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07}; 948 /* int32_t fmUTF8Offs[] = 949 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/ 950 951 /*UTF-8*/ 952 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 953 expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE)) 954 log_err("u-> UTF8 did not match.\n"); 955 } 956 957#if !UCONFIG_NO_LEGACY_CONVERSION 958 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 959 { 960 UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 961 const uint8_t toIBM943[]= { 0x61, 962 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 963 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 964 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 965 0x61 }; 966 int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 967 968 if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 969 toIBM943, sizeof(toIBM943), "ibm-943", 970 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE)) 971 log_err("u-> ibm-943 with subst with value did not match.\n"); 972 } 973#endif 974 975 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 976 { 977 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 978 0xe0, 0x80, 0x61}; 979 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 980 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 981 982 if(!testConvertToU(sampleText1, sizeof(sampleText1), 983 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE)) 984 log_err("utf8->u with substitute did not match.\n");; 985 } 986 987#if !UCONFIG_NO_LEGACY_CONVERSION 988 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 989 /*to Unicode*/ 990 { 991 const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 992 0x81, 0xad, /*unassigned*/ 993 0x89, 0xd3 }; 994 UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 995 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 996 0x7B87}; 997 int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 998 999 if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU), 1000 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", 1001 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE)) 1002 log_err("ibm-943->u with substitute with value did not match.\n"); 1003 1004 } 1005#endif 1006} 1007 1008static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1009 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1010{ 1011 1012 int32_t i=0; 1013 char *p=0; 1014 const UChar *src; 1015 char buffer[MAX_LENGTH]; 1016 int32_t offsetBuffer[MAX_LENGTH]; 1017 int32_t *offs=0; 1018 char *targ; 1019 char *targetLimit; 1020 UChar *sourceLimit=0; 1021 UErrorCode status = U_ZERO_ERROR; 1022 UConverter *conv = 0; 1023 conv = ucnv_open(codepage, &status); 1024 if(U_FAILURE(status)) 1025 { 1026 log_data_err("Couldn't open converter %s\n",codepage); 1027 return TRUE; 1028 } 1029 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1030 1031 for(i=0; i<MAX_LENGTH; i++){ 1032 buffer[i]=(char)0xF0; 1033 offsetBuffer[i]=0xFF; 1034 } 1035 1036 src=source; 1037 sourceLimit=(UChar*)src+(sourceLen); 1038 targ=buffer; 1039 targetLimit=targ+MAX_LENGTH; 1040 offs=offsetBuffer; 1041 ucnv_fromUnicode (conv, 1042 (char **)&targ, 1043 (const char *)targetLimit, 1044 &src, 1045 sourceLimit, 1046 expectOffsets ? offs : NULL, 1047 doFlush, 1048 &status); 1049 ucnv_close(conv); 1050 if(status != expectedStatus){ 1051 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1052 return FALSE; 1053 } 1054 1055 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1056 sourceLen, targ-buffer); 1057 1058 if(expectLen != targ-buffer) 1059 { 1060 log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1061 log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1062 printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer)); 1063 printSeqErr((const unsigned char*)expect, expectLen); 1064 return FALSE; 1065 } 1066 1067 if(memcmp(buffer, expect, expectLen)){ 1068 log_err("String does not match. FROM Unicode to codePage%s\n", codepage); 1069 log_info("\nGot:"); 1070 printSeqErr((const unsigned char *)buffer, expectLen); 1071 log_info("\nExpected:"); 1072 printSeqErr((const unsigned char *)expect, expectLen); 1073 return FALSE; 1074 } 1075 else { 1076 log_verbose("Matches!\n"); 1077 } 1078 1079 if (expectOffsets != 0){ 1080 log_verbose("comparing %d offsets..\n", targ-buffer); 1081 if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){ 1082 log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage); 1083 log_info("\nGot : "); 1084 printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer)); 1085 for(p=buffer;p<targ;p++) 1086 log_info("%d, ", offsetBuffer[p-buffer]); 1087 log_info("\nExpected: "); 1088 for(i=0; i< (targ-buffer); i++) 1089 log_info("%d,", expectOffsets[i]); 1090 } 1091 } 1092 1093 return TRUE; 1094} 1095 1096 1097static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 1098 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1099{ 1100 UErrorCode status = U_ZERO_ERROR; 1101 UConverter *conv = 0; 1102 int32_t i=0; 1103 UChar *p=0; 1104 const char* src; 1105 UChar buffer[MAX_LENGTH]; 1106 int32_t offsetBuffer[MAX_LENGTH]; 1107 int32_t *offs=0; 1108 UChar *targ; 1109 UChar *targetLimit; 1110 uint8_t *sourceLimit=0; 1111 1112 1113 1114 conv = ucnv_open(codepage, &status); 1115 if(U_FAILURE(status)) 1116 { 1117 log_data_err("Couldn't open converter %s\n",codepage); 1118 return TRUE; 1119 } 1120 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1121 1122 1123 1124 for(i=0; i<MAX_LENGTH; i++){ 1125 buffer[i]=0xFFFE; 1126 offsetBuffer[i]=-1; 1127 } 1128 1129 src=(const char *)source; 1130 sourceLimit=(uint8_t*)(src+(sourceLen)); 1131 targ=buffer; 1132 targetLimit=targ+MAX_LENGTH; 1133 offs=offsetBuffer; 1134 1135 1136 1137 ucnv_toUnicode (conv, 1138 &targ, 1139 targetLimit, 1140 (const char **)&src, 1141 (const char *)sourceLimit, 1142 expectOffsets ? offs : NULL, 1143 doFlush, 1144 &status); 1145 1146 ucnv_close(conv); 1147 if(status != expectedStatus){ 1148 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1149 return FALSE; 1150 } 1151 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1152 sourceLen, targ-buffer); 1153 1154 1155 1156 1157 log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2); 1158 1159 if (expectOffsets != 0) { 1160 if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){ 1161 1162 log_err("did not get the expected offsets from %s To UNICODE\n", codepage); 1163 log_info("\nGot : "); 1164 for(p=buffer;p<targ;p++) 1165 log_info("%d, ", offsetBuffer[p-buffer]); 1166 log_info("\nExpected: "); 1167 for(i=0; i<(targ-buffer); i++) 1168 log_info("%d, ", expectOffsets[i]); 1169 log_info("\nGot result:"); 1170 for(i=0; i<(targ-buffer); i++) 1171 log_info("0x%04X,", buffer[i]); 1172 log_info("\nFrom Input:"); 1173 for(i=0; i<(src-(const char *)source); i++) 1174 log_info("0x%02X,", (unsigned char)source[i]); 1175 log_info("\n"); 1176 } 1177 } 1178 if(memcmp(buffer, expect, expectLen*2)){ 1179 log_err("String does not match. from codePage %s TO Unicode\n", codepage); 1180 log_info("\nGot:"); 1181 printUSeqErr(buffer, expectLen); 1182 log_info("\nExpected:"); 1183 printUSeqErr(expect, expectLen); 1184 return FALSE; 1185 } 1186 else { 1187 log_verbose("Matches!\n"); 1188 } 1189 1190 return TRUE; 1191} 1192 1193 1194static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1195 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset) 1196{ 1197 UErrorCode status = U_ZERO_ERROR; 1198 UConverter *conv = 0; 1199 char junkout[MAX_LENGTH]; /* FIX */ 1200 int32_t junokout[MAX_LENGTH]; /* FIX */ 1201 char *p; 1202 const UChar *src; 1203 char *end; 1204 char *targ; 1205 int32_t *offs; 1206 int i; 1207 int32_t realBufferSize; 1208 char *realBufferEnd; 1209 const UChar *realSourceEnd; 1210 const UChar *sourceLimit; 1211 UBool checkOffsets = TRUE; 1212 UBool doFlush; 1213 1214 UConverterFromUCallback oldAction = NULL; 1215 const void* oldContext = NULL; 1216 1217 for(i=0;i<MAX_LENGTH;i++) 1218 junkout[i] = (char)0xF0; 1219 for(i=0;i<MAX_LENGTH;i++) 1220 junokout[i] = 0xFF; 1221 1222 setNuConvTestName(codepage, "FROM"); 1223 1224 log_verbose("\n========= %s\n", gNuConvTestName); 1225 1226 conv = ucnv_open(codepage, &status); 1227 if(U_FAILURE(status)) 1228 { 1229 log_data_err("Couldn't open converter %s\n",codepage); 1230 return TRUE; 1231 } 1232 1233 log_verbose("Converter opened..\n"); 1234 /*----setting the callback routine----*/ 1235 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1236 if (U_FAILURE(status)) { 1237 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1238 } 1239 /*------------------------*/ 1240 1241 src = source; 1242 targ = junkout; 1243 offs = junokout; 1244 1245 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 1246 realBufferEnd = junkout + realBufferSize; 1247 realSourceEnd = source + sourceLen; 1248 1249 if ( gOutBufferSize != realBufferSize ) 1250 checkOffsets = FALSE; 1251 1252 if( gInBufferSize != MAX_LENGTH ) 1253 checkOffsets = FALSE; 1254 1255 do 1256 { 1257 end = nct_min(targ + gOutBufferSize, realBufferEnd); 1258 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 1259 1260 doFlush = (UBool)(sourceLimit == realSourceEnd); 1261 1262 if(targ == realBufferEnd) 1263 { 1264 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 1265 return FALSE; 1266 } 1267 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 1268 1269 1270 status = U_ZERO_ERROR; 1271 if(gInBufferSize ==999 && gOutBufferSize==999) 1272 doFlush = FALSE; 1273 ucnv_fromUnicode (conv, 1274 (char **)&targ, 1275 (const char *)end, 1276 &src, 1277 sourceLimit, 1278 offs, 1279 doFlush, /* flush if we're at the end of the input data */ 1280 &status); 1281 if(testReset) 1282 ucnv_resetToUnicode(conv); 1283 if(gInBufferSize ==999 && gOutBufferSize==999) 1284 ucnv_resetToUnicode(conv); 1285 1286 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 1287 1288 if(U_FAILURE(status)) { 1289 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1290 return FALSE; 1291 } 1292 1293 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1294 sourceLen, targ-junkout); 1295 if(getTestOption(VERBOSITY_OPTION)) 1296 { 1297 char junk[999]; 1298 char offset_str[999]; 1299 char *ptr; 1300 1301 junk[0] = 0; 1302 offset_str[0] = 0; 1303 for(ptr = junkout;ptr<targ;ptr++) 1304 { 1305 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr); 1306 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]); 1307 } 1308 1309 log_verbose(junk); 1310 printSeq((const unsigned char *)expect, expectLen); 1311 if ( checkOffsets ) 1312 { 1313 log_verbose("\nOffsets:"); 1314 log_verbose(offset_str); 1315 } 1316 log_verbose("\n"); 1317 } 1318 ucnv_close(conv); 1319 1320 1321 if(expectLen != targ-junkout) 1322 { 1323 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1324 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1325 log_info("\nGot:"); 1326 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1327 log_info("\nExpected:"); 1328 printSeqErr((const unsigned char*)expect, expectLen); 1329 return FALSE; 1330 } 1331 1332 if (checkOffsets && (expectOffsets != 0) ) 1333 { 1334 log_verbose("comparing %d offsets..\n", targ-junkout); 1335 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 1336 log_err("did not get the expected offsets. %s", gNuConvTestName); 1337 log_err("Got : "); 1338 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1339 for(p=junkout;p<targ;p++) 1340 log_err("%d, ", junokout[p-junkout]); 1341 log_err("\nExpected: "); 1342 for(i=0; i<(targ-junkout); i++) 1343 log_err("%d,", expectOffsets[i]); 1344 } 1345 } 1346 1347 log_verbose("comparing..\n"); 1348 if(!memcmp(junkout, expect, expectLen)) 1349 { 1350 log_verbose("Matches!\n"); 1351 return TRUE; 1352 } 1353 else 1354 { 1355 log_err("String does not match. %s\n", gNuConvTestName); 1356 printUSeqErr(source, sourceLen); 1357 log_info("\nGot:"); 1358 printSeqErr((const unsigned char *)junkout, expectLen); 1359 log_info("\nExpected:"); 1360 printSeqErr((const unsigned char *)expect, expectLen); 1361 1362 return FALSE; 1363 } 1364} 1365 1366static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 1367 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset) 1368{ 1369 UErrorCode status = U_ZERO_ERROR; 1370 UConverter *conv = 0; 1371 UChar junkout[MAX_LENGTH]; /* FIX */ 1372 int32_t junokout[MAX_LENGTH]; /* FIX */ 1373 const char *src; 1374 const char *realSourceEnd; 1375 const char *srcLimit; 1376 UChar *p; 1377 UChar *targ; 1378 UChar *end; 1379 int32_t *offs; 1380 int i; 1381 UBool checkOffsets = TRUE; 1382 int32_t realBufferSize; 1383 UChar *realBufferEnd; 1384 UBool doFlush; 1385 1386 UConverterToUCallback oldAction = NULL; 1387 const void* oldContext = NULL; 1388 1389 1390 for(i=0;i<MAX_LENGTH;i++) 1391 junkout[i] = 0xFFFE; 1392 1393 for(i=0;i<MAX_LENGTH;i++) 1394 junokout[i] = -1; 1395 1396 setNuConvTestName(codepage, "TO"); 1397 1398 log_verbose("\n========= %s\n", gNuConvTestName); 1399 1400 conv = ucnv_open(codepage, &status); 1401 if(U_FAILURE(status)) 1402 { 1403 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 1404 return TRUE; 1405 } 1406 1407 log_verbose("Converter opened..\n"); 1408 /*----setting the callback routine----*/ 1409 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1410 if (U_FAILURE(status)) { 1411 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1412 } 1413 /*-------------------------------------*/ 1414 1415 src = (const char *)source; 1416 targ = junkout; 1417 offs = junokout; 1418 1419 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 1420 realBufferEnd = junkout + realBufferSize; 1421 realSourceEnd = src + sourcelen; 1422 1423 if ( gOutBufferSize != realBufferSize ) 1424 checkOffsets = FALSE; 1425 1426 if( gInBufferSize != MAX_LENGTH ) 1427 checkOffsets = FALSE; 1428 1429 do 1430 { 1431 end = nct_min( targ + gOutBufferSize, realBufferEnd); 1432 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 1433 1434 if(targ == realBufferEnd) 1435 { 1436 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 1437 return FALSE; 1438 } 1439 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 1440 1441 /* oldTarg = targ; */ 1442 1443 status = U_ZERO_ERROR; 1444 doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE); 1445 1446 ucnv_toUnicode (conv, 1447 &targ, 1448 end, 1449 (const char **)&src, 1450 (const char *)srcLimit, 1451 offs, 1452 doFlush, /* flush if we're at the end of hte source data */ 1453 &status); 1454 if(testReset) 1455 ucnv_resetFromUnicode(conv); 1456 if(gInBufferSize ==999 && gOutBufferSize==999) 1457 ucnv_resetToUnicode(conv); 1458 /* offs += (targ-oldTarg); */ 1459 1460 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 1461 1462 if(U_FAILURE(status)) 1463 { 1464 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1465 return FALSE; 1466 } 1467 1468 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 1469 sourcelen, targ-junkout); 1470 if(getTestOption(VERBOSITY_OPTION)) 1471 { 1472 char junk[999]; 1473 char offset_str[999]; 1474 1475 UChar *ptr; 1476 1477 junk[0] = 0; 1478 offset_str[0] = 0; 1479 1480 for(ptr = junkout;ptr<targ;ptr++) 1481 { 1482 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 1483 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 1484 } 1485 1486 log_verbose(junk); 1487 1488 if ( checkOffsets ) 1489 { 1490 log_verbose("\nOffsets:"); 1491 log_verbose(offset_str); 1492 } 1493 log_verbose("\n"); 1494 } 1495 ucnv_close(conv); 1496 1497 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 1498 1499 if (checkOffsets && (expectOffsets != 0)) 1500 { 1501 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 1502 1503 log_err("did not get the expected offsets. %s",gNuConvTestName); 1504 for(p=junkout;p<targ;p++) 1505 log_err("%d, ", junokout[p-junkout]); 1506 log_err("\nExpected: "); 1507 for(i=0; i<(targ-junkout); i++) 1508 log_err("%d,", expectOffsets[i]); 1509 log_err(""); 1510 for(i=0; i<(targ-junkout); i++) 1511 log_err("%X,", junkout[i]); 1512 log_err(""); 1513 for(i=0; i<(src-(const char *)source); i++) 1514 log_err("%X,", (unsigned char)source[i]); 1515 } 1516 } 1517 1518 if(!memcmp(junkout, expect, expectlen*2)) 1519 { 1520 log_verbose("Matches!\n"); 1521 return TRUE; 1522 } 1523 else 1524 { 1525 log_err("String does not match. %s\n", gNuConvTestName); 1526 log_verbose("String does not match. %s\n", gNuConvTestName); 1527 log_info("\nGot:"); 1528 printUSeq(junkout, expectlen); 1529 log_info("\nExpected:"); 1530 printUSeq(expect, expectlen); 1531 return FALSE; 1532 } 1533} 1534 1535 1536static void TestResetBehaviour(void){ 1537#if !UCONFIG_NO_LEGACY_CONVERSION 1538 log_verbose("Testing Reset for DBCS and MBCS\n"); 1539 { 1540 static const UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 1541 static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 1542 static const int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 1543 1544 1545 static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8}; 1546 static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7}; 1547 static const int32_t offsets1[] = { 0,2,4,6}; 1548 1549 /*DBCS*/ 1550 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1551 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1552 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1553 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1554 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1555 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1556 1557 if(!testConvertToU(expected1, sizeof(expected1), 1558 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1559 offsets1, TRUE)) 1560 log_err("ibm-1363 -> did not match.\n"); 1561 /*MBCS*/ 1562 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1563 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1564 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1565 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1566 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1567 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1568 1569 if(!testConvertToU(expected1, sizeof(expected1), 1570 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1571 offsets1, TRUE)) 1572 log_err("ibm-1363 -> did not match.\n"); 1573 1574 } 1575 1576 /* BEGIN android-removed */ 1577 /* To save space, Android does not build full ISO2022 CJK tables. 1578 We skip the tests for ISO-2022. */ 1579 /* 1580 log_verbose("Testing Reset for ISO-2022-jp\n"); 1581 { 1582 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1583 1584 static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1585 0x31,0x1A, 0x32}; 1586 1587 1588 static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 1589 1590 1591 static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1592 static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1593 0x31,0x1A, 0x32}; 1594 static const int32_t offsets1[] = { 3,5,10,11,12}; 1595 1596 // iso-2022-jp 1597 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1598 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1599 log_err("u-> not match.\n"); 1600 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1601 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1602 log_err("u-> not match.\n"); 1603 1604 if(!testConvertToU(expected1, sizeof(expected1), 1605 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1606 offsets1, TRUE)) 1607 log_err("iso-2022-jp -> did not match.\n"); 1608 1609 } 1610 1611 log_verbose("Testing Reset for ISO-2022-cn\n"); 1612 { 1613 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1614 1615 static const uint8_t expected[] = { 1616 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1617 0x36, 0x21, 1618 0x0f, 0x31, 1619 0x1A, 1620 0x32 1621 }; 1622 1623 1624 static const int32_t offsets[] = { 1625 0, 0, 0, 0, 0, 0, 0, 1626 1, 1, 1627 2, 2, 1628 3, 1629 5, }; 1630 1631 UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1632 static const uint8_t expected1[] = { 1633 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1634 0x36, 0x21, 1635 0x1B, 0x24, 0x29, 0x47, 0x24, 0x22, 1636 0x0f, 0x1A, 1637 0x32 1638 }; 1639 static const int32_t offsets1[] = { 5,7,13,16,17}; 1640 1641 // iso-2022-CN 1642 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1643 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1644 log_err("u-> not match.\n"); 1645 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1646 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1647 log_err("u-> not match.\n"); 1648 1649 if(!testConvertToU(expected1, sizeof(expected1), 1650 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1651 offsets1, TRUE)) 1652 log_err("iso-2022-cn -> did not match.\n"); 1653 } 1654 1655 log_verbose("Testing Reset for ISO-2022-kr\n"); 1656 { 1657 UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1658 1659 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 1660 0x0E, 0x6C, 0x69, 1661 0x0f, 0x1A, 1662 0x0e, 0x6F, 0x4B, 1663 0x0F, 0x31, 1664 0x1A, 1665 0x32 }; 1666 1667 static const int32_t offsets[] = {-1, -1, -1, -1, 1668 0, 0, 0, 1669 1, 1, 1670 3, 3, 3, 1671 4, 4, 1672 5, 1673 7, 1674 }; 1675 static const UChar sampleText1[] = { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032}; 1676 1677 static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43, 1678 0x0E, 0x6C, 0x69, 1679 0x0f, 0x41, 1680 0x0e, 0x6F, 0x4B, 1681 0x0F, 0x31, 1682 0x42, 1683 0x32 }; 1684 1685 static const int32_t offsets1[] = { 1686 5, 8, 10, 1687 13, 14, 15 1688 1689 }; 1690 // iso-2022-kr 1691 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1692 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1693 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1694 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1695 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1696 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1697 if(!testConvertToU(expected1, sizeof(expected1), 1698 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1699 offsets1, TRUE)) 1700 log_err("iso-2022-kr -> did not match.\n"); 1701 } 1702 */ 1703 /* END android-removed */ 1704 1705 1706 log_verbose("Testing Reset for HZ\n"); 1707 { 1708 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1709 1710 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 1711 0x7E, 0x7D, 0x1A, 1712 0x7E, 0x7B, 0x36, 0x21, 1713 0x7E, 0x7D, 0x31, 1714 0x1A, 1715 0x32 }; 1716 1717 1718 static const int32_t offsets[] = {0,0,0,0, 1719 1,1,1, 1720 3,3,3,3, 1721 4,4,4, 1722 5, 1723 7,}; 1724 static const UChar sampleText1[] = { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032}; 1725 1726 static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B, 1727 0x7E, 0x7D, 0x35, 1728 0x7E, 0x7B, 0x36, 0x21, 1729 0x7E, 0x7D, 0x31, 1730 0x41, 1731 0x32 }; 1732 1733 1734 static const int32_t offsets1[] = {2,6,9,13,14,15 1735 }; 1736 1737 /*hz*/ 1738 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1739 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1740 log_err("u-> not match.\n"); 1741 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1742 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1743 log_err("u-> not match.\n"); 1744 if(!testConvertToU(expected1, sizeof(expected1), 1745 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1746 offsets1, TRUE)) 1747 log_err("hz -> did not match.\n"); 1748 } 1749#endif 1750 1751 /*UTF-8*/ 1752 log_verbose("Testing for UTF8\n"); 1753 { 1754 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 1755 int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 1756 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 1757 0x04, 0x06 }; 1758 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 1759 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 1760 1761 1762 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 1763 /*UTF-8*/ 1764 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1765 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1766 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1767 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1768 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1769 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1770 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1771 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1772 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1773 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1774 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1775 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1776 if(!testConvertToU(expected, sizeof(expected), 1777 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1778 log_err("UTF8 -> did not match.\n"); 1779 if(!testConvertToU(expected, sizeof(expected), 1780 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1781 log_err("UTF8 -> did not match.\n"); 1782 if(!testConvertToU(expected, sizeof(expected), 1783 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1784 log_err("UTF8 -> did not match.\n"); 1785 if(!testConvertToU(expected, sizeof(expected), 1786 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1787 log_err("UTF8 -> did not match.\n"); 1788 1789 } 1790 1791} 1792 1793/* Test that U_TRUNCATED_CHAR_FOUND is set. */ 1794static void 1795doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) { 1796 UConverter *cnv; 1797 1798 UChar buffer[2]; 1799 UChar *target, *targetLimit; 1800 const char *source, *sourceLimit; 1801 1802 UErrorCode errorCode; 1803 1804 errorCode=U_ZERO_ERROR; 1805 cnv=ucnv_open(cnvName, &errorCode); 1806 if(U_FAILURE(errorCode)) { 1807 log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode)); 1808 return; 1809 } 1810 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 1811 if(U_FAILURE(errorCode)) { 1812 log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n", 1813 cnvName, u_errorName(errorCode)); 1814 ucnv_close(cnv); 1815 return; 1816 } 1817 1818 source=(const char *)bytes; 1819 sourceLimit=source+length; 1820 target=buffer; 1821 targetLimit=buffer+LENGTHOF(buffer); 1822 1823 /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */ 1824 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode); 1825 if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) { 1826 log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n", 1827 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1828 } 1829 1830 errorCode=U_ZERO_ERROR; 1831 source=sourceLimit; 1832 target=buffer; 1833 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1834 if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) { 1835 log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n", 1836 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer)); 1837 } 1838 1839 /* 2. input bytes with flush=TRUE */ 1840 ucnv_resetToUnicode(cnv); 1841 1842 errorCode=U_ZERO_ERROR; 1843 source=(const char *)bytes; 1844 target=buffer; 1845 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1846 if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) { 1847 log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n", 1848 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1849 } 1850 1851 1852 ucnv_close(cnv); 1853} 1854 1855static void 1856TestTruncated() { 1857 static const struct { 1858 const char *cnvName; 1859 uint8_t bytes[8]; /* partial input bytes resulting in no output */ 1860 int32_t length; 1861 } testCases[]={ 1862 { "IMAP-mailbox-name", { 0x26 }, 1 }, /* & */ 1863 { "IMAP-mailbox-name", { 0x26, 0x42 }, 2 }, /* &B */ 1864 { "IMAP-mailbox-name", { 0x26, 0x42, 0x42 }, 3 }, /* &BB */ 1865 { "IMAP-mailbox-name", { 0x26, 0x41, 0x41 }, 3 }, /* &AA */ 1866 1867 { "UTF-7", { 0x2b, 0x42 }, 2 }, /* +B */ 1868 { "UTF-8", { 0xd1 }, 1 }, 1869 1870 { "UTF-16BE", { 0x4e }, 1 }, 1871 { "UTF-16LE", { 0x4e }, 1 }, 1872 { "UTF-16", { 0x4e }, 1 }, 1873 { "UTF-16", { 0xff }, 1 }, 1874 { "UTF-16", { 0xfe, 0xff, 0x4e }, 3 }, 1875 1876 { "UTF-32BE", { 0, 0, 0x4e }, 3 }, 1877 { "UTF-32LE", { 0x4e }, 1 }, 1878 { "UTF-32", { 0, 0, 0x4e }, 3 }, 1879 { "UTF-32", { 0xff }, 1 }, 1880 { "UTF-32", { 0, 0, 0xfe, 0xff, 0 }, 5 }, 1881 { "SCSU", { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */ 1882 1883#if !UCONFIG_NO_LEGACY_CONVERSION 1884 { "BOCU-1", { 0xd5 }, 1 }, 1885 1886 { "Shift-JIS", { 0xe0 }, 1 }, 1887 1888 { "ibm-939", { 0x0e, 0x41 }, 2 } /* SO 0x41 */ 1889#else 1890 { "BOCU-1", { 0xd5 }, 1 ,} 1891#endif 1892 }; 1893 int32_t i; 1894 1895 for(i=0; i<LENGTHOF(testCases); ++i) { 1896 doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length); 1897 } 1898} 1899 1900typedef struct NameRange { 1901 const char *name; 1902 UChar32 start, end, start2, end2, notStart, notEnd; 1903} NameRange; 1904 1905static void 1906TestUnicodeSet() { 1907 UErrorCode errorCode; 1908 UConverter *cnv; 1909 USet *set; 1910 const char *name; 1911 int32_t i, count; 1912 1913 static const char *const completeSetNames[]={ 1914 "UTF-7", 1915 "UTF-8", 1916 "UTF-16", 1917 "UTF-16BE", 1918 "UTF-16LE", 1919 "UTF-32", 1920 "UTF-32BE", 1921 "UTF-32LE", 1922 "SCSU", 1923 "BOCU-1", 1924 "CESU-8", 1925#if !UCONFIG_NO_LEGACY_CONVERSION 1926 "gb18030", 1927#endif 1928 "IMAP-mailbox-name" 1929 }; 1930#if !UCONFIG_NO_LEGACY_CONVERSION 1931 static const char *const lmbcsNames[]={ 1932 "LMBCS-1", 1933 "LMBCS-2", 1934 "LMBCS-3", 1935 "LMBCS-4", 1936 "LMBCS-5", 1937 "LMBCS-6", 1938 "LMBCS-8", 1939 "LMBCS-11", 1940 "LMBCS-16", 1941 "LMBCS-17", 1942 "LMBCS-18", 1943 "LMBCS-19" 1944 }; 1945#endif 1946 1947 static const NameRange nameRanges[]={ 1948 { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1949#if !UCONFIG_NO_LEGACY_CONVERSION 1950 { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1951#endif 1952 { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff }, 1953#if !UCONFIG_NO_LEGACY_CONVERSION 1954 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }, 1955 { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff }, 1956 /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */ 1957 { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff } 1958#else 1959 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff } 1960#endif 1961 }; 1962 1963 /* open an empty set */ 1964 set=uset_open(1, 0); 1965 1966 count=ucnv_countAvailable(); 1967 for(i=0; i<count; ++i) { 1968 errorCode=U_ZERO_ERROR; 1969 name=ucnv_getAvailableName(i); 1970 cnv=ucnv_open(name, &errorCode); 1971 if(U_FAILURE(errorCode)) { 1972 log_data_err("error: unable to open converter %s - %s\n", 1973 name, u_errorName(errorCode)); 1974 continue; 1975 } 1976 1977 uset_clear(set); 1978 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 1979 if(U_FAILURE(errorCode)) { 1980 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 1981 name, u_errorName(errorCode)); 1982 } else if(uset_size(set)==0) { 1983 log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name); 1984 } 1985 1986 ucnv_close(cnv); 1987 } 1988 1989 /* test converters that are known to convert all of Unicode (except maybe for surrogates) */ 1990 for(i=0; i<LENGTHOF(completeSetNames); ++i) { 1991 errorCode=U_ZERO_ERROR; 1992 name=completeSetNames[i]; 1993 cnv=ucnv_open(name, &errorCode); 1994 if(U_FAILURE(errorCode)) { 1995 log_data_err("error: unable to open converter %s - %s\n", 1996 name, u_errorName(errorCode)); 1997 continue; 1998 } 1999 2000 uset_clear(set); 2001 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2002 if(U_FAILURE(errorCode)) { 2003 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2004 name, u_errorName(errorCode)); 2005 } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) { 2006 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name); 2007 } 2008 2009 ucnv_close(cnv); 2010 } 2011 2012#if !UCONFIG_NO_LEGACY_CONVERSION 2013 /* test LMBCS variants which convert all of Unicode except for U+F6xx */ 2014 for(i=0; i<LENGTHOF(lmbcsNames); ++i) { 2015 errorCode=U_ZERO_ERROR; 2016 name=lmbcsNames[i]; 2017 cnv=ucnv_open(name, &errorCode); 2018 if(U_FAILURE(errorCode)) { 2019 log_data_err("error: unable to open converter %s - %s\n", 2020 name, u_errorName(errorCode)); 2021 continue; 2022 } 2023 2024 uset_clear(set); 2025 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2026 if(U_FAILURE(errorCode)) { 2027 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2028 name, u_errorName(errorCode)); 2029 } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) { 2030 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name); 2031 } 2032 2033 ucnv_close(cnv); 2034 } 2035#endif 2036 2037 /* test specific sets */ 2038 for(i=0; i<LENGTHOF(nameRanges); ++i) { 2039 errorCode=U_ZERO_ERROR; 2040 name=nameRanges[i].name; 2041 cnv=ucnv_open(name, &errorCode); 2042 if(U_FAILURE(errorCode)) { 2043 log_data_err("error: unable to open converter %s - %s\n", 2044 name, u_errorName(errorCode)); 2045 continue; 2046 } 2047 2048 uset_clear(set); 2049 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2050 if(U_FAILURE(errorCode)) { 2051 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2052 name, u_errorName(errorCode)); 2053 } else if( 2054 !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) || 2055 (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2)) 2056 ) { 2057 log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name); 2058 } else if(nameRanges[i].notStart>=0) { 2059 /* simulate containsAny() with the C API */ 2060 uset_complement(set); 2061 if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) { 2062 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name); 2063 } 2064 } 2065 2066 ucnv_close(cnv); 2067 } 2068 2069 errorCode = U_ZERO_ERROR; 2070 ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode); 2071 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 2072 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2073 } 2074 errorCode = U_PARSE_ERROR; 2075 /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */ 2076 ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode); 2077 if (errorCode != U_PARSE_ERROR) { 2078 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2079 } 2080 2081 uset_close(set); 2082} 2083