1/* GLIB - Library of useful routines for C programming 2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with this library; if not, write to the 16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 17 * Boston, MA 02111-1307, USA. 18 */ 19 20/* 21 * Modified by the GLib Team and others 1997-2000. See the AUTHORS 22 * file for a list of people on the GLib Team. See the ChangeLog 23 * files for a list of changes. These files are distributed with 24 * GLib at ftp://ftp.gtk.org/pub/gtk/. 25 */ 26 27#undef G_DISABLE_ASSERT 28#undef G_LOG_DOMAIN 29 30#include <string.h> 31 32#include <glib.h> 33 34/* Bug 311337 */ 35static void 36test_iconv_state (void) 37{ 38 gchar *in = "\xf4\xe5\xf8\xe5\xed"; 39 gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d"; 40 gchar *out; 41 gsize bytes_read = 0; 42 gsize bytes_written = 0; 43 GError *error = NULL; 44 45 out = g_convert (in, -1, "UTF-8", "CP1255", 46 &bytes_read, &bytes_written, &error); 47 48 if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION) 49 return; /* silently skip if CP1255 is not supported, see bug 467707 */ 50 51 g_assert_no_error (error); 52 g_assert (bytes_read == 5); 53 g_assert (bytes_written == 10); 54 g_assert (strcmp (out, expected) == 0); 55 g_free (out); 56} 57 58/* some tests involving "vulgar fraction one half" */ 59static void 60test_one_half (void) 61{ 62 gchar *in = "\xc2\xbd"; 63 gchar *out; 64 gsize bytes_read = 0; 65 gsize bytes_written = 0; 66 GError *error = NULL; 67 68 out = g_convert (in, -1, 69 "ISO-8859-1", "UTF-8", 70 &bytes_read, &bytes_written, 71 &error); 72 73 g_assert_no_error (error); 74 g_assert (bytes_read == 2); 75 g_assert (bytes_written == 1); 76 g_assert (strcmp (out, "\xbd") == 0); 77 g_free (out); 78 79 out = g_convert (in, -1, 80 "ISO-8859-15", "UTF-8", 81 &bytes_read, &bytes_written, 82 &error); 83 84 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE); 85 g_assert (bytes_read == 0); 86 g_assert (bytes_written == 0); 87 g_assert (out == NULL); 88 g_clear_error (&error); 89 g_free (out); 90 91 out = g_convert_with_fallback (in, -1, 92 "ISO8859-15", "UTF-8", 93 "a", 94 &bytes_read, &bytes_written, 95 &error); 96 97 g_assert_no_error (error); 98 g_assert (bytes_read == 2); 99 g_assert (bytes_written == 1); 100 g_assert (strcmp (out, "a") == 0); 101 g_free (out); 102} 103 104static void 105test_byte_order (void) 106{ 107 gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */ 108 gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03}; 109 gchar *expected = "\xce\x93"; 110 gchar *out; 111 gsize bytes_read = 0; 112 gsize bytes_written = 0; 113 GError *error = NULL; 114 115 out = g_convert (in_be, sizeof (in_be), 116 "UTF-8", "UTF-16", 117 &bytes_read, &bytes_written, 118 &error); 119 120 g_assert_no_error (error); 121 g_assert (bytes_read == 4); 122 g_assert (bytes_written == 2); 123 g_assert (strcmp (out, expected) == 0); 124 g_free (out); 125 126 out = g_convert (in_le, sizeof (in_le), 127 "UTF-8", "UTF-16", 128 &bytes_read, &bytes_written, 129 &error); 130 131 g_assert_no_error (error); 132 g_assert (bytes_read == 4); 133 g_assert (bytes_written == 2); 134 g_assert (strcmp (out, expected) == 0); 135 g_free (out); 136} 137 138static void 139check_utf8_to_ucs4 (const char *utf8, 140 glong utf8_len, 141 const gunichar *ucs4, 142 glong ucs4_len, 143 glong error_pos) 144{ 145 gunichar *result, *result2, *result3; 146 glong items_read, items_read2; 147 glong items_written, items_written2; 148 GError *error, *error2, *error3; 149 gint i; 150 151 if (!error_pos) 152 { 153 /* check the fast conversion */ 154 result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written); 155 156 g_assert (items_written == ucs4_len); 157 g_assert (result); 158 for (i = 0; i <= items_written; i++) 159 g_assert (result[i] == ucs4[i]); 160 161 g_free (result); 162 } 163 164 error = NULL; 165 result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error); 166 167 if (utf8_len == strlen (utf8)) 168 { 169 /* check that len == -1 yields identical results */ 170 error2 = NULL; 171 result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2); 172 g_assert (error || items_read2 == items_read); 173 g_assert (error || items_written2 == items_written2); 174 g_assert (!!result == !!result2); 175 g_assert (!!error == !!error2); 176 if (result) 177 for (i = 0; i <= items_written; i++) 178 g_assert (result[i] == result2[i]); 179 180 g_free (result2); 181 if (error2) 182 g_error_free (error2); 183 } 184 185 error3 = NULL; 186 result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3); 187 188 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT) 189 { 190 g_assert_no_error (error); 191 g_assert (items_read == error_pos); 192 g_assert (items_written == ucs4_len); 193 g_assert (result); 194 for (i = 0; i <= items_written; i++) 195 g_assert (result[i] == ucs4[i]); 196 } 197 else if (error_pos) 198 { 199 g_assert (error != NULL); 200 g_assert (result == NULL); 201 g_assert (items_read == error_pos); 202 g_error_free (error); 203 204 g_assert (error3 != NULL); 205 g_assert (result3 == NULL); 206 g_error_free (error3); 207 } 208 else 209 { 210 g_assert_no_error (error); 211 g_assert (items_read == utf8_len); 212 g_assert (items_written == ucs4_len); 213 g_assert (result); 214 for (i = 0; i <= items_written; i++) 215 g_assert (result[i] == ucs4[i]); 216 217 g_assert_no_error (error3); 218 g_assert (result3); 219 for (i = 0; i <= ucs4_len; i++) 220 g_assert (result3[i] == ucs4[i]); 221 } 222 223 g_free (result); 224 g_free (result3); 225} 226 227static void 228check_ucs4_to_utf8 (const gunichar *ucs4, 229 glong ucs4_len, 230 const char *utf8, 231 glong utf8_len, 232 glong error_pos) 233{ 234 gchar *result, *result2, *result3; 235 glong items_read, items_read2; 236 glong items_written, items_written2; 237 GError *error, *error2, *error3; 238 239 error = NULL; 240 result = g_ucs4_to_utf8 (ucs4, ucs4_len, &items_read, &items_written, &error); 241 242 if (ucs4[ucs4_len] == 0) 243 { 244 /* check that len == -1 yields identical results */ 245 error2 = NULL; 246 result2 = g_ucs4_to_utf8 (ucs4, -1, &items_read2, &items_written2, &error2); 247 248 g_assert (error || items_read2 == items_read); 249 g_assert (error || items_written2 == items_written); 250 g_assert (!!result == !!result2); 251 g_assert (!!error == !!error2); 252 if (result) 253 g_assert (strcmp (result, result2) == 0); 254 255 g_free (result2); 256 if (error2) 257 g_error_free (error2); 258 } 259 260 error3 = NULL; 261 result3 = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &error3); 262 263 if (error_pos) 264 { 265 g_assert (error != NULL); 266 g_assert (result == NULL); 267 g_assert (items_read == error_pos); 268 g_error_free (error); 269 270 g_assert (error3 != NULL); 271 g_assert (result3 == NULL); 272 g_error_free (error3); 273 } 274 else 275 { 276 g_assert_no_error (error); 277 g_assert (items_read == ucs4_len); 278 g_assert (items_written == utf8_len); 279 g_assert (result); 280 g_assert (strcmp (result, utf8) == 0); 281 282 g_assert_no_error (error3); 283 g_assert (result3); 284 g_assert (strcmp (result3, utf8) == 0); 285 } 286 287 g_free (result); 288 g_free (result3); 289} 290 291static void 292check_utf8_to_utf16 (const char *utf8, 293 glong utf8_len, 294 const gunichar2 *utf16, 295 glong utf16_len, 296 glong error_pos) 297{ 298 gunichar2 *result, *result2, *result3; 299 glong items_read, items_read2; 300 glong items_written, items_written2; 301 GError *error, *error2, *error3; 302 gint i; 303 304 error = NULL; 305 result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error); 306 307 if (utf8_len == strlen (utf8)) 308 { 309 /* check that len == -1 yields identical results */ 310 error2 = NULL; 311 result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2); 312 g_assert (error || items_read2 == items_read); 313 g_assert (error || items_written2 == items_written2); 314 g_assert (!!result == !!result2); 315 g_assert (!!error == !!error2); 316 if (result) 317 for (i = 0; i <= items_written; i++) 318 g_assert (result[i] == result2[i]); 319 320 g_free (result2); 321 if (error2) 322 g_error_free (error2); 323 } 324 325 error3 = NULL; 326 result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3); 327 328 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT) 329 { 330 g_assert_no_error (error); 331 g_assert (items_read == error_pos); 332 g_assert (items_written == utf16_len); 333 g_assert (result); 334 for (i = 0; i <= items_written; i++) 335 g_assert (result[i] == utf16[i]); 336 } 337 else if (error_pos) 338 { 339 g_assert (error != NULL); 340 g_assert (result == NULL); 341 g_assert (items_read == error_pos); 342 g_error_free (error); 343 344 g_assert (error3 != NULL); 345 g_assert (result3 == NULL); 346 g_error_free (error3); 347 } 348 else 349 { 350 g_assert_no_error (error); 351 g_assert (items_read == utf8_len); 352 g_assert (items_written == utf16_len); 353 g_assert (result); 354 for (i = 0; i <= items_written; i++) 355 g_assert (result[i] == utf16[i]); 356 357 g_assert_no_error (error3); 358 g_assert (result3); 359 for (i = 0; i <= utf16_len; i++) 360 g_assert (result3[i] == utf16[i]); 361 } 362 363 g_free (result); 364 g_free (result3); 365} 366 367static void 368check_utf16_to_utf8 (const gunichar2 *utf16, 369 glong utf16_len, 370 const char *utf8, 371 glong utf8_len, 372 glong error_pos) 373{ 374 gchar *result, *result2, *result3; 375 glong items_read, items_read2; 376 glong items_written, items_written2; 377 GError *error, *error2, *error3; 378 379 error = NULL; 380 result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error); 381 if (utf16[utf16_len] == 0) 382 { 383 /* check that len == -1 yields identical results */ 384 error2 = NULL; 385 result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2); 386 387 g_assert (error || items_read2 == items_read); 388 g_assert (error || items_written2 == items_written); 389 g_assert (!!result == !!result2); 390 g_assert (!!error == !!error2); 391 if (result) 392 g_assert (strcmp (result, result2) == 0); 393 394 g_free (result2); 395 if (error2) 396 g_error_free (error2); 397 } 398 399 error3 = NULL; 400 result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3); 401 402 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT) 403 { 404 g_assert_no_error (error); 405 g_assert (items_read == error_pos); 406 g_assert (items_read + 1 == utf16_len); 407 g_assert (items_written == utf8_len); 408 g_assert (result); 409 g_assert (strcmp (result, utf8) == 0); 410 } 411 else if (error_pos) 412 { 413 g_assert (error != NULL); 414 g_assert (result == NULL); 415 g_assert (items_read == error_pos); 416 g_error_free (error); 417 418 g_assert (error3 != NULL); 419 g_assert (result3 == NULL); 420 g_error_free (error3); 421 } 422 else 423 { 424 g_assert_no_error (error); 425 g_assert (items_read == utf16_len); 426 g_assert (items_written == utf8_len); 427 g_assert (result); 428 g_assert (strcmp (result, utf8) == 0); 429 430 g_assert_no_error (error3); 431 g_assert (result3); 432 g_assert (strcmp (result3, utf8) == 0); 433 } 434 435 g_free (result); 436 g_free (result3); 437} 438 439static void 440check_ucs4_to_utf16 (const gunichar *ucs4, 441 glong ucs4_len, 442 const gunichar2 *utf16, 443 glong utf16_len, 444 glong error_pos) 445{ 446 gunichar2 *result, *result2, *result3; 447 glong items_read, items_read2; 448 glong items_written, items_written2; 449 GError *error, *error2, *error3; 450 gint i; 451 452 error = NULL; 453 result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error); 454 455 if (ucs4[ucs4_len] == 0) 456 { 457 /* check that len == -1 yields identical results */ 458 error2 = NULL; 459 result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2); 460 461 g_assert (error || items_read2 == items_read); 462 g_assert (error || items_written2 == items_written); 463 g_assert (!!result == !!result2); 464 g_assert (!!error == !!error2); 465 if (result) 466 for (i = 0; i <= utf16_len; i++) 467 g_assert (result[i] == result2[i]); 468 469 g_free (result2); 470 if (error2) 471 g_error_free (error2); 472 } 473 474 error3 = NULL; 475 result3 = g_ucs4_to_utf16 (ucs4, -1, NULL, NULL, &error3); 476 477 if (error_pos) 478 { 479 g_assert (error != NULL); 480 g_assert (result == NULL); 481 g_assert (items_read == error_pos); 482 g_error_free (error); 483 484 g_assert (error3 != NULL); 485 g_assert (result3 == NULL); 486 g_error_free (error3); 487 } 488 else 489 { 490 g_assert_no_error (error); 491 g_assert (items_read == ucs4_len); 492 g_assert (items_written == utf16_len); 493 g_assert (result); 494 for (i = 0; i <= utf16_len; i++) 495 g_assert (result[i] == utf16[i]); 496 497 g_assert_no_error (error3); 498 g_assert (result3); 499 for (i = 0; i <= utf16_len; i++) 500 g_assert (result3[i] == utf16[i]); 501 } 502 503 g_free (result); 504 g_free (result3); 505} 506 507static void 508check_utf16_to_ucs4 (const gunichar2 *utf16, 509 glong utf16_len, 510 const gunichar *ucs4, 511 glong ucs4_len, 512 glong error_pos) 513{ 514 gunichar *result, *result2, *result3; 515 glong items_read, items_read2; 516 glong items_written, items_written2; 517 GError *error, *error2, *error3; 518 gint i; 519 520 error = NULL; 521 result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error); 522 if (utf16[utf16_len] == 0) 523 { 524 /* check that len == -1 yields identical results */ 525 error2 = NULL; 526 result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2); 527 g_assert (error || items_read2 == items_read); 528 g_assert (error || items_written2 == items_written2); 529 g_assert (!!result == !!result2); 530 g_assert (!!error == !!error2); 531 if (result) 532 for (i = 0; i <= items_written; i++) 533 g_assert (result[i] == result2[i]); 534 535 g_free (result2); 536 if (error2) 537 g_error_free (error2); 538 } 539 540 error3 = NULL; 541 result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3); 542 543 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT) 544 { 545 g_assert_no_error (error); 546 g_assert (items_read == error_pos); 547 g_assert (items_read + 1 == utf16_len); 548 g_assert (items_written == ucs4_len); 549 g_assert (result); 550 for (i = 0; i <= items_written; i++) 551 g_assert (result[i] == ucs4[i]); 552 } 553 else if (error_pos) 554 { 555 g_assert (error != NULL); 556 g_assert (result == NULL); 557 g_assert (items_read == error_pos); 558 g_error_free (error); 559 560 g_assert (error3 != NULL); 561 g_assert (result3 == NULL); 562 g_error_free (error3); 563 } 564 else 565 { 566 g_assert_no_error (error); 567 g_assert (items_read == utf16_len); 568 g_assert (items_written == ucs4_len); 569 g_assert (result); 570 for (i = 0; i <= ucs4_len; i++) 571 g_assert (result[i] == ucs4[i]); 572 573 g_assert_no_error (error3); 574 g_assert (result3); 575 for (i = 0; i <= ucs4_len; i++) 576 g_assert (result3[i] == ucs4[i]); 577 } 578 579 g_free (result); 580 g_free (result3); 581} 582 583static void 584test_unicode_conversions (void) 585{ 586 char *utf8; 587 gunichar ucs4[100]; 588 gunichar2 utf16[100]; 589 590 utf8 = "abc"; 591 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0; 592 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0; 593 594 check_utf8_to_ucs4 (utf8, 3, ucs4, 3, 0); 595 check_ucs4_to_utf8 (ucs4, 3, utf8, 3, 0); 596 check_utf8_to_utf16 (utf8, 3, utf16, 3, 0); 597 check_utf16_to_utf8 (utf16, 3, utf8, 3, 0); 598 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0); 599 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0); 600 601 utf8 = "\316\261\316\262\316\263"; 602 ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0; 603 utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0; 604 605 check_utf8_to_ucs4 (utf8, 6, ucs4, 3, 0); 606 check_ucs4_to_utf8 (ucs4, 3, utf8, 6, 0); 607 check_utf8_to_utf16 (utf8, 6, utf16, 3, 0); 608 check_utf16_to_utf8 (utf16, 3, utf8, 6, 0); 609 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0); 610 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0); 611 612 /* partial utf8 character */ 613 utf8 = "abc\316"; 614 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0; 615 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0; 616 617 check_utf8_to_ucs4 (utf8, 4, ucs4, 3, 3); 618 check_utf8_to_utf16 (utf8, 4, utf16, 3, 3); 619 620 /* invalid utf8 */ 621 utf8 = "abc\316\316"; 622 ucs4[0] = 0; 623 utf16[0] = 0; 624 625 check_utf8_to_ucs4 (utf8, 5, ucs4, 0, 3); 626 check_utf8_to_utf16 (utf8, 5, utf16, 0, 3); 627 628 /* partial utf16 character */ 629 utf8 = "ab"; 630 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0; 631 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0; 632 633 check_utf16_to_utf8 (utf16, 3, utf8, 2, 2); 634 check_utf16_to_ucs4 (utf16, 3, ucs4, 2, 2); 635 636 /* invalid utf16 */ 637 utf8 = NULL; 638 ucs4[0] = 0; 639 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0; 640 641 check_utf16_to_utf8 (utf16, 3, utf8, 0, 2); 642 check_utf16_to_ucs4 (utf16, 3, ucs4, 0, 2); 643 644 /* invalid ucs4 */ 645 utf8 = NULL; 646 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0; 647 utf16[0] = 0; 648 649 check_ucs4_to_utf8 (ucs4, 3, utf8, 0, 2); 650 check_ucs4_to_utf16 (ucs4, 3, utf16, 0, 2); 651} 652 653int 654main (int argc, char *argv[]) 655{ 656 test_iconv_state (); 657 test_one_half (); 658 test_byte_order (); 659 test_unicode_conversions (); 660 661 return 0; 662} 663