1/* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
18 */
19
20/*
21 * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
22 * file for a list of people on the GLib Team.  See the ChangeLog
23 * files for a list of changes.  These files are distributed with
24 * GLib at ftp://ftp.gtk.org/pub/gtk/.
25 */
26
27#undef G_DISABLE_ASSERT
28#undef G_LOG_DOMAIN
29
30#include <string.h>
31
32#include <glib.h>
33
34/* Bug 311337 */
35static void
36test_iconv_state (void)
37{
38  gchar *in = "\xf4\xe5\xf8\xe5\xed";
39  gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
40  gchar *out;
41  gsize bytes_read = 0;
42  gsize bytes_written = 0;
43  GError *error = NULL;
44
45  out = g_convert (in, -1, "UTF-8", "CP1255",
46		   &bytes_read, &bytes_written, &error);
47
48  if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
49    return; /* silently skip if CP1255 is not supported, see bug 467707 */
50
51  g_assert_no_error (error);
52  g_assert (bytes_read == 5);
53  g_assert (bytes_written == 10);
54  g_assert (strcmp (out, expected) == 0);
55  g_free (out);
56}
57
58/* some tests involving "vulgar fraction one half" */
59static void
60test_one_half (void)
61{
62  gchar *in = "\xc2\xbd";
63  gchar *out;
64  gsize bytes_read = 0;
65  gsize bytes_written = 0;
66  GError *error = NULL;
67
68  out = g_convert (in, -1,
69		   "ISO-8859-1", "UTF-8",
70		   &bytes_read, &bytes_written,
71		   &error);
72
73  g_assert_no_error (error);
74  g_assert (bytes_read == 2);
75  g_assert (bytes_written == 1);
76  g_assert (strcmp (out, "\xbd") == 0);
77  g_free (out);
78
79  out = g_convert (in, -1,
80		   "ISO-8859-15", "UTF-8",
81		   &bytes_read, &bytes_written,
82		   &error);
83
84  g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
85  g_assert (bytes_read == 0);
86  g_assert (bytes_written == 0);
87  g_assert (out == NULL);
88  g_clear_error (&error);
89  g_free (out);
90
91  out = g_convert_with_fallback (in, -1,
92				 "ISO8859-15", "UTF-8",
93				 "a",
94				 &bytes_read, &bytes_written,
95				 &error);
96
97  g_assert_no_error (error);
98  g_assert (bytes_read == 2);
99  g_assert (bytes_written == 1);
100  g_assert (strcmp (out, "a") == 0);
101  g_free (out);
102}
103
104static void
105test_byte_order (void)
106{
107  gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
108  gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
109  gchar *expected = "\xce\x93";
110  gchar *out;
111  gsize bytes_read = 0;
112  gsize bytes_written = 0;
113  GError *error = NULL;
114
115  out = g_convert (in_be, sizeof (in_be),
116		   "UTF-8", "UTF-16",
117		   &bytes_read, &bytes_written,
118		   &error);
119
120  g_assert_no_error (error);
121  g_assert (bytes_read == 4);
122  g_assert (bytes_written == 2);
123  g_assert (strcmp (out, expected) == 0);
124  g_free (out);
125
126  out = g_convert (in_le, sizeof (in_le),
127		   "UTF-8", "UTF-16",
128		   &bytes_read, &bytes_written,
129		   &error);
130
131  g_assert_no_error (error);
132  g_assert (bytes_read == 4);
133  g_assert (bytes_written == 2);
134  g_assert (strcmp (out, expected) == 0);
135  g_free (out);
136}
137
138static void
139check_utf8_to_ucs4 (const char     *utf8,
140		    glong           utf8_len,
141		    const gunichar *ucs4,
142		    glong           ucs4_len,
143		    glong           error_pos)
144{
145  gunichar *result, *result2, *result3;
146  glong items_read, items_read2;
147  glong items_written, items_written2;
148  GError *error, *error2, *error3;
149  gint i;
150
151  if (!error_pos)
152    {
153      /* check the fast conversion */
154      result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written);
155
156      g_assert (items_written == ucs4_len);
157      g_assert (result);
158      for (i = 0; i <= items_written; i++)
159	g_assert (result[i] == ucs4[i]);
160
161      g_free (result);
162    }
163
164  error = NULL;
165  result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error);
166
167  if (utf8_len == strlen (utf8))
168    {
169      /* check that len == -1 yields identical results */
170      error2 = NULL;
171      result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2);
172      g_assert (error || items_read2 == items_read);
173      g_assert (error || items_written2 == items_written2);
174      g_assert (!!result == !!result2);
175      g_assert (!!error == !!error2);
176      if (result)
177	for (i = 0; i <= items_written; i++)
178	  g_assert (result[i] == result2[i]);
179
180      g_free (result2);
181      if (error2)
182	g_error_free (error2);
183    }
184
185  error3 = NULL;
186  result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3);
187
188  if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
189    {
190      g_assert_no_error (error);
191      g_assert (items_read == error_pos);
192      g_assert (items_written == ucs4_len);
193      g_assert (result);
194      for (i = 0; i <= items_written; i++)
195	g_assert (result[i] == ucs4[i]);
196    }
197  else if (error_pos)
198    {
199      g_assert (error != NULL);
200      g_assert (result == NULL);
201      g_assert (items_read == error_pos);
202      g_error_free (error);
203
204      g_assert (error3 != NULL);
205      g_assert (result3 == NULL);
206      g_error_free (error3);
207    }
208  else
209    {
210      g_assert_no_error (error);
211      g_assert (items_read == utf8_len);
212      g_assert (items_written == ucs4_len);
213      g_assert (result);
214      for (i = 0; i <= items_written; i++)
215	g_assert (result[i] == ucs4[i]);
216
217      g_assert_no_error (error3);
218      g_assert (result3);
219      for (i = 0; i <= ucs4_len; i++)
220	g_assert (result3[i] == ucs4[i]);
221    }
222
223  g_free (result);
224  g_free (result3);
225}
226
227static void
228check_ucs4_to_utf8 (const gunichar *ucs4,
229		    glong           ucs4_len,
230		    const char     *utf8,
231		    glong           utf8_len,
232		    glong           error_pos)
233{
234  gchar *result, *result2, *result3;
235  glong items_read, items_read2;
236  glong items_written, items_written2;
237  GError *error, *error2, *error3;
238
239  error = NULL;
240  result = g_ucs4_to_utf8 (ucs4, ucs4_len, &items_read, &items_written, &error);
241
242  if (ucs4[ucs4_len] == 0)
243    {
244      /* check that len == -1 yields identical results */
245      error2 = NULL;
246      result2 = g_ucs4_to_utf8 (ucs4, -1, &items_read2, &items_written2, &error2);
247
248      g_assert (error || items_read2 == items_read);
249      g_assert (error || items_written2 == items_written);
250      g_assert (!!result == !!result2);
251      g_assert (!!error == !!error2);
252      if (result)
253	g_assert (strcmp (result, result2) == 0);
254
255      g_free (result2);
256      if (error2)
257	g_error_free (error2);
258    }
259
260  error3 = NULL;
261  result3 = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &error3);
262
263  if (error_pos)
264    {
265      g_assert (error != NULL);
266      g_assert (result == NULL);
267      g_assert (items_read == error_pos);
268      g_error_free (error);
269
270      g_assert (error3 != NULL);
271      g_assert (result3 == NULL);
272      g_error_free (error3);
273    }
274  else
275    {
276      g_assert_no_error (error);
277      g_assert (items_read == ucs4_len);
278      g_assert (items_written == utf8_len);
279      g_assert (result);
280      g_assert (strcmp (result, utf8) == 0);
281
282      g_assert_no_error (error3);
283      g_assert (result3);
284      g_assert (strcmp (result3, utf8) == 0);
285    }
286
287  g_free (result);
288  g_free (result3);
289}
290
291static void
292check_utf8_to_utf16 (const char      *utf8,
293		     glong            utf8_len,
294		     const gunichar2 *utf16,
295		     glong            utf16_len,
296		     glong            error_pos)
297{
298  gunichar2 *result, *result2, *result3;
299  glong items_read, items_read2;
300  glong items_written, items_written2;
301  GError *error, *error2, *error3;
302  gint i;
303
304  error = NULL;
305  result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error);
306
307  if (utf8_len == strlen (utf8))
308    {
309      /* check that len == -1 yields identical results */
310      error2 = NULL;
311      result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2);
312      g_assert (error || items_read2 == items_read);
313      g_assert (error || items_written2 == items_written2);
314      g_assert (!!result == !!result2);
315      g_assert (!!error == !!error2);
316      if (result)
317	for (i = 0; i <= items_written; i++)
318	  g_assert (result[i] == result2[i]);
319
320      g_free (result2);
321      if (error2)
322	g_error_free (error2);
323    }
324
325  error3 = NULL;
326  result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3);
327
328  if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
329    {
330      g_assert_no_error (error);
331      g_assert (items_read == error_pos);
332      g_assert (items_written == utf16_len);
333      g_assert (result);
334      for (i = 0; i <= items_written; i++)
335	g_assert (result[i] == utf16[i]);
336    }
337  else if (error_pos)
338    {
339      g_assert (error != NULL);
340      g_assert (result == NULL);
341      g_assert (items_read == error_pos);
342      g_error_free (error);
343
344      g_assert (error3 != NULL);
345      g_assert (result3 == NULL);
346      g_error_free (error3);
347    }
348  else
349    {
350      g_assert_no_error (error);
351      g_assert (items_read == utf8_len);
352      g_assert (items_written == utf16_len);
353      g_assert (result);
354      for (i = 0; i <= items_written; i++)
355	g_assert (result[i] == utf16[i]);
356
357      g_assert_no_error (error3);
358      g_assert (result3);
359      for (i = 0; i <= utf16_len; i++)
360	g_assert (result3[i] == utf16[i]);
361    }
362
363  g_free (result);
364  g_free (result3);
365}
366
367static void
368check_utf16_to_utf8 (const gunichar2 *utf16,
369		     glong            utf16_len,
370		     const char      *utf8,
371		     glong            utf8_len,
372		     glong            error_pos)
373{
374  gchar *result, *result2, *result3;
375  glong items_read, items_read2;
376  glong items_written, items_written2;
377  GError *error, *error2, *error3;
378
379  error = NULL;
380  result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error);
381  if (utf16[utf16_len] == 0)
382    {
383      /* check that len == -1 yields identical results */
384      error2 = NULL;
385      result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2);
386
387      g_assert (error || items_read2 == items_read);
388      g_assert (error || items_written2 == items_written);
389      g_assert (!!result == !!result2);
390      g_assert (!!error == !!error2);
391      if (result)
392	g_assert (strcmp (result, result2) == 0);
393
394      g_free (result2);
395      if (error2)
396	g_error_free (error2);
397    }
398
399  error3 = NULL;
400  result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3);
401
402  if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
403    {
404      g_assert_no_error (error);
405      g_assert (items_read == error_pos);
406      g_assert (items_read + 1 == utf16_len);
407      g_assert (items_written == utf8_len);
408      g_assert (result);
409      g_assert (strcmp (result, utf8) == 0);
410    }
411  else if (error_pos)
412    {
413      g_assert (error != NULL);
414      g_assert (result == NULL);
415      g_assert (items_read == error_pos);
416      g_error_free (error);
417
418      g_assert (error3 != NULL);
419      g_assert (result3 == NULL);
420      g_error_free (error3);
421    }
422  else
423    {
424      g_assert_no_error (error);
425      g_assert (items_read == utf16_len);
426      g_assert (items_written == utf8_len);
427      g_assert (result);
428      g_assert (strcmp (result, utf8) == 0);
429
430      g_assert_no_error (error3);
431      g_assert (result3);
432      g_assert (strcmp (result3, utf8) == 0);
433    }
434
435  g_free (result);
436  g_free (result3);
437}
438
439static void
440check_ucs4_to_utf16 (const gunichar  *ucs4,
441		     glong            ucs4_len,
442		     const gunichar2 *utf16,
443		     glong            utf16_len,
444		     glong            error_pos)
445{
446  gunichar2 *result, *result2, *result3;
447  glong items_read, items_read2;
448  glong items_written, items_written2;
449  GError *error, *error2, *error3;
450  gint i;
451
452  error = NULL;
453  result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error);
454
455  if (ucs4[ucs4_len] == 0)
456    {
457      /* check that len == -1 yields identical results */
458      error2 = NULL;
459      result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2);
460
461      g_assert (error || items_read2 == items_read);
462      g_assert (error || items_written2 == items_written);
463      g_assert (!!result == !!result2);
464      g_assert (!!error == !!error2);
465      if (result)
466      for (i = 0; i <= utf16_len; i++)
467	g_assert (result[i] == result2[i]);
468
469      g_free (result2);
470      if (error2)
471	g_error_free (error2);
472    }
473
474  error3 = NULL;
475  result3 = g_ucs4_to_utf16 (ucs4, -1, NULL, NULL, &error3);
476
477  if (error_pos)
478    {
479      g_assert (error != NULL);
480      g_assert (result == NULL);
481      g_assert (items_read == error_pos);
482      g_error_free (error);
483
484      g_assert (error3 != NULL);
485      g_assert (result3 == NULL);
486      g_error_free (error3);
487    }
488  else
489    {
490      g_assert_no_error (error);
491      g_assert (items_read == ucs4_len);
492      g_assert (items_written == utf16_len);
493      g_assert (result);
494      for (i = 0; i <= utf16_len; i++)
495	g_assert (result[i] == utf16[i]);
496
497      g_assert_no_error (error3);
498      g_assert (result3);
499      for (i = 0; i <= utf16_len; i++)
500	g_assert (result3[i] == utf16[i]);
501    }
502
503  g_free (result);
504  g_free (result3);
505}
506
507static void
508check_utf16_to_ucs4 (const gunichar2 *utf16,
509		     glong            utf16_len,
510		     const gunichar  *ucs4,
511		     glong            ucs4_len,
512		     glong            error_pos)
513{
514  gunichar *result, *result2, *result3;
515  glong items_read, items_read2;
516  glong items_written, items_written2;
517  GError *error, *error2, *error3;
518  gint i;
519
520  error = NULL;
521  result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error);
522  if (utf16[utf16_len] == 0)
523    {
524      /* check that len == -1 yields identical results */
525      error2 = NULL;
526      result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2);
527      g_assert (error || items_read2 == items_read);
528      g_assert (error || items_written2 == items_written2);
529      g_assert (!!result == !!result2);
530      g_assert (!!error == !!error2);
531      if (result)
532	for (i = 0; i <= items_written; i++)
533	  g_assert (result[i] == result2[i]);
534
535      g_free (result2);
536      if (error2)
537	g_error_free (error2);
538    }
539
540  error3 = NULL;
541  result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3);
542
543  if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
544    {
545      g_assert_no_error (error);
546      g_assert (items_read == error_pos);
547      g_assert (items_read + 1 == utf16_len);
548      g_assert (items_written == ucs4_len);
549      g_assert (result);
550      for (i = 0; i <= items_written; i++)
551	g_assert (result[i] == ucs4[i]);
552    }
553  else if (error_pos)
554    {
555      g_assert (error != NULL);
556      g_assert (result == NULL);
557      g_assert (items_read == error_pos);
558      g_error_free (error);
559
560      g_assert (error3 != NULL);
561      g_assert (result3 == NULL);
562      g_error_free (error3);
563    }
564  else
565    {
566      g_assert_no_error (error);
567      g_assert (items_read == utf16_len);
568      g_assert (items_written == ucs4_len);
569      g_assert (result);
570      for (i = 0; i <= ucs4_len; i++)
571	g_assert (result[i] == ucs4[i]);
572
573      g_assert_no_error (error3);
574      g_assert (result3);
575      for (i = 0; i <= ucs4_len; i++)
576	g_assert (result3[i] == ucs4[i]);
577    }
578
579  g_free (result);
580  g_free (result3);
581}
582
583static void
584test_unicode_conversions (void)
585{
586  char *utf8;
587  gunichar ucs4[100];
588  gunichar2 utf16[100];
589
590  utf8 = "abc";
591  ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
592  utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
593
594  check_utf8_to_ucs4 (utf8, 3, ucs4, 3, 0);
595  check_ucs4_to_utf8 (ucs4, 3, utf8, 3, 0);
596  check_utf8_to_utf16 (utf8, 3, utf16, 3, 0);
597  check_utf16_to_utf8 (utf16, 3, utf8, 3, 0);
598  check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
599  check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
600
601  utf8 = "\316\261\316\262\316\263";
602  ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0;
603  utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0;
604
605  check_utf8_to_ucs4 (utf8, 6, ucs4, 3, 0);
606  check_ucs4_to_utf8 (ucs4, 3, utf8, 6, 0);
607  check_utf8_to_utf16 (utf8, 6, utf16, 3, 0);
608  check_utf16_to_utf8 (utf16, 3, utf8, 6, 0);
609  check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
610  check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
611
612  /* partial utf8 character */
613  utf8 = "abc\316";
614  ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
615  utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
616
617  check_utf8_to_ucs4 (utf8, 4, ucs4, 3, 3);
618  check_utf8_to_utf16 (utf8, 4, utf16, 3, 3);
619
620  /* invalid utf8 */
621  utf8 = "abc\316\316";
622  ucs4[0] = 0;
623  utf16[0] = 0;
624
625  check_utf8_to_ucs4 (utf8, 5, ucs4, 0, 3);
626  check_utf8_to_utf16 (utf8, 5, utf16, 0, 3);
627
628  /* partial utf16 character */
629  utf8 = "ab";
630  ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0;
631  utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0;
632
633  check_utf16_to_utf8 (utf16, 3, utf8, 2, 2);
634  check_utf16_to_ucs4 (utf16, 3, ucs4, 2, 2);
635
636  /* invalid utf16 */
637  utf8 = NULL;
638  ucs4[0] = 0;
639  utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0;
640
641  check_utf16_to_utf8 (utf16, 3, utf8, 0, 2);
642  check_utf16_to_ucs4 (utf16, 3, ucs4, 0, 2);
643
644  /* invalid ucs4 */
645  utf8 = NULL;
646  ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0;
647  utf16[0] = 0;
648
649  check_ucs4_to_utf8 (ucs4, 3, utf8, 0, 2);
650  check_ucs4_to_utf16 (ucs4, 3, utf16, 0, 2);
651}
652
653int
654main (int argc, char *argv[])
655{
656  test_iconv_state ();
657  test_one_half ();
658  test_byte_order ();
659  test_unicode_conversions ();
660
661  return 0;
662}
663