1/*
2 * Copyright © 2011  Google, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#include "hb-test.h"
28
29/* Unit tests for hb-buffer.h */
30
31
/* The same sample text, "ab" U+20000 "defg", in three encodings.  The
 * fixtures add every item except the first and the last one, so each
 * non-empty buffer is expected to hold the five codepoints
 * 'b', U+20000, 'd', 'e', 'f'. */

/* UTF-8: U+20000 takes four bytes.  The array is sized to exactly the ten
 * text bytes, so no terminating NUL is stored. */
static const char utf8[10] = "ab\360\240\200\200defg";
/* UTF-16: U+20000 is the surrogate pair 0xD840 0xDC00. */
static const uint16_t utf16[8] = {'a', 'b', 0xD840, 0xDC00, 'd', 'e', 'f', 'g'};
/* UTF-32: one item per codepoint. */
static const uint32_t utf32[7] = {'a', 'b', 0x20000, 'd', 'e', 'f', 'g'};
35
36
/* Flavors of buffer content a fixture can be set up with.  The values
 * index buffer_names[], so both lists must stay in the same order, and
 * BUFFER_NUM_TYPES (the number of flavors) must remain last. */
typedef enum {
  BUFFER_EMPTY,		/* nothing is added */
  BUFFER_ONE_BY_ONE,	/* codepoints added one at a time with hb_buffer_add() */
  BUFFER_UTF32,		/* added with hb_buffer_add_utf32() */
  BUFFER_UTF16,		/* added with hb_buffer_add_utf16() */
  BUFFER_UTF8,		/* added with hb_buffer_add_utf8() */
  BUFFER_NUM_TYPES,
} buffer_type_t;

/* Name of each flavor, indexed by buffer_type_t and passed as the flavor
 * name when the fixture tests are registered.  Sizing the array with
 * BUFFER_NUM_TYPES makes the compiler reject surplus names and keeps every
 * flavor index inside the array even if a name is forgotten. */
static const char *buffer_names[BUFFER_NUM_TYPES] = {
  "empty",
  "one-by-one",
  "utf32",
  "utf16",
  "utf8"
};
53
/* Per-test fixture: the buffer under test, created by fixture_init() and
 * destroyed by fixture_finish(). */
typedef struct
{
  hb_buffer_t *buffer;
} fixture_t;
58
59static void
60fixture_init (fixture_t *fixture, gconstpointer user_data)
61{
62  hb_buffer_t *b;
63  unsigned int i;
64
65  b = fixture->buffer = hb_buffer_create ();
66
67  switch (GPOINTER_TO_INT (user_data))
68  {
69    case BUFFER_EMPTY:
70      break;
71
72    case BUFFER_ONE_BY_ONE:
73      for (i = 1; i < G_N_ELEMENTS (utf32) - 1; i++)
74	hb_buffer_add (b, utf32[i], i);
75      break;
76
77    case BUFFER_UTF32:
78      hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
79      break;
80
81    case BUFFER_UTF16:
82      hb_buffer_add_utf16 (b, utf16, G_N_ELEMENTS (utf16), 1, G_N_ELEMENTS (utf16) - 2);
83      break;
84
85    case BUFFER_UTF8:
86      hb_buffer_add_utf8  (b, utf8,  G_N_ELEMENTS (utf8),  1, G_N_ELEMENTS (utf8)  - 2);
87      break;
88
89    default:
90      g_assert_not_reached ();
91  }
92}
93
94static void
95fixture_finish (fixture_t *fixture, gconstpointer user_data)
96{
97  hb_buffer_destroy (fixture->buffer);
98}
99
100
101static void
102test_buffer_properties (fixture_t *fixture, gconstpointer user_data)
103{
104  hb_buffer_t *b = fixture->buffer;
105  hb_unicode_funcs_t *ufuncs;
106
107  /* test default properties */
108
109  g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
110  g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
111  g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
112  g_assert (hb_buffer_get_language (b) == NULL);
113
114
115  /* test property changes are retained */
116  ufuncs = hb_unicode_funcs_create (NULL);
117  hb_buffer_set_unicode_funcs (b, ufuncs);
118  hb_unicode_funcs_destroy (ufuncs);
119  g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
120
121  hb_buffer_set_direction (b, HB_DIRECTION_RTL);
122  g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
123
124  hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
125  g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
126
127  hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
128  g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
129
130  hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
131  g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
132
133  hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
134  g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
135
136
137  /* test clear_contents clears all these properties: */
138
139  hb_buffer_clear_contents (b);
140
141  g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
142  g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
143  g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
144  g_assert (hb_buffer_get_language (b) == NULL);
145
146  /* but not these: */
147
148  g_assert (hb_buffer_get_flags (b) != HB_BUFFER_FLAGS_DEFAULT);
149  g_assert (hb_buffer_get_replacement_codepoint (b) != HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
150
151
152  /* test reset clears all properties */
153
154  hb_buffer_set_direction (b, HB_DIRECTION_RTL);
155  g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
156
157  hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
158  g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
159
160  hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
161  g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
162
163  hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
164  g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
165
166  hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
167  g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
168
169  hb_buffer_reset (b);
170
171  g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
172  g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
173  g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
174  g_assert (hb_buffer_get_language (b) == NULL);
175  g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
176  g_assert (hb_buffer_get_replacement_codepoint (b) == HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
177}
178
179static void
180test_buffer_contents (fixture_t *fixture, gconstpointer user_data)
181{
182  hb_buffer_t *b = fixture->buffer;
183  unsigned int i, len, len2;
184  buffer_type_t buffer_type = GPOINTER_TO_INT (user_data);
185  hb_glyph_info_t *glyphs;
186
187  if (buffer_type == BUFFER_EMPTY) {
188    g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
189    return;
190  }
191
192  len = hb_buffer_get_length (b);
193  hb_buffer_get_glyph_infos (b, NULL); /* test NULL */
194  glyphs = hb_buffer_get_glyph_infos (b, &len2);
195  g_assert_cmpint (len, ==, len2);
196  g_assert_cmpint (len, ==, 5);
197
198  for (i = 0; i < len; i++) {
199    g_assert_cmphex (glyphs[i].mask,      ==, 0);
200    g_assert_cmphex (glyphs[i].var1.u32,  ==, 0);
201    g_assert_cmphex (glyphs[i].var2.u32,  ==, 0);
202  }
203
204  for (i = 0; i < len; i++) {
205    unsigned int cluster;
206    cluster = 1+i;
207    if (i >= 2) {
208      if (buffer_type == BUFFER_UTF16)
209	cluster++;
210      else if (buffer_type == BUFFER_UTF8)
211        cluster += 3;
212    }
213    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
214    g_assert_cmphex (glyphs[i].cluster,   ==, cluster);
215  }
216
217  /* reverse, test, and reverse back */
218
219  hb_buffer_reverse (b);
220  for (i = 0; i < len; i++)
221    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
222
223  hb_buffer_reverse (b);
224  for (i = 0; i < len; i++)
225    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
226
227  /* reverse_clusters works same as reverse for now since each codepoint is
228   * in its own cluster */
229
230  hb_buffer_reverse_clusters (b);
231  for (i = 0; i < len; i++)
232    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
233
234  hb_buffer_reverse_clusters (b);
235  for (i = 0; i < len; i++)
236    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
237
238  /* now form a cluster and test again */
239  glyphs[2].cluster = glyphs[1].cluster;
240
241  /* reverse, test, and reverse back */
242
243  hb_buffer_reverse (b);
244  for (i = 0; i < len; i++)
245    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
246
247  hb_buffer_reverse (b);
248  for (i = 0; i < len; i++)
249    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
250
251  /* reverse_clusters twice still should return the original string,
252   * but when applied once, the 1-2 cluster should be retained. */
253
254  hb_buffer_reverse_clusters (b);
255  for (i = 0; i < len; i++) {
256    unsigned int j = len-1-i;
257    if (j == 1)
258      j = 2;
259    else if (j == 2)
260      j = 1;
261    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+j]);
262  }
263
264  hb_buffer_reverse_clusters (b);
265  for (i = 0; i < len; i++)
266    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
267
268
269  /* test setting length */
270
271  /* enlarge */
272  g_assert (hb_buffer_set_length (b, 10));
273  glyphs = hb_buffer_get_glyph_infos (b, NULL);
274  g_assert_cmpint (hb_buffer_get_length (b), ==, 10);
275  for (i = 0; i < 5; i++)
276    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
277  for (i = 5; i < 10; i++)
278    g_assert_cmphex (glyphs[i].codepoint, ==, 0);
279  /* shrink */
280  g_assert (hb_buffer_set_length (b, 3));
281  glyphs = hb_buffer_get_glyph_infos (b, NULL);
282  g_assert_cmpint (hb_buffer_get_length (b), ==, 3);
283  for (i = 0; i < 3; i++)
284    g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
285
286
287  g_assert (hb_buffer_allocation_successful (b));
288
289
290  /* test reset clears content */
291
292  hb_buffer_reset (b);
293  g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
294}
295
296static void
297test_buffer_positions (fixture_t *fixture, gconstpointer user_data)
298{
299  hb_buffer_t *b = fixture->buffer;
300  unsigned int i, len, len2;
301  hb_glyph_position_t *positions;
302
303  /* Without shaping, positions should all be zero */
304  len = hb_buffer_get_length (b);
305  hb_buffer_get_glyph_positions (b, NULL); /* test NULL */
306  positions = hb_buffer_get_glyph_positions (b, &len2);
307  g_assert_cmpint (len, ==, len2);
308  for (i = 0; i < len; i++) {
309    g_assert_cmpint (0, ==, positions[i].x_advance);
310    g_assert_cmpint (0, ==, positions[i].y_advance);
311    g_assert_cmpint (0, ==, positions[i].x_offset);
312    g_assert_cmpint (0, ==, positions[i].y_offset);
313    g_assert_cmpint (0, ==, positions[i].var.i32);
314  }
315
316  /* test reset clears content */
317  hb_buffer_reset (b);
318  g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
319}
320
321static void
322test_buffer_allocation (fixture_t *fixture, gconstpointer user_data)
323{
324  hb_buffer_t *b = fixture->buffer;
325
326  g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
327
328  g_assert (hb_buffer_pre_allocate (b, 100));
329  g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
330  g_assert (hb_buffer_allocation_successful (b));
331
332  /* lets try a huge allocation, make sure it fails */
333  g_assert (!hb_buffer_pre_allocate (b, (unsigned int) -1));
334  g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
335  g_assert (!hb_buffer_allocation_successful (b));
336
337  /* small one again */
338  g_assert (hb_buffer_pre_allocate (b, 50));
339  g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
340  g_assert (!hb_buffer_allocation_successful (b));
341
342  hb_buffer_reset (b);
343  g_assert (hb_buffer_allocation_successful (b));
344
345  /* all allocation and size  */
346  g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 + 1));
347  g_assert (!hb_buffer_allocation_successful (b));
348
349  hb_buffer_reset (b);
350  g_assert (hb_buffer_allocation_successful (b));
351
352  /* technically, this one can actually pass on 64bit machines, but
353   * I'm doubtful that any malloc allows 4GB allocations at a time.
354   * But let's only enable it on a 32-bit machine. */
355  if (sizeof (long) == 4) {
356    g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 - 1));
357    g_assert (!hb_buffer_allocation_successful (b));
358  }
359
360  hb_buffer_reset (b);
361  g_assert (hb_buffer_allocation_successful (b));
362}
363
364
/* One UTF-8 decoding case: a NUL-terminated input and the codepoints the
 * buffer is expected to hold afterwards.  The codepoints list ends at its
 * first zero entry.  -1 (converted to uint32_t) stands for the replacement
 * codepoint that test_buffer_utf8_conversion() installs on its buffer. */
typedef struct {
  const char utf8[8];
  const uint32_t codepoints[8];
} utf8_conversion_test_t;

/* note: we skip the first and last byte when adding to buffer */
static const utf8_conversion_test_t utf8_conversion_tests[] = {
  {"a\303\207", {-1}},				/* only the lead byte \303 is added */
  {"a\303\207b", {0xC7}},			/* complete two-byte sequence */
  {"ab\303cd", {'b', -1, 'c'}},			/* lead byte without continuation */
  {"ab\303\302\301cd", {'b', -1, -1, -1, 'c'}}	/* three lead bytes in a row */
};
377
378static void
379test_buffer_utf8_conversion (void)
380{
381  hb_buffer_t *b;
382  hb_glyph_info_t *glyphs;
383  unsigned int bytes, chars, i, j, len;
384
385  b = hb_buffer_create ();
386  hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
387
388  for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
389  {
390    const utf8_conversion_test_t *test = &utf8_conversion_tests[i];
391    char *escaped;
392
393    escaped = g_strescape (test->utf8, NULL);
394    g_test_message ("UTF-8 test #%d: %s", i, escaped);
395    g_free (escaped);
396
397    bytes = strlen (test->utf8);
398    for (chars = 0; test->codepoints[chars]; chars++)
399      ;
400
401    hb_buffer_clear_contents (b);
402    hb_buffer_add_utf8 (b, test->utf8, bytes,  1, bytes - 2);
403
404    glyphs = hb_buffer_get_glyph_infos (b, &len);
405    g_assert_cmpint (len, ==, chars);
406    for (j = 0; j < chars; j++)
407      g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
408  }
409
410  hb_buffer_destroy (b);
411}
412
413
414
415/* Following test table is adapted from glib/glib/tests/utf8-validate.c
416 * with relicensing permission from Matthias Clasen. */
417
/* One UTF-8 validation case, as consumed by test_buffer_utf8_validity():
 *   utf8:    NUL-terminated input text.
 *   max_len: number of bytes to add to the buffer, or -1 for the whole
 *            text.
 *   offset:  for invalid input, the cluster expected on the first
 *            replacement glyph; not checked when valid is TRUE.
 *   valid:   TRUE if no replacement glyph is expected. */
typedef struct {
  const char *utf8;
  int max_len;
  unsigned int offset;
  gboolean valid;
} utf8_validity_test_t;
424
/* Validation cases, one { utf8, max_len, offset, valid } row each.  The
 * test measures the text with strlen(), so "\x00" is seen as an empty
 * string.  The offset column is only checked for rows marked FALSE. */
static const utf8_validity_test_t utf8_validity_tests[] = {
  /* some tests to check max_len handling */
  /* length 1 */
  { "abcde", -1, 5, TRUE },
  { "abcde", 3, 3, TRUE },
  { "abcde", 5, 5, TRUE },
  /* length 2 */
  { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  1, 0, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  2, 2, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  3, 2, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  4, 4, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  5, 4, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  6, 6, TRUE },
  /* length 3 */
  { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  1, 0, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  2, 0, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  3, 3, TRUE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  4, 3, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  5, 3, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  6, 6, TRUE },

  /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
  /* greek 'kosme' */
  { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
  /* first sequence of each length */
  { "\x00", -1, 0, TRUE },
  { "\xc2\x80", -1, 2, TRUE },
  { "\xe0\xa0\x80", -1, 3, TRUE },
  { "\xf0\x90\x80\x80", -1, 4, TRUE },
  { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
  { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
  /* last sequence of each length */
  { "\x7f", -1, 1, TRUE },
  { "\xdf\xbf", -1, 2, TRUE },
  { "\xef\xbf\xbf", -1, 0, TRUE },
  { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
  { "\xf4\x90\xbf\xbf", -1, 0, FALSE },
  { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
  { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
  { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
  /* other boundary conditions */
  { "\xed\x9f\xbf", -1, 3, TRUE },
  { "\xed\xa0\x80", -1, 0, FALSE },
  { "\xed\xbf\xbf", -1, 0, FALSE },
  { "\xee\x80\x80", -1, 3, TRUE },
  { "\xef\xbf\xbd", -1, 3, TRUE },
  { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
  /* malformed sequences */
  /* continuation bytes */
  { "\x80", -1, 0, FALSE },
  { "\xbf", -1, 0, FALSE },
  { "\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },

  /* all possible continuation bytes */
  { "\x80", -1, 0, FALSE },
  { "\x81", -1, 0, FALSE },
  { "\x82", -1, 0, FALSE },
  { "\x83", -1, 0, FALSE },
  { "\x84", -1, 0, FALSE },
  { "\x85", -1, 0, FALSE },
  { "\x86", -1, 0, FALSE },
  { "\x87", -1, 0, FALSE },
  { "\x88", -1, 0, FALSE },
  { "\x89", -1, 0, FALSE },
  { "\x8a", -1, 0, FALSE },
  { "\x8b", -1, 0, FALSE },
  { "\x8c", -1, 0, FALSE },
  { "\x8d", -1, 0, FALSE },
  { "\x8e", -1, 0, FALSE },
  { "\x8f", -1, 0, FALSE },
  { "\x90", -1, 0, FALSE },
  { "\x91", -1, 0, FALSE },
  { "\x92", -1, 0, FALSE },
  { "\x93", -1, 0, FALSE },
  { "\x94", -1, 0, FALSE },
  { "\x95", -1, 0, FALSE },
  { "\x96", -1, 0, FALSE },
  { "\x97", -1, 0, FALSE },
  { "\x98", -1, 0, FALSE },
  { "\x99", -1, 0, FALSE },
  { "\x9a", -1, 0, FALSE },
  { "\x9b", -1, 0, FALSE },
  { "\x9c", -1, 0, FALSE },
  { "\x9d", -1, 0, FALSE },
  { "\x9e", -1, 0, FALSE },
  { "\x9f", -1, 0, FALSE },
  { "\xa0", -1, 0, FALSE },
  { "\xa1", -1, 0, FALSE },
  { "\xa2", -1, 0, FALSE },
  { "\xa3", -1, 0, FALSE },
  { "\xa4", -1, 0, FALSE },
  { "\xa5", -1, 0, FALSE },
  { "\xa6", -1, 0, FALSE },
  { "\xa7", -1, 0, FALSE },
  { "\xa8", -1, 0, FALSE },
  { "\xa9", -1, 0, FALSE },
  { "\xaa", -1, 0, FALSE },
  { "\xab", -1, 0, FALSE },
  { "\xac", -1, 0, FALSE },
  { "\xad", -1, 0, FALSE },
  { "\xae", -1, 0, FALSE },
  { "\xaf", -1, 0, FALSE },
  { "\xb0", -1, 0, FALSE },
  { "\xb1", -1, 0, FALSE },
  { "\xb2", -1, 0, FALSE },
  { "\xb3", -1, 0, FALSE },
  { "\xb4", -1, 0, FALSE },
  { "\xb5", -1, 0, FALSE },
  { "\xb6", -1, 0, FALSE },
  { "\xb7", -1, 0, FALSE },
  { "\xb8", -1, 0, FALSE },
  { "\xb9", -1, 0, FALSE },
  { "\xba", -1, 0, FALSE },
  { "\xbb", -1, 0, FALSE },
  { "\xbc", -1, 0, FALSE },
  { "\xbd", -1, 0, FALSE },
  { "\xbe", -1, 0, FALSE },
  { "\xbf", -1, 0, FALSE },
  /* lone start characters */
  { "\xc0\x20", -1, 0, FALSE },
  { "\xc1\x20", -1, 0, FALSE },
  { "\xc2\x20", -1, 0, FALSE },
  { "\xc3\x20", -1, 0, FALSE },
  { "\xc4\x20", -1, 0, FALSE },
  { "\xc5\x20", -1, 0, FALSE },
  { "\xc6\x20", -1, 0, FALSE },
  { "\xc7\x20", -1, 0, FALSE },
  { "\xc8\x20", -1, 0, FALSE },
  { "\xc9\x20", -1, 0, FALSE },
  { "\xca\x20", -1, 0, FALSE },
  { "\xcb\x20", -1, 0, FALSE },
  { "\xcc\x20", -1, 0, FALSE },
  { "\xcd\x20", -1, 0, FALSE },
  { "\xce\x20", -1, 0, FALSE },
  { "\xcf\x20", -1, 0, FALSE },
  { "\xd0\x20", -1, 0, FALSE },
  { "\xd1\x20", -1, 0, FALSE },
  { "\xd2\x20", -1, 0, FALSE },
  { "\xd3\x20", -1, 0, FALSE },
  { "\xd4\x20", -1, 0, FALSE },
  { "\xd5\x20", -1, 0, FALSE },
  { "\xd6\x20", -1, 0, FALSE },
  { "\xd7\x20", -1, 0, FALSE },
  { "\xd8\x20", -1, 0, FALSE },
  { "\xd9\x20", -1, 0, FALSE },
  { "\xda\x20", -1, 0, FALSE },
  { "\xdb\x20", -1, 0, FALSE },
  { "\xdc\x20", -1, 0, FALSE },
  { "\xdd\x20", -1, 0, FALSE },
  { "\xde\x20", -1, 0, FALSE },
  { "\xdf\x20", -1, 0, FALSE },
  { "\xe0\x20", -1, 0, FALSE },
  { "\xe1\x20", -1, 0, FALSE },
  { "\xe2\x20", -1, 0, FALSE },
  { "\xe3\x20", -1, 0, FALSE },
  { "\xe4\x20", -1, 0, FALSE },
  { "\xe5\x20", -1, 0, FALSE },
  { "\xe6\x20", -1, 0, FALSE },
  { "\xe7\x20", -1, 0, FALSE },
  { "\xe8\x20", -1, 0, FALSE },
  { "\xe9\x20", -1, 0, FALSE },
  { "\xea\x20", -1, 0, FALSE },
  { "\xeb\x20", -1, 0, FALSE },
  { "\xec\x20", -1, 0, FALSE },
  { "\xed\x20", -1, 0, FALSE },
  { "\xee\x20", -1, 0, FALSE },
  { "\xef\x20", -1, 0, FALSE },
  { "\xf0\x20", -1, 0, FALSE },
  { "\xf1\x20", -1, 0, FALSE },
  { "\xf2\x20", -1, 0, FALSE },
  { "\xf3\x20", -1, 0, FALSE },
  { "\xf4\x20", -1, 0, FALSE },
  { "\xf5\x20", -1, 0, FALSE },
  { "\xf6\x20", -1, 0, FALSE },
  { "\xf7\x20", -1, 0, FALSE },
  { "\xf8\x20", -1, 0, FALSE },
  { "\xf9\x20", -1, 0, FALSE },
  { "\xfa\x20", -1, 0, FALSE },
  { "\xfb\x20", -1, 0, FALSE },
  { "\xfc\x20", -1, 0, FALSE },
  { "\xfd\x20", -1, 0, FALSE },
  /* missing continuation bytes */
  { "\x20\xc0", -1, 1, FALSE },
  { "\x20\xe0\x80", -1, 1, FALSE },
  { "\x20\xf0\x80\x80", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
  { "\x20\xdf", -1, 1, FALSE },
  { "\x20\xef\xbf", -1, 1, FALSE },
  { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
  { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
  { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
  /* impossible bytes */
  { "\x20\xfe\x20", -1, 1, FALSE },
  { "\x20\xff\x20", -1, 1, FALSE },
  /* overlong sequences */
  { "\x20\xc0\xaf\x20", -1, 1, FALSE },
  { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xc1\xbf\x20", -1, 1, FALSE },
  { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
  { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xc0\x80\x20", -1, 1, FALSE },
  { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
  /* illegal code positions */
  { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
#if 0 /* We don't consider U+FFFE / U+FFFF and similar invalid. */
  { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
  { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
#endif
  { "", -1, 0, TRUE }
};
665
666static void
667test_buffer_utf8_validity (void)
668{
669  hb_buffer_t *b;
670  unsigned int i;
671
672  b = hb_buffer_create ();
673  hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
674
675  for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
676  {
677    const utf8_validity_test_t *test = &utf8_validity_tests[i];
678    unsigned int text_bytes, segment_bytes, j, len;
679    hb_glyph_info_t *glyphs;
680    char *escaped;
681
682    escaped = g_strescape (test->utf8, NULL);
683    g_test_message ("UTF-8 test #%d: %s", i, escaped);
684    g_free (escaped);
685
686    text_bytes = strlen (test->utf8);
687    if (test->max_len == -1)
688      segment_bytes = text_bytes;
689    else
690      segment_bytes = test->max_len;
691
692    hb_buffer_clear_contents (b);
693    hb_buffer_add_utf8 (b, test->utf8, text_bytes,  0, segment_bytes);
694
695    glyphs = hb_buffer_get_glyph_infos (b, &len);
696    for (j = 0; j < len; j++)
697      if (glyphs[j].codepoint == (hb_codepoint_t) -1)
698	break;
699
700    g_assert (test->valid ? j == len : j < len);
701    if (!test->valid)
702      g_assert (glyphs[j].cluster == test->offset);
703  }
704
705  hb_buffer_destroy (b);
706}
707
708
/* One UTF-16 decoding case: the input code units and the codepoints the
 * buffer is expected to hold afterwards.  Both lists end at their first
 * zero entry.  -1 (converted to uint32_t) stands for the replacement
 * codepoint that test_buffer_utf16_conversion() installs on its buffer. */
typedef struct {
  const uint16_t utf16[8];
  const uint32_t codepoints[8];
} utf16_conversion_test_t;

/* note: we skip the first and last item from utf16 when adding to buffer */
static const utf16_conversion_test_t utf16_conversion_tests[] = {
  {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
  {{0x41, 0xD800, 0xDF02, 0x61}, {0x10302}},
  {{0x41, 0xD800, 0xDF02}, {-1}},		/* low surrogate is the skipped last item */
  {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -1}},
  {{0x41, 0xD800, 0x61, 0xDF02}, {-1, 0x61}},
  {{0x41, 0xDF00, 0x61}, {-1}},			/* lone low surrogate */
  {{0x41, 0x61}, {0}}				/* nothing left after skipping both ends */
};
724
725static void
726test_buffer_utf16_conversion (void)
727{
728  hb_buffer_t *b;
729  unsigned int i;
730
731  b = hb_buffer_create ();
732  hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
733
734  for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
735  {
736    const utf16_conversion_test_t *test = &utf16_conversion_tests[i];
737    unsigned int u_len, chars, j, len;
738    hb_glyph_info_t *glyphs;
739
740    g_test_message ("UTF-16 test #%d", i);
741
742    for (u_len = 0; test->utf16[u_len]; u_len++)
743      ;
744    for (chars = 0; test->codepoints[chars]; chars++)
745      ;
746
747    hb_buffer_clear_contents (b);
748    hb_buffer_add_utf16 (b, test->utf16, u_len,  1, u_len - 2);
749
750    glyphs = hb_buffer_get_glyph_infos (b, &len);
751    g_assert_cmpint (len, ==, chars);
752    for (j = 0; j < chars; j++)
753      g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
754  }
755
756  hb_buffer_destroy (b);
757}
758
759
/* One UTF-32 case: the input items and the codepoints the buffer is
 * expected to hold afterwards.  Both lists end at their first zero entry.
 * -3 (converted to uint32_t) stands for the replacement codepoint that
 * test_buffer_utf32_conversion() installs on its buffer. */
typedef struct {
  const uint32_t utf32[8];
  const uint32_t codepoints[8];
} utf32_conversion_test_t;

/* note: we skip the first and last item from utf32 when adding to buffer */
static const utf32_conversion_test_t utf32_conversion_tests[] = {
  /* surrogate values are replaced individually, never combined */
  {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, -3, -3}},
  {{0x41, 0x004D, 0x0430, 0x4E8C, 0x10302, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
  {{0x41, 0xD800, 0xDF02, 0x61}, {-3, -3}},
  {{0x41, 0xD800, 0xDF02}, {-3}},
  {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -3}},
  {{0x41, 0xD800, 0x61, 0xDF02}, {-3, 0x61}},
  {{0x41, 0xDF00, 0x61}, {-3}},
  {{0x41, 0x10FFFF, 0x61}, {0x10FFFF}},		/* 0x10FFFF is kept as is */
  {{0x41, 0x110000, 0x61}, {-3}},		/* 0x110000 is replaced */
  {{0x41, 0x61}, {0}}				/* nothing left after skipping both ends */
};
778
779static void
780test_buffer_utf32_conversion (void)
781{
782  hb_buffer_t *b;
783  unsigned int i;
784
785  b = hb_buffer_create ();
786  hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -3);
787
788  for (i = 0; i < G_N_ELEMENTS (utf32_conversion_tests); i++)
789  {
790    const utf32_conversion_test_t *test = &utf32_conversion_tests[i];
791    unsigned int u_len, chars, j, len;
792    hb_glyph_info_t *glyphs;
793
794    g_test_message ("UTF-32 test #%d", i);
795
796    for (u_len = 0; test->utf32[u_len]; u_len++)
797      ;
798    for (chars = 0; test->codepoints[chars]; chars++)
799      ;
800
801    hb_buffer_clear_contents (b);
802    hb_buffer_add_utf32 (b, test->utf32, u_len,  1, u_len - 2);
803
804    glyphs = hb_buffer_get_glyph_infos (b, &len);
805    g_assert_cmpint (len, ==, chars);
806    for (j = 0; j < chars; j++)
807      g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
808  }
809
810  hb_buffer_destroy (b);
811}
812
813
814static void
815test_empty (hb_buffer_t *b)
816{
817  g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
818  g_assert (!hb_buffer_get_glyph_infos (b, NULL));
819  g_assert (!hb_buffer_get_glyph_positions (b, NULL));
820}
821
822static void
823test_buffer_empty (void)
824{
825  hb_buffer_t *b = hb_buffer_get_empty ();
826
827  g_assert (hb_buffer_get_empty ());
828  g_assert (hb_buffer_get_empty () == b);
829
830  g_assert (!hb_buffer_allocation_successful (b));
831
832  test_empty (b);
833
834  hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
835
836  test_empty (b);
837
838  hb_buffer_reverse (b);
839  hb_buffer_reverse_clusters (b);
840
841  g_assert (!hb_buffer_set_length (b, 10));
842
843  test_empty (b);
844
845  g_assert (hb_buffer_set_length (b, 0));
846
847  test_empty (b);
848
849  g_assert (!hb_buffer_allocation_successful (b));
850
851  hb_buffer_reset (b);
852
853  test_empty (b);
854
855  g_assert (!hb_buffer_allocation_successful (b));
856}
857
/* Test entry point: initializes the test harness, registers all tests and
 * returns the result of hb_test_run(). */
int
main (int argc, char **argv)
{
  unsigned int i;

  hb_test_init (&argc, &argv);

  /* The property, content and position tests are registered once per
   * buffer flavor; the flavor is handed to the fixture functions as
   * user_data and its name is used as the flavor name. */
  for (i = 0; i < BUFFER_NUM_TYPES; i++)
  {
    const void *buffer_type = GINT_TO_POINTER (i);
    const char *buffer_name = buffer_names[i];

    hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_properties);
    hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_contents);
    hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_positions);
  }

  /* The allocation test asserts a zero length up front, so it only gets
   * the empty flavor. */
  hb_test_add_fixture (fixture, GINT_TO_POINTER (BUFFER_EMPTY), test_buffer_allocation);

  /* These tests create their own buffers and need no fixture. */
  hb_test_add (test_buffer_utf8_conversion);
  hb_test_add (test_buffer_utf8_validity);
  hb_test_add (test_buffer_utf16_conversion);
  hb_test_add (test_buffer_utf32_conversion);
  hb_test_add (test_buffer_empty);

  return hb_test_run();
}
885