1#undef G_DISABLE_ASSERT
2#undef G_LOG_DOMAIN
3
4#include <glib.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8
/* Overall test verdict: starts out TRUE and is cleared by test_form() on the
 * first mismatch; main() returns !success as the process exit status. */
gboolean success = TRUE;
10
11static char *
12decode (const gchar *input)
13{
14  unsigned ch;
15  int offset = 0;
16  GString *result = g_string_new (NULL);
17
18  do
19    {
20      if (sscanf (input + offset, "%x", &ch) != 1)
21	{
22	  fprintf (stderr, "Error parsing character string %s\n", input);
23	  exit (1);
24	}
25
26      g_string_append_unichar (result, ch);
27
28      while (input[offset] && input[offset] != ' ')
29	offset++;
30      while (input[offset] && input[offset] == ' ')
31	offset++;
32    }
33  while (input[offset]);
34
35  return g_string_free (result, FALSE);
36}
37
/* Printable names of the normalization forms.  test_form() indexes this table
 * with its GNormalizeMode argument, so the order must follow the numeric
 * values of G_NORMALIZE_NFD, _NFC, _NFKD and _NFKC. */
const char *names[4] = { "NFD", "NFC", "NFKD", "NFKC" };
44
45static char *
46encode (const gchar *input)
47{
48  GString *result = g_string_new(NULL);
49
50  const gchar *p = input;
51  while (*p)
52    {
53      gunichar c = g_utf8_get_char (p);
54      g_string_append_printf (result, "%04X ", c);
55      p = g_utf8_next_char(p);
56    }
57
58  return g_string_free (result, FALSE);
59}
60
61static void
62test_form (int            line,
63	   GNormalizeMode mode,
64	   gboolean       do_compat,
65	   int            expected,
66	   char         **c,
67	   char         **raw)
68{
69  int i;
70
71  gboolean mode_is_compat = (mode == G_NORMALIZE_NFKC ||
72			     mode == G_NORMALIZE_NFKD);
73
74  if (mode_is_compat || !do_compat)
75    {
76      for (i = 0; i < 3; i++)
77	{
78	  char *result = g_utf8_normalize (c[i], -1, mode);
79	  if (strcmp (result, c[expected]) != 0)
80	    {
81	      char *result_raw = encode(result);
82	      fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]);
83	      fprintf (stderr, "  g_utf8_normalize (%s, %s) != %s but %s\n",
84		   raw[i], names[mode], raw[expected], result_raw);
85	      g_free (result_raw);
86	      success = FALSE;
87	    }
88
89	  g_free (result);
90	}
91    }
92  if (mode_is_compat || do_compat)
93    {
94      for (i = 3; i < 5; i++)
95	{
96	  char *result = g_utf8_normalize (c[i], -1, mode);
97	  if (strcmp (result, c[expected]) != 0)
98	    {
99	      char *result_raw = encode(result);
100	      fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i, raw[5]);
101	      fprintf (stderr, "  g_utf8_normalize (%s, %s) != %s but %s\n",
102		   raw[i], names[mode], raw[expected], result_raw);
103	      g_free (result_raw);
104	      success = FALSE;
105	    }
106
107	  g_free (result);
108	}
109    }
110}
111
112static gboolean
113process_one (int line, gchar **columns)
114{
115  char *c[5];
116  int i;
117  gboolean skip = FALSE;
118
119  for (i=0; i < 5; i++)
120    {
121      c[i] = decode(columns[i]);
122      if (!c[i])
123	skip = TRUE;
124    }
125
126  if (!skip)
127    {
128      test_form (line, G_NORMALIZE_NFD, FALSE, 2, c, columns);
129      test_form (line, G_NORMALIZE_NFD, TRUE, 4, c, columns);
130      test_form (line, G_NORMALIZE_NFC, FALSE, 1, c, columns);
131      test_form (line, G_NORMALIZE_NFC, TRUE, 3, c, columns);
132      test_form (line, G_NORMALIZE_NFKD, TRUE, 4, c, columns);
133      test_form (line, G_NORMALIZE_NFKC, TRUE, 3, c, columns);
134    }
135
136  for (i=0; i < 5; i++)
137    g_free (c[i]);
138
139  return TRUE;
140}
141
142int main (int argc, char **argv)
143{
144  GIOChannel *in;
145  GError *error = NULL;
146  GString *buffer = g_string_new (NULL);
147  int line_to_do = 0;
148  int line = 1;
149
150  if (argc != 2 && argc != 3)
151    {
152      fprintf (stderr, "Usage: unicode-normalize NormalizationTest.txt LINE\n");
153      return 1;
154    }
155
156  if (argc == 3)
157    line_to_do = atoi(argv[2]);
158
159  in = g_io_channel_new_file (argv[1], "r", &error);
160  if (!in)
161    {
162      fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);
163      return 1;
164    }
165
166  while (TRUE)
167    {
168      gsize term_pos;
169      gchar **columns;
170
171      if (g_io_channel_read_line_string (in, buffer, &term_pos, &error) != G_IO_STATUS_NORMAL)
172	break;
173
174      if (line_to_do && line != line_to_do)
175	goto next;
176
177      buffer->str[term_pos] = '\0';
178
179      if (buffer->str[0] == '#') /* Comment */
180	goto next;
181      if (buffer->str[0] == '@') /* Part */
182	{
183	  fprintf (stderr, "\nProcessing %s\n", buffer->str + 1);
184	  goto next;
185	}
186
187      columns = g_strsplit (buffer->str, ";", -1);
188      if (!columns[0])
189	goto next;
190
191      if (!process_one (line, columns))
192	return 1;
193      g_strfreev (columns);
194
195    next:
196      g_string_truncate (buffer, 0);
197      line++;
198    }
199
200  if (error)
201    {
202      fprintf (stderr, "Error reading test file, %s\n", error->message);
203      return 1;
204    }
205
206  g_io_channel_unref (in);
207  g_string_free (buffer, TRUE);
208
209  return !success;
210}
211