1#undef G_DISABLE_ASSERT 2#undef G_LOG_DOMAIN 3 4#include <glib.h> 5#include <stdio.h> 6#include <stdlib.h> 7#include <string.h> 8 9gboolean success = TRUE; 10 11static char * 12decode (const gchar *input) 13{ 14 unsigned ch; 15 int offset = 0; 16 GString *result = g_string_new (NULL); 17 18 do 19 { 20 if (sscanf (input + offset, "%x", &ch) != 1) 21 { 22 fprintf (stderr, "Error parsing character string %s\n", input); 23 exit (1); 24 } 25 26 g_string_append_unichar (result, ch); 27 28 while (input[offset] && input[offset] != ' ') 29 offset++; 30 while (input[offset] && input[offset] == ' ') 31 offset++; 32 } 33 while (input[offset]); 34 35 return g_string_free (result, FALSE); 36} 37 38const char *names[4] = { 39 "NFD", 40 "NFC", 41 "NFKD", 42 "NFKC" 43}; 44 45static char * 46encode (const gchar *input) 47{ 48 GString *result = g_string_new(NULL); 49 50 const gchar *p = input; 51 while (*p) 52 { 53 gunichar c = g_utf8_get_char (p); 54 g_string_append_printf (result, "%04X ", c); 55 p = g_utf8_next_char(p); 56 } 57 58 return g_string_free (result, FALSE); 59} 60 61static void 62test_form (int line, 63 GNormalizeMode mode, 64 gboolean do_compat, 65 int expected, 66 char **c, 67 char **raw) 68{ 69 int i; 70 71 gboolean mode_is_compat = (mode == G_NORMALIZE_NFKC || 72 mode == G_NORMALIZE_NFKD); 73 74 if (mode_is_compat || !do_compat) 75 { 76 for (i = 0; i < 3; i++) 77 { 78 char *result = g_utf8_normalize (c[i], -1, mode); 79 if (strcmp (result, c[expected]) != 0) 80 { 81 char *result_raw = encode(result); 82 fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]); 83 fprintf (stderr, " g_utf8_normalize (%s, %s) != %s but %s\n", 84 raw[i], names[mode], raw[expected], result_raw); 85 g_free (result_raw); 86 success = FALSE; 87 } 88 89 g_free (result); 90 } 91 } 92 if (mode_is_compat || do_compat) 93 { 94 for (i = 3; i < 5; i++) 95 { 96 char *result = g_utf8_normalize (c[i], -1, mode); 97 if (strcmp (result, c[expected]) != 0) 98 { 99 char *result_raw = encode(result); 100 fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i, raw[5]); 101 fprintf (stderr, " g_utf8_normalize (%s, %s) != %s but %s\n", 102 raw[i], names[mode], raw[expected], result_raw); 103 g_free (result_raw); 104 success = FALSE; 105 } 106 107 g_free (result); 108 } 109 } 110} 111 112static gboolean 113process_one (int line, gchar **columns) 114{ 115 char *c[5]; 116 int i; 117 gboolean skip = FALSE; 118 119 for (i=0; i < 5; i++) 120 { 121 c[i] = decode(columns[i]); 122 if (!c[i]) 123 skip = TRUE; 124 } 125 126 if (!skip) 127 { 128 test_form (line, G_NORMALIZE_NFD, FALSE, 2, c, columns); 129 test_form (line, G_NORMALIZE_NFD, TRUE, 4, c, columns); 130 test_form (line, G_NORMALIZE_NFC, FALSE, 1, c, columns); 131 test_form (line, G_NORMALIZE_NFC, TRUE, 3, c, columns); 132 test_form (line, G_NORMALIZE_NFKD, TRUE, 4, c, columns); 133 test_form (line, G_NORMALIZE_NFKC, TRUE, 3, c, columns); 134 } 135 136 for (i=0; i < 5; i++) 137 g_free (c[i]); 138 139 return TRUE; 140} 141 142int main (int argc, char **argv) 143{ 144 GIOChannel *in; 145 GError *error = NULL; 146 GString *buffer = g_string_new (NULL); 147 int line_to_do = 0; 148 int line = 1; 149 150 if (argc != 2 && argc != 3) 151 { 152 fprintf (stderr, "Usage: unicode-normalize NormalizationTest.txt LINE\n"); 153 return 1; 154 } 155 156 if (argc == 3) 157 line_to_do = atoi(argv[2]); 158 159 in = g_io_channel_new_file (argv[1], "r", &error); 160 if (!in) 161 { 162 fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message); 163 return 1; 164 } 165 166 while (TRUE) 167 { 168 gsize term_pos; 169 gchar **columns; 170 171 if (g_io_channel_read_line_string (in, buffer, &term_pos, &error) != G_IO_STATUS_NORMAL) 172 break; 173 174 if (line_to_do && line != line_to_do) 175 goto next; 176 177 buffer->str[term_pos] = '\0'; 178 179 if (buffer->str[0] == '#') /* Comment */ 180 goto next; 181 if (buffer->str[0] == '@') /* Part */ 182 { 183 fprintf (stderr, "\nProcessing %s\n", buffer->str + 1); 184 goto next; 185 } 186 187 columns = g_strsplit (buffer->str, ";", -1); 188 if (!columns[0]) 189 goto next; 190 191 if (!process_one (line, columns)) 192 return 1; 193 g_strfreev (columns); 194 195 next: 196 g_string_truncate (buffer, 0); 197 line++; 198 } 199 200 if (error) 201 { 202 fprintf (stderr, "Error reading test file, %s\n", error->message); 203 return 1; 204 } 205 206 g_io_channel_unref (in); 207 g_string_free (buffer, TRUE); 208 209 return !success; 210} 211