1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 *******************************************************************************
6 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
7 * others. All Rights Reserved.                                                *
8 *******************************************************************************
9 */
10
11package android.icu.dev.test.translit;
12import java.io.File;
13import java.io.FileOutputStream;
14import java.io.IOException;
15import java.io.OutputStreamWriter;
16import java.io.PrintWriter;
17import java.util.Enumeration;
18import java.util.Iterator;
19import java.util.Map;
20import java.util.Set;
21import java.util.TreeMap;
22import java.util.TreeSet;
23
24import android.icu.lang.UCharacter;
25import android.icu.lang.UScript;
26import android.icu.text.Normalizer;
27import android.icu.text.Transliterator;
28import android.icu.text.UTF16;
29import android.icu.text.UnicodeSet;
30import android.icu.text.UnicodeSetIterator;
31
32public class WriteCharts {
33    public static void main(String[] args) throws IOException {
34        if (false) {
35            printSet("[[\u0000-\u007E \u30A1-\u30FC \uFF61-\uFF9F\u3001\u3002][:Katakana:][:Mark:]]");
36        }
37        String testSet = "";
38        if (args.length == 0) args = getAllScripts();
39        for (int i = 0; i < args.length; ++i) {
40    // Enumeration enum = Transliterator.getAvailableIDs();
41            if (args[i].startsWith("[")) {
42                testSet = args[i];
43            } else {
44                print(testSet, args[i]);
45                testSet = "";
46            }
47        }
48    }
49
50    public static void printSet(String source) {
51        UnicodeSet s = new UnicodeSet(source);
52        System.out.println("Printout for '" + source + "'");
53        int count = s.getRangeCount();
54        for (int i = 0; i < count; ++i) {
55            int start = s.getRangeStart(i);
56            int end = s.getRangeEnd(i);
57            System.out.println(Integer.toString(start,16) + ".." + Integer.toString(end,16));
58        }
59    }
60
61    public static String[] getAllScripts() {
62        Set set = new TreeSet();
63        int scripts[];
64        Enumeration sources = Transliterator.getAvailableSources();
65        while(sources.hasMoreElements()) {
66            String source = (String) sources.nextElement();
67            scripts = UScript.getCode(source);
68            if (scripts == null) {
69                System.out.println("[Skipping " + source + "]");
70                continue;
71            }
72            int sourceScript = scripts[0];
73            System.out.println("Source: " + source + ";\tScripts: " + showScripts(scripts));
74            Enumeration targets = Transliterator.getAvailableTargets(source);
75            while(targets.hasMoreElements()) {
76                String target = (String) targets.nextElement();
77                scripts = UScript.getCode(target);
78                if (scripts == null
79                        || priority(scripts[0]) < priority(sourceScript)) {
80                    // skip doing both directions
81                    System.out.println("[Skipping '" + source + "-" + target + "']");
82                    continue;
83                }
84                System.out.println("\tTarget: " + target + ";\tScripts: " + showScripts(scripts));
85                Enumeration variants = Transliterator.getAvailableVariants(source, target);
86                while(variants.hasMoreElements()) {
87                    String variant = (String) variants.nextElement();
88                    String id = source + "-" + target;
89                    if (variant.length() != 0) {
90                        id += "/" + variant;
91                        if (false) {
92                            System.out.println("SKIPPING VARIANT, SINCE IT CURRENTLY BREAKS!\t" + id);
93                            continue;
94                        }
95                    }
96                    System.out.println("\t\t\t\tAdding: '" + id + "'");
97                    set.add(id);
98                }
99            }
100        }
101        String[] results = new String[set.size()];
102        set.toArray(results);
103        return results;
104    }
105
106    static public int priority(int script) {
107        if (script == UScript.LATIN) return -2;
108        return script;
109    }
110
111    public static String showScripts(int[] scripts) {
112        StringBuffer results = new StringBuffer();
113        for (int i = 0; i < scripts.length; ++i) {
114            if (i != 0) results.append(", ");
115            results.append(UScript.getName(scripts[i]));
116        }
117        return results.toString();
118    }
119
120    public static void print(String testSet, String rawId) throws IOException {
121        System.out.println("Processing " + rawId);
122        Transliterator t = Transliterator.getInstance(rawId);
123        String id = t.getID();
124
125        // clean up IDs. Ought to be API for getting source, target, variant
126        int minusPos = id.indexOf('-');
127        String source = id.substring(0,minusPos);
128        String target = id.substring(minusPos+1);
129        int slashPos = target.indexOf('/');
130        if (slashPos >= 0) target = target.substring(0,slashPos);
131
132        // check that the source is a script
133        if (testSet.equals("")) {
134            int[] scripts = UScript.getCode(source);
135            if (scripts == null) {
136                System.out.println("FAILED: "
137                    + Transliterator.getDisplayName(id)
138                    + " does not have a script as the source");
139                return;
140            } else {
141                testSet = "[:" + source + ":]";
142                if (source.equalsIgnoreCase("katakana")) {
143                    testSet = "[" + testSet + "\u30FC]";
144                    printSet(testSet);
145                }
146            }
147        }
148        UnicodeSet sourceSet = new UnicodeSet(testSet);
149
150        // check that the target is a script
151        int[] scripts = UScript.getCode(target);
152        if (scripts == null) {
153            target = "[:Latin:]";
154        } else {
155            target = "[:" + target + ":]";
156        }
157        UnicodeSet targetSet = new UnicodeSet(target);
158
159        Transliterator inverse = t.getInverse();
160
161        //Transliterator hex = Transliterator.getInstance("Any-Hex");
162
163
164        // iterate through script
165        System.out.println("Transliterating " + sourceSet.toPattern(true)
166            + " with " + Transliterator.getDisplayName(id));
167
168        UnicodeSet leftOverSet = new UnicodeSet(targetSet);
169        UnicodeSet privateUse = new UnicodeSet("[:private use:]");
170
171        Map map = new TreeMap();
172
173        UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet);
174        targetSetPlusAnyways.addAll(okAnyway);
175
176        UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet);
177        sourceSetPlusAnyways.addAll(okAnyway);
178
179        UnicodeSetIterator usi = new UnicodeSetIterator(sourceSet);
180
181        while (usi.next()) {
182            int j = usi.codepoint;
183            /*
184        int count = sourceSet.getRangeCount();
185        for (int i = 0; i < count; ++i) {
186            int end = sourceSet.getRangeEnd(i);
187            for (int j = sourceSet.getRangeStart(i); j <= end; ++j) {
188            */
189               // String flag = "";
190                String ss = UTF16.valueOf(j);
191                String ts = t.transliterate(ss);
192                char group = 0;
193                if (!targetSetPlusAnyways.containsAll(ts)) {
194                    group |= 1;
195                }
196                if (UTF16.countCodePoint(ts) == 1) {
197                    leftOverSet.remove(UTF16.charAt(ts,0));
198                }
199                String rt = inverse.transliterate(ts);
200                if (!sourceSetPlusAnyways.containsAll(rt)) {
201                    group |= 2;
202                } else if (!ss.equals(rt)) {
203                    group |= 4;
204                }
205
206                if (!privateUse.containsNone(ts) || !privateUse.containsNone(rt)) {
207                    group |= 16;
208                }
209
210                map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.NFKD))
211                        + "\u0000" + ss,
212                    "<td class='s'>" + ss + "<br><tt>" + hex(ss)
213                        + "</tt></td><td class='t'>" + ts + "<br><tt>" + hex(ts)
214                        + "</tt></td><td class='r'>" + rt + "<br><tt>" + hex(rt) + "</tt></td>" );
215
216                // Check Duals
217                /*
218                int maxDual = 200;
219              dual:
220                for (int i2 = 0; i2 < count; ++i2) {
221                    int end2 = sourceSet.getRangeEnd(i2);
222                    for (int j2 = sourceSet.getRangeStart(i2); j2 <= end; ++j2) {
223                        String ss2 = UTF16.valueOf(j2);
224                        String ts2 = t.transliterate(ss2);
225                        String rt2 = inverse.transliterate(ts2);
226
227                        String ss12 = ss + ss2;
228                        String ts12 = t.transliterate(ss + ss12);
229                        String rt12 = inverse.transliterate(ts12);
230                        if (ts12.equals(ts + ts2) && rt12.equals(rt + rt2)) continue;
231                        if (--maxDual < 0) break dual;
232
233                        // transliteration of whole differs from that of parts
234                        group = 0x100;
235                        map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss12, Normalizer.DECOMP_COMPAT, 0))
236                                + "\u0000" + ss12,
237                            "<td class='s'>" + ss12 + "<br><tt>" + hex(ss12)
238                                + "</tt></td><td class='t'>" + ts12 + "<br><tt>" + hex(ts12)
239                                + "</tt></td><td class='r'>" + rt12 + "<br><tt>" + hex(rt12) + "</tt></td>" );
240                    }
241                }
242                */
243            //}
244        }
245
246
247        leftOverSet.remove(0x0100,0x02FF); // remove extended & IPA
248
249        /*int count = leftOverSet.getRangeCount();
250        for (int i = 0; i < count; ++i) {
251            int end = leftOverSet.getRangeEnd(i);
252            for (int j = leftOverSet.getRangeStart(i); j <= end; ++j) {
253            */
254
255        usi.reset(leftOverSet);
256        while (usi.next()) {
257            int j = usi.codepoint;
258
259                String ts = UTF16.valueOf(j);
260                // String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0);
261                // if (!decomp.equals(ts)) continue;
262
263                String rt = inverse.transliterate(ts);
264                // String flag = "";
265                char group = 0x80;
266
267                if (!sourceSetPlusAnyways.containsAll(rt)) {
268                    group |= 8;
269                }
270                if (!privateUse.containsNone(rt)) {
271                    group |= 16;
272                }
273
274                map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ts, Normalizer.NFKD)) + ts,
275                    "<td class='s'>-</td><td class='t'>" + ts + "<br><tt>" + hex(ts)
276                    + "</tt></td><td class='r'>"
277                    + rt + "<br><tt>" + hex(rt) + "</tt></td>");
278            //}
279        }
280
281        // make file name and open
282        File f = new File("transliteration/chart_" + id.replace('/', '_') + ".html");
283        String filename = f.getCanonicalFile().toString();
284        PrintWriter out = new PrintWriter(
285            new OutputStreamWriter(
286                new FileOutputStream(filename), "UTF-8"));
287        //out.print('\uFEFF'); // BOM
288
289        System.out.println("Writing " + filename);
290
291        try {
292            out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
293            out.println("<HTML><HEAD>");
294            out.println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
295            out.println("<link rel='stylesheet' href='http://www.unicode.org/charts/uca/charts.css' type='text/css'>");
296
297            out.println("<BODY>");
298            out.println("<h1>Transliteration Samples for '" + Transliterator.getDisplayName(id) + "'</h1>");
299            out.println("<p>This file illustrates the transliterations of " + Transliterator.getDisplayName(id) + ".");
300            out.println("The samples are mechanically generated, and only include single characters");
301            out.println("from the source set. Thus it will <i>not</i> contain examples where the transliteration");
302            out.println("depends on the context around the character. For a more detailed -- and interactive -- example, see the");
303            out.println("<a href='http://demo.icu-project.org/icu-bin/translit'>Transliteration Demo</a></p><hr>");
304
305            // set up the headers
306            int columnCount = 3;
307            String headerBase = "<th>Source</th><th>Target</th><th>Return</th>";
308            String headers = headerBase;
309            for (int i = columnCount - 1; i > 0; --i) {
310                if (i != columnCount - 1) headers += "<th>&nbsp;</th>";
311                headers += headerBase;
312            }
313
314            String tableHeader = "<p><table border='1'><tr>" + headers + "</tr>";
315            String tableFooter = "</table></p>";
316            out.println("<h2>Round Trip</h2>");
317            out.println(tableHeader);
318
319            Iterator it = map.keySet().iterator();
320            char lastGroup = 0;
321            int count = 0;
322            int column = 0;
323            while (it.hasNext()) {
324                String key = (String) it.next();
325                char group = key.charAt(0);
326                if (group != lastGroup || count++ > 50) {
327                    lastGroup = group;
328                    count = 0;
329                    if (column != 0) {
330                        out.println("</tr>");
331                        column = 0;
332                    }
333                    out.println(tableFooter);
334
335                    // String title = "";
336                    if ((group & 0x100) != 0) out.println("<hr><h2>Duals</h2>");
337                    else if ((group & 0x80) != 0) out.println("<hr><h2>Completeness</h2>");
338                    else out.println("<hr><h2>Round Trip</h2>");
339                    if ((group & 16) != 0) out.println("<h3>Errors: Contains Private Use Characters</h3>");
340                    if ((group & 8) != 0) out.println("<h3>Possible Errors: Return not in Source Set</h3>");
341                    if ((group & 4) != 0) out.println("<h3>One-Way Mapping: Return not equal to Source</h3>");
342                    if ((group & 2) != 0) out.println("<h3>Errors: Return not in Source Set</h3>");
343                    if ((group & 1) != 0) out.println("<h3>Errors: Target not in Target Set</h3>");
344
345                    out.println(tableHeader);
346                    column = 0;
347                }
348                String value = (String) map.get(key);
349                if (column++ == 0) out.print("<tr>");
350                else out.print("<th>&nbsp;</th>");
351                out.println(value);
352                if (column == 3) {
353                    out.println("</tr>");
354                    column = 0;
355                }
356            }
357            if (column != 0) {
358                out.println("</tr>");
359                column = 0;
360            }
361            out.println(tableFooter + "</BODY></HTML>");
362
363        } finally {
364            out.close();
365        }
366    }
367
368    public static String hex(String s) {
369        int cp;
370        StringBuffer results = new StringBuffer();
371        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
372            cp = UTF16.charAt(s, i);
373            if (i != 0) results.append(' ');
374            results.append(Integer.toHexString(cp));
375        }
376        return results.toString().toUpperCase();
377    }
378
379    static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");
380
381    /*
382    // tests whether a string is in a set. Also checks for Common and Inherited
383    public static boolean isIn(String s, UnicodeSet set) {
384        int cp;
385        for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) {
386            cp = UTF16.charAt(s, i);
387            if (set.contains(cp)) continue;
388            if (okAnyway.contains(cp)) continue;
389            return false;
390        }
391        return true;
392    }
393    */
394
395}
396