/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package android.icu.dev.test.translit;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import android.icu.lang.UCharacter;
import android.icu.lang.UScript;
import android.icu.text.Normalizer;
import android.icu.text.Transliterator;
import android.icu.text.UTF16;
import android.icu.text.UnicodeSet;
import android.icu.text.UnicodeSetIterator;

/**
 * Command-line tool that writes one HTML chart per transliterator ID into the
 * relative directory {@code transliteration/}.
 *
 * <p>Each chart lists, for every code point of a source set, the character, its
 * transliteration and the result of transliterating back with the inverse
 * transliterator. Rows are grouped by the kind of problem detected (see the
 * {@code GROUP_*} bit flags).
 */
public class WriteCharts {

    // Bit flags describing which section of the chart a row belongs to. The
    // combined flags are stored as the first char of the row's sort key, so
    // rows of the same kind sort (and are printed) together.

    /** The transliteration contains characters outside the target set. */
    private static final int GROUP_TARGET_NOT_IN_TARGET_SET = 1;
    /** The round-trip result contains characters outside the source set. */
    private static final int GROUP_RETURN_NOT_IN_SOURCE_SET = 2;
    /** The round-trip result is inside the source set but differs from the source. */
    private static final int GROUP_ONE_WAY = 4;
    /** Completeness pass: the inverse result contains characters outside the source set. */
    private static final int GROUP_POSSIBLE_RETURN_NOT_IN_SOURCE_SET = 8;
    /** A result contains private use characters. */
    private static final int GROUP_PRIVATE_USE = 16;
    /** Row produced by the completeness pass over unused target characters. */
    private static final int GROUP_COMPLETENESS = 0x80;
    /** Row produced by a "duals" check; no code in this class currently sets it. */
    private static final int GROUP_DUALS = 0x100;

    /** Number of source/target/return triples shown side by side in one table row. */
    private static final int COLUMN_COUNT = 3;
    /** A table is closed and a new one started once its entry counter exceeds this value. */
    private static final int TABLE_BREAK_THRESHOLD = 50;
    /** Spacer cell placed between two triples, in header and data rows alike. */
    private static final String COLUMN_SEPARATOR = "<th> </th>";

    /** Characters that are accepted in results even when outside the expected script set. */
    static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");

    /**
     * Entry point. Each argument is either a UnicodeSet pattern (it starts with
     * {@code '['}) or a transliterator ID. A pattern is remembered and used as
     * the source set for the next ID only. With no arguments, the IDs returned
     * by {@link #getAllScripts()} are charted.
     *
     * @param args UnicodeSet patterns and transliterator IDs
     * @throws IOException if a chart file cannot be written
     */
    public static void main(String[] args) throws IOException {
        String testSet = "";
        if (args.length == 0) {
            args = getAllScripts();
        }
        for (int i = 0; i < args.length; ++i) {
            if (args[i].startsWith("[")) {
                testSet = args[i];
            } else {
                print(testSet, args[i]);
                testSet = ""; // a test set applies to a single ID only
            }
        }
    }

    /**
     * Prints the ranges of a UnicodeSet to standard output, one
     * {@code start..end} pair (in hexadecimal) per line.
     *
     * @param source the UnicodeSet pattern to parse and print
     */
    public static void printSet(String source) {
        UnicodeSet s = new UnicodeSet(source);
        System.out.println("Printout for '" + source + "'");
        int count = s.getRangeCount();
        for (int i = 0; i < count; ++i) {
            int start = s.getRangeStart(i);
            int end = s.getRangeEnd(i);
            System.out.println(Integer.toString(start, 16) + ".." + Integer.toString(end, 16));
        }
    }

    /**
     * Collects the IDs of all registered transliterators whose source and
     * target both resolve to script codes, skipping pairs whose target has a
     * lower {@link #priority(int)} than the source so that each pair is charted
     * in one direction only. Progress is logged to standard output.
     *
     * @return the collected IDs ({@code source-target} or
     *         {@code source-target/variant}) in sorted order
     */
    public static String[] getAllScripts() {
        Set<String> ids = new TreeSet<String>();
        Enumeration<String> sources = Transliterator.getAvailableSources();
        while (sources.hasMoreElements()) {
            String source = sources.nextElement();
            int[] sourceScripts = UScript.getCode(source);
            if (sourceScripts == null || sourceScripts.length == 0) {
                System.out.println("[Skipping " + source + "]");
                continue;
            }
            int sourceScript = sourceScripts[0];
            System.out.println("Source: " + source + ";\tScripts: " + showScripts(sourceScripts));

            Enumeration<String> targets = Transliterator.getAvailableTargets(source);
            while (targets.hasMoreElements()) {
                String target = targets.nextElement();
                int[] targetScripts = UScript.getCode(target);
                if (targetScripts == null
                        || targetScripts.length == 0
                        || priority(targetScripts[0]) < priority(sourceScript)) {
                    // skip doing both directions
                    System.out.println("[Skipping '" + source + "-" + target + "']");
                    continue;
                }
                System.out.println("\tTarget: " + target + ";\tScripts: " + showScripts(targetScripts));

                Enumeration<String> variants = Transliterator.getAvailableVariants(source, target);
                while (variants.hasMoreElements()) {
                    String variant = variants.nextElement();
                    String id = source + "-" + target;
                    if (variant.length() != 0) {
                        id += "/" + variant;
                    }
                    System.out.println("\t\t\t\tAdding: '" + id + "'");
                    ids.add(id);
                }
            }
        }
        return ids.toArray(new String[ids.size()]);
    }

    /**
     * Returns the ordering value used to pick a charting direction.
     *
     * @param script a UScript code
     * @return {@code -2} for {@link UScript#LATIN}, otherwise the code itself
     */
    static public int priority(int script) {
        if (script == UScript.LATIN) {
            return -2;
        }
        return script;
    }

    /**
     * Formats script codes as a comma-separated list of script names.
     *
     * @param scripts UScript codes
     * @return the names joined with {@code ", "}
     */
    public static String showScripts(int[] scripts) {
        StringBuilder results = new StringBuilder();
        for (int i = 0; i < scripts.length; ++i) {
            if (i != 0) {
                results.append(", ");
            }
            results.append(UScript.getName(scripts[i]));
        }
        return results.toString();
    }

    /**
     * Builds and writes the HTML chart for one transliterator.
     *
     * <p>If {@code testSet} is empty, the source set is derived from the source
     * part of the ID, which must then name a script; otherwise a failure is
     * logged and nothing is written. An ID without a {@code '-'} separator is
     * likewise reported and skipped.
     *
     * @param testSet UnicodeSet pattern of source characters, or {@code ""} to
     *        derive it from the ID's source script
     * @param rawId the transliterator ID to instantiate
     * @throws IOException if the chart file cannot be created or written
     */
    public static void print(String testSet, String rawId) throws IOException {
        System.out.println("Processing " + rawId);
        Transliterator t = Transliterator.getInstance(rawId);
        String id = t.getID();

        // clean up IDs. Ought to be API for getting source, target, variant
        int minusPos = id.indexOf('-');
        if (minusPos < 0) {
            // Without this guard substring(0, -1) throws StringIndexOutOfBoundsException.
            System.out.println("FAILED: '" + id + "' is not of the form source-target");
            return;
        }
        String source = id.substring(0, minusPos);
        String target = id.substring(minusPos + 1);
        int slashPos = target.indexOf('/');
        if (slashPos >= 0) {
            target = target.substring(0, slashPos);
        }

        // check that the source is a script
        if (testSet.equals("")) {
            if (UScript.getCode(source) == null) {
                System.out.println("FAILED: "
                        + Transliterator.getDisplayName(id)
                        + " does not have a script as the source");
                return;
            }
            testSet = "[:" + source + ":]";
            if (source.equalsIgnoreCase("katakana")) {
                testSet = "[" + testSet + "\u30FC]";
                printSet(testSet);
            }
        }
        UnicodeSet sourceSet = new UnicodeSet(testSet);

        // check that the target is a script; fall back to Latin otherwise
        String targetPattern = (UScript.getCode(target) == null) ? "[:Latin:]" : "[:" + target + ":]";
        UnicodeSet targetSet = new UnicodeSet(targetPattern);

        Transliterator inverse = t.getInverse();

        // iterate through script
        System.out.println("Transliterating " + sourceSet.toPattern(true)
                + " with " + Transliterator.getDisplayName(id));

        UnicodeSet leftOverSet = new UnicodeSet(targetSet);
        UnicodeSet privateUse = new UnicodeSet("[:private use:]");

        // Sorted so that rows come out grouped by flag bits, then by folded text.
        Map<String, String> rows = new TreeMap<String, String>();

        UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet);
        targetSetPlusAnyways.addAll(okAnyway);

        UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet);
        sourceSetPlusAnyways.addAll(okAnyway);

        // Round-trip pass: source -> target -> back, for every source code point.
        UnicodeSetIterator usi = new UnicodeSetIterator(sourceSet);
        while (usi.next()) {
            String ss = UTF16.valueOf(usi.codepoint);
            String ts = t.transliterate(ss);
            char group = 0;
            if (!targetSetPlusAnyways.containsAll(ts)) {
                group |= GROUP_TARGET_NOT_IN_TARGET_SET;
            }
            if (UTF16.countCodePoint(ts) == 1) {
                // this target character is produced by some source character
                leftOverSet.remove(UTF16.charAt(ts, 0));
            }
            String rt = inverse.transliterate(ts);
            if (!sourceSetPlusAnyways.containsAll(rt)) {
                group |= GROUP_RETURN_NOT_IN_SOURCE_SET;
            } else if (!ss.equals(rt)) {
                group |= GROUP_ONE_WAY;
            }
            if (!privateUse.containsNone(ts) || !privateUse.containsNone(rt)) {
                group |= GROUP_PRIVATE_USE;
            }

            rows.put(sortKey(group, ss), cell("s", ss) + cell("t", ts) + cell("r", rt));
        }

        leftOverSet.remove(0x0100, 0x02FF); // remove extended & IPA

        // Completeness pass: target characters never produced above, run through the inverse.
        usi.reset(leftOverSet);
        while (usi.next()) {
            String ts = UTF16.valueOf(usi.codepoint);
            String rt = inverse.transliterate(ts);
            char group = GROUP_COMPLETENESS;
            if (!sourceSetPlusAnyways.containsAll(rt)) {
                group |= GROUP_POSSIBLE_RETURN_NOT_IN_SOURCE_SET;
            }
            if (!privateUse.containsNone(rt)) {
                group |= GROUP_PRIVATE_USE;
            }

            rows.put(sortKey(group, ts),
                    "<td class='s'>-</td>" + cell("t", ts) + cell("r", rt));
        }

        writeChart(id, rows);
    }

    /**
     * Builds the map key for a chart row: the group flags as a single char,
     * then the NFKD-normalized, lower-cased text, then a NUL separator and the
     * text itself so that distinct texts never share a key.
     */
    private static String sortKey(char group, String text) {
        return String.valueOf(group)
                + UCharacter.toLowerCase(Normalizer.normalize(text, Normalizer.NFKD))
                + "\u0000" + text;
    }

    /** Renders one table cell: the HTML-escaped text above its code points in hex. */
    private static String cell(String cssClass, String text) {
        return "<td class='" + cssClass + "'>" + escapeHtml(text)
                + "<br><tt>" + hex(text) + "</tt></td>";
    }

    /** Escapes the characters that are markup-significant in HTML element content. */
    private static String escapeHtml(String text) {
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            switch (c) {
                case '&': escaped.append("&amp;"); break;
                case '<': escaped.append("&lt;"); break;
                case '>': escaped.append("&gt;"); break;
                default: escaped.append(c); break;
            }
        }
        return escaped.toString();
    }

    /** Builds the header cells: one Source/Target/Return triple per column, spacer-separated. */
    private static String headerRow() {
        String headerBase = "<th>Source</th><th>Target</th><th>Return</th>";
        StringBuilder headers = new StringBuilder(headerBase);
        for (int i = 1; i < COLUMN_COUNT; ++i) {
            // Data rows put a spacer before every triple except the first; mirror that here.
            headers.append(COLUMN_SEPARATOR).append(headerBase);
        }
        return headers.toString();
    }

    /** Writes the section headings that correspond to the flag bits of {@code group}. */
    private static void printSectionHeadings(PrintWriter out, char group) {
        if ((group & GROUP_DUALS) != 0) {
            out.println("<hr><h2>Duals</h2>");
        } else if ((group & GROUP_COMPLETENESS) != 0) {
            out.println("<hr><h2>Completeness</h2>");
        } else {
            out.println("<hr><h2>Round Trip</h2>");
        }
        if ((group & GROUP_PRIVATE_USE) != 0) {
            out.println("<h3>Errors: Contains Private Use Characters</h3>");
        }
        if ((group & GROUP_POSSIBLE_RETURN_NOT_IN_SOURCE_SET) != 0) {
            out.println("<h3>Possible Errors: Return not in Source Set</h3>");
        }
        if ((group & GROUP_ONE_WAY) != 0) {
            out.println("<h3>One-Way Mapping: Return not equal to Source</h3>");
        }
        if ((group & GROUP_RETURN_NOT_IN_SOURCE_SET) != 0) {
            out.println("<h3>Errors: Return not in Source Set</h3>");
        }
        if ((group & GROUP_TARGET_NOT_IN_TARGET_SET) != 0) {
            out.println("<h3>Errors: Target not in Target Set</h3>");
        }
    }

    /**
     * Writes the collected rows as {@code transliteration/chart_<id>.html}
     * (with {@code '/'} in the ID replaced by {@code '_'}), creating the output
     * directory if needed.
     */
    private static void writeChart(String id, Map<String, String> rows) throws IOException {
        // make file name and open
        File f = new File("transliteration/chart_" + id.replace('/', '_') + ".html").getCanonicalFile();
        File dir = f.getParentFile();
        if (dir != null && !dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Could not create output directory " + dir);
        }
        String filename = f.toString();
        PrintWriter out = new PrintWriter(
                new OutputStreamWriter(
                        new FileOutputStream(filename), "UTF-8"));

        System.out.println("Writing " + filename);

        try {
            out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
            out.println("<HTML><HEAD>");
            out.println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
            out.println("<link rel='stylesheet' href='http://www.unicode.org/charts/uca/charts.css' type='text/css'>");

            out.println("<BODY>");
            out.println("<h1>Transliteration Samples for '" + Transliterator.getDisplayName(id) + "'</h1>");
            out.println("<p>This file illustrates the transliterations of " + Transliterator.getDisplayName(id) + ".");
            out.println("The samples are mechanically generated, and only include single characters");
            out.println("from the source set. Thus it will <i>not</i> contain examples where the transliteration");
            out.println("depends on the context around the character. For a more detailed -- and interactive -- example, see the");
            out.println("<a href='http://demo.icu-project.org/icu-bin/translit'>Transliteration Demo</a></p><hr>");

            // set up the headers
            String tableHeader = "<p><table border='1'><tr>" + headerRow() + "</tr>";
            String tableFooter = "</table></p>";
            out.println("<h2>Round Trip</h2>");
            out.println(tableHeader);

            char lastGroup = 0;
            int count = 0;
            int column = 0;
            Iterator<Map.Entry<String, String>> it = rows.entrySet().iterator();
            while (it.hasNext()) {
                Map.Entry<String, String> row = it.next();
                char group = row.getKey().charAt(0);
                // Start a new table when the group changes or the current table got long.
                if (group != lastGroup || count++ > TABLE_BREAK_THRESHOLD) {
                    lastGroup = group;
                    count = 0;
                    if (column != 0) {
                        out.println("</tr>");
                        column = 0;
                    }
                    out.println(tableFooter);
                    printSectionHeadings(out, group);
                    out.println(tableHeader);
                }
                if (column++ == 0) {
                    out.print("<tr>");
                } else {
                    out.print(COLUMN_SEPARATOR);
                }
                out.println(row.getValue());
                if (column == COLUMN_COUNT) {
                    out.println("</tr>");
                    column = 0;
                }
            }
            if (column != 0) {
                out.println("</tr>");
            }
            out.println(tableFooter + "</BODY></HTML>");

            // PrintWriter never throws on write failure; surface it explicitly.
            if (out.checkError()) {
                throw new IOException("Error while writing " + filename);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Formats the code points of a string as upper-case hexadecimal numbers
     * separated by single spaces.
     *
     * @param s the string to format
     * @return the hex representation, or {@code ""} for an empty string
     */
    public static String hex(String s) {
        StringBuilder results = new StringBuilder();
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(s, i);
            if (i != 0) {
                results.append(' ');
            }
            results.append(Integer.toHexString(cp));
        }
        return results.toString().toUpperCase();
    }
}