/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package android.icu.dev.test.translit;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import android.icu.lang.UCharacter;
import android.icu.lang.UScript;
import android.icu.text.Normalizer;
import android.icu.text.Transliterator;
import android.icu.text.UTF16;
import android.icu.text.UnicodeSet;
import android.icu.text.UnicodeSetIterator;

/**
 * Command-line tool that writes one HTML chart per transliterator.
 *
 * <p>Each chart lists, for every single code point of a source set, the
 * transliterated text and the text obtained by running the inverse
 * transliterator on that result. Entries are grouped by the problems that
 * were detected (see {@link #print(String, String)} for the group flags).
 */
public class WriteCharts {

    /**
     * Characters accepted in a transliteration result or round trip even when
     * they are outside the source/target set: everything that is not a letter.
     */
    static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");

    /**
     * Entry point.
     *
     * <p>Every argument starting with {@code [} is taken as a UnicodeSet
     * pattern to be used as the test set for the next transliterator ID; every
     * other argument is a transliterator ID for which a chart is written. With
     * no arguments, the IDs returned by {@link #getAllScripts()} are used.
     *
     * @param args set patterns and transliterator IDs
     * @throws IOException if a chart cannot be written
     */
    public static void main(String[] args) throws IOException {
        // Debugging hook, intentionally disabled.
        if (false) {
            printSet("[[\u0000-\u007E \u30A1-\u30FC \uFF61-\uFF9F\u3001\u3002][:Katakana:][:Mark:]]");
        }
        String testSet = "";
        if (args.length == 0) args = getAllScripts();
        for (int i = 0; i < args.length; ++i) {
            if (args[i].startsWith("[")) {
                testSet = args[i];
            } else {
                print(testSet, args[i]);
                // A test set applies only to the ID that directly follows it.
                testSet = "";
            }
        }
    }

    /**
     * Prints the ranges of a UnicodeSet to standard output, one
     * {@code start..end} pair (in lowercase hex) per line.
     *
     * @param source the UnicodeSet pattern to print
     */
    public static void printSet(String source) {
        UnicodeSet s = new UnicodeSet(source);
        System.out.println("Printout for '" + source + "'");
        int count = s.getRangeCount();
        for (int i = 0; i < count; ++i) {
            int start = s.getRangeStart(i);
            int end = s.getRangeEnd(i);
            System.out.println(Integer.toString(start,16) + ".." + Integer.toString(end,16));
        }
    }

    /**
     * Collects the IDs of all available transliterators whose source and
     * target both resolve to script codes, including their variants.
     *
     * <p>A source-target pair is skipped when the target's first script has a
     * lower {@link #priority(int)} than the source's first script, so that
     * charts are not produced for both directions. Progress is logged to
     * standard output.
     *
     * @return the selected IDs in sorted order
     */
    public static String[] getAllScripts() {
        Set set = new TreeSet();
        int scripts[];
        Enumeration sources = Transliterator.getAvailableSources();
        while(sources.hasMoreElements()) {
            String source = (String) sources.nextElement();
            scripts = UScript.getCode(source);
            if (scripts == null) {
                System.out.println("[Skipping " + source + "]");
                continue;
            }
            int sourceScript = scripts[0];
            System.out.println("Source: " + source + ";\tScripts: " + showScripts(scripts));
            Enumeration targets = Transliterator.getAvailableTargets(source);
            while(targets.hasMoreElements()) {
                String target = (String) targets.nextElement();
                scripts = UScript.getCode(target);
                if (scripts == null
                        || priority(scripts[0]) < priority(sourceScript)) {
                    // skip doing both directions
                    System.out.println("[Skipping '" + source + "-" + target + "']");
                    continue;
                }
                System.out.println("\tTarget: " + target + ";\tScripts: " + showScripts(scripts));
                Enumeration variants = Transliterator.getAvailableVariants(source, target);
                while(variants.hasMoreElements()) {
                    String variant = (String) variants.nextElement();
                    String id = source + "-" + target;
                    if (variant.length() != 0) {
                        id += "/" + variant;
                        // Switch for skipping variants, intentionally disabled.
                        if (false) {
                            System.out.println("SKIPPING VARIANT, SINCE IT CURRENTLY BREAKS!\t" + id);
                            continue;
                        }
                    }
                    System.out.println("\t\t\t\tAdding: '" + id + "'");
                    set.add(id);
                }
            }
        }
        String[] results = new String[set.size()];
        set.toArray(results);
        return results;
    }

    /**
     * Returns the ordering value used to decide which direction of a script
     * pair gets a chart: Latin maps to -2, every other script to its own code.
     *
     * @param script a UScript code
     * @return the priority of the script
     */
    static public int priority(int script) {
        if (script == UScript.LATIN) return -2;
        return script;
    }

    /**
     * Formats script codes as a comma-separated list of script names.
     *
     * @param scripts UScript codes
     * @return the names joined with {@code ", "}
     */
    public static String showScripts(int[] scripts) {
        StringBuffer results = new StringBuffer();
        for (int i = 0; i < scripts.length; ++i) {
            if (i != 0) results.append(", ");
            results.append(UScript.getName(scripts[i]));
        }
        return results.toString();
    }

    /**
     * Builds and writes the HTML chart for one transliterator.
     *
     * <p>Every chart entry is stored under a sort key whose first character is
     * a bit set describing the entry:
     * <ul>
     * <li>1 - target text not in the target set</li>
     * <li>2 - return text not in the source set</li>
     * <li>4 - return text differs from the source (one-way mapping)</li>
     * <li>8 - completeness check: return text not in the source set</li>
     * <li>16 - target or return text contains private use characters</li>
     * <li>0x80 - entry comes from the completeness check</li>
     * </ul>
     *
     * @param testSet UnicodeSet pattern of the characters to transliterate, or
     *     the empty string to use the script named by the ID's source
     * @param rawId the transliterator ID
     * @throws IOException if the chart file cannot be written
     */
    public static void print(String testSet, String rawId) throws IOException {
        System.out.println("Processing " + rawId);
        Transliterator t = Transliterator.getInstance(rawId);
        String id = t.getID();

        // clean up IDs. Ought to be API for getting source, target, variant
        int minusPos = id.indexOf('-');
        if (minusPos < 0) {
            // Without a separator there is no source/target to chart; report
            // it instead of failing in substring().
            System.out.println("FAILED: "
                    + Transliterator.getDisplayName(id)
                    + " does not have a source-target ID");
            return;
        }
        String source = id.substring(0,minusPos);
        String target = id.substring(minusPos+1);
        int slashPos = target.indexOf('/');
        if (slashPos >= 0) target = target.substring(0,slashPos);

        // check that the source is a script
        if (testSet.equals("")) {
            int[] sourceScripts = UScript.getCode(source);
            if (sourceScripts == null) {
                System.out.println("FAILED: "
                        + Transliterator.getDisplayName(id)
                        + " does not have a script as the source");
                return;
            }
            testSet = "[:" + source + ":]";
            if (source.equalsIgnoreCase("katakana")) {
                testSet = "[" + testSet + "\u30FC]";
                printSet(testSet);
            }
        }
        UnicodeSet sourceSet = new UnicodeSet(testSet);

        // check that the target is a script; fall back to Latin otherwise
        int[] targetScripts = UScript.getCode(target);
        if (targetScripts == null) {
            target = "[:Latin:]";
        } else {
            target = "[:" + target + ":]";
        }
        UnicodeSet targetSet = new UnicodeSet(target);

        Transliterator inverse = t.getInverse();

        // iterate through script
        System.out.println("Transliterating " + sourceSet.toPattern(true)
                + " with " + Transliterator.getDisplayName(id));

        // Target characters that are never produced from a single source
        // character; they are exercised by the completeness check below.
        UnicodeSet leftOverSet = new UnicodeSet(targetSet);
        UnicodeSet privateUse = new UnicodeSet("[:private use:]");

        Map map = new TreeMap();

        UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet);
        targetSetPlusAnyways.addAll(okAnyway);

        UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet);
        sourceSetPlusAnyways.addAll(okAnyway);

        UnicodeSetIterator usi = new UnicodeSetIterator(sourceSet);

        // Round trip: source -> target -> return, one code point at a time.
        while (usi.next()) {
            int j = usi.codepoint;
            String ss = UTF16.valueOf(j);
            String ts = t.transliterate(ss);
            char group = 0;
            if (!targetSetPlusAnyways.containsAll(ts)) {
                group |= 1;
            }
            if (UTF16.countCodePoint(ts) == 1) {
                leftOverSet.remove(UTF16.charAt(ts,0));
            }
            String rt = inverse.transliterate(ts);
            if (!sourceSetPlusAnyways.containsAll(rt)) {
                group |= 2;
            } else if (!ss.equals(rt)) {
                group |= 4;
            }

            if (!privateUse.containsNone(ts) || !privateUse.containsNone(rt)) {
                group |= 16;
            }

            // char + String is string concatenation: the group flag becomes
            // the first character of the sort key.
            map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.NFKD))
                    + "\u0000" + ss,
                    cell("s", ss) + cell("t", ts) + cell("r", rt));

            // NOTE: a "duals" check (pairs of characters whose joint
            // transliteration differs from that of the parts, group 0x100)
            // used to live here and is disabled.
        }

        leftOverSet.remove(0x0100,0x02FF); // remove extended & IPA

        // Completeness: run the inverse on the target characters not reached above.
        usi.reset(leftOverSet);
        while (usi.next()) {
            int j = usi.codepoint;

            String ts = UTF16.valueOf(j);
            String rt = inverse.transliterate(ts);
            char group = 0x80;

            if (!sourceSetPlusAnyways.containsAll(rt)) {
                group |= 8;
            }
            if (!privateUse.containsNone(rt)) {
                group |= 16;
            }

            map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ts, Normalizer.NFKD)) + ts,
                    "<td class='s'>-</td>" + cell("t", ts) + cell("r", rt));
        }

        writeChart(id, map);
    }

    /**
     * Writes the collected chart entries as a UTF-8 HTML file named
     * {@code transliteration/chart_<id>.html}, creating the directory if needed.
     *
     * @param id the transliterator ID ('/' is replaced by '_' in the file name)
     * @param map sort key to HTML cells, as built by {@link #print(String, String)}
     * @throws IOException if the directory cannot be created or writing fails
     */
    private static void writeChart(String id, Map map) throws IOException {
        // make file name and open
        File f = new File("transliteration/chart_" + id.replace('/', '_') + ".html");
        File canonical = f.getCanonicalFile();
        String filename = canonical.toString();
        File directory = canonical.getParentFile();
        if (directory != null && !directory.isDirectory() && !directory.mkdirs()) {
            throw new IOException("Cannot create output directory " + directory);
        }
        PrintWriter out = new PrintWriter(
                new OutputStreamWriter(
                        new FileOutputStream(filename), "UTF-8"));

        System.out.println("Writing " + filename);

        try {
            String displayName = escapeHtml(Transliterator.getDisplayName(id));

            out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
            out.println("<HTML><HEAD>");
            out.println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
            out.println("<link rel='stylesheet' href='http://www.unicode.org/charts/uca/charts.css' type='text/css'>");

            out.println("<BODY>");
            out.println("<h1>Transliteration Samples for '" + displayName + "'</h1>");
            out.println("<p>This file illustrates the transliterations of " + displayName + ".");
            out.println("The samples are mechanically generated, and only include single characters");
            out.println("from the source set. Thus it will <i>not</i> contain examples where the transliteration");
            out.println("depends on the context around the character. For a more detailed -- and interactive -- example, see the");
            out.println("<a href='http://demo.icu-project.org/icu-bin/translit'>Transliteration Demo</a></p><hr>");

            // set up the headers: one header group per entry in a row, with a
            // spacer cell before every group except the first, exactly like
            // the data rows written below
            int columnCount = 3;
            String headerBase = "<th>Source</th><th>Target</th><th>Return</th>";
            StringBuffer headers = new StringBuffer(headerBase);
            for (int i = 1; i < columnCount; ++i) {
                headers.append("<th> </th>").append(headerBase);
            }

            String tableHeader = "<p><table border='1'><tr>" + headers + "</tr>";
            String tableFooter = "</table></p>";
            out.println("<h2>Round Trip</h2>");
            out.println(tableHeader);

            Iterator it = map.entrySet().iterator();
            char lastGroup = 0;
            int count = 0;
            int column = 0;
            while (it.hasNext()) {
                Map.Entry entry = (Map.Entry) it.next();
                String key = (String) entry.getKey();
                char group = key.charAt(0);
                // Start a new table when the group changes, or after a run
                // of entries in the same group.
                if (group != lastGroup || count++ > 50) {
                    lastGroup = group;
                    count = 0;
                    if (column != 0) {
                        out.println("</tr>");
                        column = 0;
                    }
                    out.println(tableFooter);

                    if ((group & 0x100) != 0) out.println("<hr><h2>Duals</h2>");
                    else if ((group & 0x80) != 0) out.println("<hr><h2>Completeness</h2>");
                    else out.println("<hr><h2>Round Trip</h2>");
                    if ((group & 16) != 0) out.println("<h3>Errors: Contains Private Use Characters</h3>");
                    if ((group & 8) != 0) out.println("<h3>Possible Errors: Return not in Source Set</h3>");
                    if ((group & 4) != 0) out.println("<h3>One-Way Mapping: Return not equal to Source</h3>");
                    if ((group & 2) != 0) out.println("<h3>Errors: Return not in Source Set</h3>");
                    if ((group & 1) != 0) out.println("<h3>Errors: Target not in Target Set</h3>");

                    out.println(tableHeader);
                    column = 0;
                }
                String value = (String) entry.getValue();
                if (column++ == 0) out.print("<tr>");
                else out.print("<th> </th>");
                out.println(value);
                if (column == columnCount) {
                    out.println("</tr>");
                    column = 0;
                }
            }
            if (column != 0) {
                out.println("</tr>");
                column = 0;
            }
            out.println(tableFooter + "</BODY></HTML>");

            // PrintWriter never throws on write failures; surface them here
            // so a truncated chart does not go unnoticed.
            if (out.checkError()) {
                throw new IOException("Error writing " + filename);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Builds one table cell showing a text and, below it, its code points in hex.
     *
     * @param cssClass value of the cell's class attribute
     * @param text the text to show
     * @return the {@code <td>} element
     */
    private static String cell(String cssClass, String text) {
        return "<td class='" + cssClass + "'>" + escapeHtml(text)
                + "<br><tt>" + hex(text) + "</tt></td>";
    }

    /**
     * Escapes the characters that are markup-significant in HTML text content.
     *
     * @param text raw text
     * @return the text with ampersand and angle brackets replaced by entities
     */
    private static String escapeHtml(String text) {
        StringBuffer result = new StringBuffer(text.length());
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            switch (c) {
            case '&':
                result.append("&amp;");
                break;
            case '<':
                result.append("&lt;");
                break;
            case '>':
                result.append("&gt;");
                break;
            default:
                result.append(c);
                break;
            }
        }
        return result.toString();
    }

    /**
     * Returns the code points of a string as uppercase hex numbers separated
     * by single spaces.
     *
     * @param s the string to convert
     * @return the hex representation; empty for an empty string
     */
    public static String hex(String s) {
        int cp;
        StringBuffer results = new StringBuffer();
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(s, i);
            if (i != 0) results.append(' ');
            results.append(Integer.toHexString(cp));
        }
        return results.toString().toUpperCase();
    }

}