1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/** 4 ******************************************************************************* 5 * Copyright (C) 2001-2010, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9package com.ibm.icu.dev.demo.translit; 10import java.util.Enumeration; 11import java.util.HashMap; 12import java.util.HashSet; 13import java.util.Iterator; 14import java.util.Set; 15import java.util.TreeSet; 16 17import com.ibm.icu.lang.UScript; 18import com.ibm.icu.text.Replaceable; 19import com.ibm.icu.text.Transliterator; 20import com.ibm.icu.text.UTF16; 21import com.ibm.icu.text.UnicodeFilter; 22 23public class AnyTransliterator extends Transliterator { 24 25 static final boolean DEBUG = false; 26 private String targetName; 27 private RunIterator it; 28 private Position run; 29 30 31 public AnyTransliterator(String targetName, UnicodeFilter filter, RunIterator it){ 32 super("Any-" + targetName, filter); 33 this.targetName = targetName; 34 this.it = it; 35 run = new Position(); 36 } 37 38 public AnyTransliterator(String targetName, UnicodeFilter filter){ 39 this(targetName, filter, new ScriptRunIterator()); 40 } 41 42 static private Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex"); 43 44 protected void handleTransliterate(Replaceable text, 45 Position offsets, boolean isIncremental) { 46 if (DEBUG) { 47 System.out.println("- handleTransliterate " + hex.transliterate(text.toString()) 48 + ", " + toString(offsets)); 49 } 50 it.reset(text, offsets); 51 52 while (it.next(run)) { 53 if (targetName.equalsIgnoreCase(it.getName())) { 54 if (DEBUG) System.out.println("Skipping identical: " + targetName); 55 run.start = run.limit; // show we processed 56 continue; // skip if same 57 } 58 59 Transliterator t; 60 String id = it.getName() + '-' + targetName; 61 try { 62 t = Transliterator.getInstance(id); 63 } catch (IllegalArgumentException ex) { 64 if (DEBUG) System.out.println("Couldn't find: " + id + ", Trying Latin as Pivot"); 65 id = it.getName() + "-Latin; Latin-" + targetName; 66 try { 67 t = Transliterator.getInstance(id); 68 } catch (IllegalArgumentException ex2) { 69 if (DEBUG) System.out.println("Couldn't find: " + id); 70 continue; 71 } 72 } 73 // TODO catch error later!! 74 75 if (DEBUG) { 76 System.out.println(t.getID()); 77 System.out.println("input: " + hex.transliterate(text.toString()) 78 + ", " + toString(run)); 79 } 80 81 if (isIncremental && it.atEnd()) { 82 t.transliterate(text, run); 83 } else { 84 t.finishTransliteration(text, run); 85 } 86 // adjust the offsets in line with the changes 87 it.adjust(run.limit); 88 89 if (DEBUG) { 90 System.out.println("output: " + hex.transliterate(text.toString()) 91 + ", " + toString(run)); 92 } 93 } 94 95 // show how far we got! 96 it.getExpanse(offsets); 97 if (run.start == run.limit) offsets.start = offsets.limit; 98 else offsets.start = run.start; 99 if (DEBUG) { 100 System.out.println("+ handleTransliterate: " + ", " + toString(offsets)); 101 System.out.println(); 102 } 103 } 104 105 // should be method on Position 106 public static String toString(Position offsets) { 107 return "[cs: " + offsets.contextStart 108 + ", s: " + offsets.start 109 + ", l: " + offsets.limit 110 + ", cl: " + offsets.contextLimit 111 + "]"; 112 } 113 114 public interface RunIterator { 115 public void reset(Replaceable text, Position expanse); 116 public void getExpanse(Position run); 117 public void reset(); 118 public boolean next(Position run); 119 public void getCurrent(Position run); 120 public String getName(); 121 public void adjust(int newCurrentLimit); 122 public boolean atEnd(); 123 } 124 125 /** 126 * Returns a series of ranges corresponding to scripts. They will be of the form: 127 * ccccSScSSccccTTcTcccc - where c is common, S is the first script and T is the second 128 *| | - first run 129 * | | - second run 130 * That is, the runs will overlap. The reason for this is so that a transliterator can 131 * consider common characters both before and after the scripts. 132 * The only time that contextStart != start is for the first run 133 * (the context is the start context of the entire expanse) 134 * The only time that contextLimit != limit is for the last run 135 * (the context is the end context of the entire expanse) 136 */ 137 public static class ScriptRunIterator implements RunIterator { 138 private Replaceable text; 139 private Position expanse = new Position(); 140 private Position current = new Position(); 141 private int script; 142 private boolean done = true; 143 144 145 public void reset(Replaceable repText, Position expansePos) { 146 set(this.expanse, expansePos); 147 this.text = repText; 148 reset(); 149 } 150 151 public void reset() { 152 done = false; 153 //this.expanse = expanse; 154 script = UScript.INVALID_CODE; 155 // set up first range to be empty, at beginning 156 current.contextStart = expanse.contextStart; 157 current.start = current.limit = current.contextLimit = expanse.start; 158 } 159 160 public boolean next(Position run) { 161 if (done) return false; 162 if (DEBUG) { 163 System.out.println("+cs: " + current.contextStart 164 + ", s: " + current.start 165 + ", l: " + current.limit 166 + ", cl: " + current.contextLimit); 167 } 168 // reset start context run to the last end 169 current.start = current.limit; 170 171 // Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script. 172 int i, cp; 173 int limit = expanse.start; 174 for (i = current.start; i > limit; i -= UTF16.getCharCount(cp)) { 175 cp = text.char32At(i); 176 int scrpt = UScript.getScript(cp); 177 if (scrpt != UScript.COMMON && scrpt != UScript.INHERITED) break; 178 } 179 current.start = i; 180 current.contextStart = (i == limit) ? expanse.contextStart : i; // extend at start 181 182 // PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit 183 int lastScript = UScript.COMMON; 184 //int veryLastScript = UScript.COMMON; 185 limit = expanse.limit; 186 for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) { 187 cp = text.char32At(i); 188 int scrpt = UScript.getScript(cp); 189 if (scrpt == UScript.INHERITED) scrpt = UScript.COMMON; 190 if (scrpt != UScript.COMMON) { 191 // if we find a real script: 192 // if we already had a script, bail 193 // otherwise set our script 194 if (lastScript == UScript.COMMON) lastScript = scrpt; 195 else if (lastScript != scrpt) break; 196 } 197 } 198 current.limit = i; 199 current.contextLimit = (i == limit) ? expanse.contextLimit : i; // extend at end 200 done = (i == limit); 201 script = lastScript; 202 203 if (DEBUG) { 204 System.out.println("-cs: " + current.contextStart 205 + ", s: " + current.start 206 + ", l: " + current.limit 207 + ", cl: " + current.contextLimit); 208 } 209 210 set(run, current); 211 return true; 212 } 213 214 // SHOULD BE METHOD ON POSITION 215 public static void set(Position run, Position current) { 216 run.contextStart = current.contextStart; 217 run.start = current.start; 218 run.limit = current.limit; 219 run.contextLimit = current.contextLimit; 220 } 221 222 public boolean atEnd() { 223 return current.limit == expanse.limit; 224 } 225 226 public void getCurrent(Position run) { 227 set(run, current); 228 } 229 230 public void getExpanse(Position run) { 231 set(run, expanse); 232 } 233 234 public String getName() { 235 return UScript.getName(script); 236 } 237 238 public void adjust(int newCurrentLimit) { 239 if (expanse == null) { 240 throw new IllegalArgumentException("Must reset() before calling"); 241 } 242 int delta = newCurrentLimit - current.limit; 243 current.limit += delta; 244 current.contextLimit += delta; 245 expanse.limit += delta; 246 expanse.contextLimit += delta; 247 } 248 249 // register Any-Script for every script. 250 251 private static Set scriptList = new HashSet(); 252 253 public static void registerAnyToScript() { 254 synchronized (scriptList) { 255 Enumeration sources = Transliterator.getAvailableSources(); 256 while(sources.hasMoreElements()) { 257 String source = (String) sources.nextElement(); 258 if (source.equals("Any")) continue; // to keep from looping 259 260 Enumeration targets = Transliterator.getAvailableTargets(source); 261 while(targets.hasMoreElements()) { 262 String target = (String) targets.nextElement(); 263 if (UScript.getCode(target) == null) continue; // SKIP unless we have a script (or locale) 264 if (scriptList.contains(target)) continue; // already encountered 265 scriptList.add(target); // otherwise add for later testing 266 267 Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target)); 268 if (variantSet.size() < 2) { 269 AnyTransliterator at = new AnyTransliterator(target, null); 270 DummyFactory.add(at.getID(), at); 271 } else { 272 Iterator variants = variantSet.iterator(); 273 while(variants.hasNext()) { 274 String variant = (String) variants.next(); 275 AnyTransliterator at = new AnyTransliterator( 276 (variant.length() > 0) ? target + "/" + variant : target, null); 277 DummyFactory.add(at.getID(), at); 278 } 279 } 280 } 281 } 282 } 283 } 284 285 static class DummyFactory implements Transliterator.Factory { 286 static DummyFactory singleton = new DummyFactory(); 287 static HashMap m = new HashMap(); 288 289 // Since Transliterators are immutable, we don't have to clone on set & get 290 static void add(String ID, Transliterator t) { 291 m.put(ID, t); 292 System.out.println("Registering: " + ID + ", " + t.toRules(true)); 293 Transliterator.registerFactory(ID, singleton); 294 } 295 public Transliterator getInstance(String ID) { 296 return (Transliterator) m.get(ID); 297 } 298 } 299 300 // Nice little Utility for converting Enumeration to collection 301 static Set add(Set s, Enumeration enumeration) { 302 while(enumeration.hasMoreElements()) { 303 s.add(enumeration.nextElement()); 304 } 305 return s; 306 } 307 308 309 } 310} 311