1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/**
4 *******************************************************************************
5 * Copyright (C) 2001-2010, International Business Machines Corporation and    *
6 * others. All Rights Reserved.                                                *
7 *******************************************************************************
8 */
9package com.ibm.icu.dev.demo.translit;
10import java.util.Enumeration;
11import java.util.HashMap;
12import java.util.HashSet;
13import java.util.Iterator;
14import java.util.Set;
15import java.util.TreeSet;
16
17import com.ibm.icu.lang.UScript;
18import com.ibm.icu.text.Replaceable;
19import com.ibm.icu.text.Transliterator;
20import com.ibm.icu.text.UTF16;
21import com.ibm.icu.text.UnicodeFilter;
22
23public class AnyTransliterator extends Transliterator {
24
25    static final boolean DEBUG = false;
26    private String targetName;
27    private RunIterator it;
28    private Position run;
29
30
31    public AnyTransliterator(String targetName, UnicodeFilter filter, RunIterator it){
32        super("Any-" + targetName, filter);
33        this.targetName = targetName;
34        this.it = it;
35        run = new Position();
36    }
37
38    public AnyTransliterator(String targetName, UnicodeFilter filter){
39        this(targetName, filter, new ScriptRunIterator());
40    }
41
42    static private Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex");
43
44    protected void handleTransliterate(Replaceable text,
45                                       Position offsets, boolean isIncremental) {
46        if (DEBUG) {
47            System.out.println("- handleTransliterate " + hex.transliterate(text.toString())
48                + ", " + toString(offsets));
49        }
50        it.reset(text, offsets);
51
52        while (it.next(run)) {
53            if (targetName.equalsIgnoreCase(it.getName())) {
54                if (DEBUG) System.out.println("Skipping identical: " + targetName);
55                run.start = run.limit; // show we processed
56                continue; // skip if same
57            }
58
59            Transliterator t;
60            String id = it.getName() + '-' + targetName;
61            try {
62                t = Transliterator.getInstance(id);
63            } catch (IllegalArgumentException ex) {
64                if (DEBUG) System.out.println("Couldn't find: " + id + ", Trying Latin as Pivot");
65                id = it.getName() + "-Latin; Latin-" + targetName;
66                try {
67                    t = Transliterator.getInstance(id);
68                } catch (IllegalArgumentException ex2) {
69                    if (DEBUG) System.out.println("Couldn't find: " + id);
70                    continue;
71                }
72            }
73            // TODO catch error later!!
74
75            if (DEBUG) {
76                System.out.println(t.getID());
77                System.out.println("input: " + hex.transliterate(text.toString())
78                 + ", " + toString(run));
79            }
80
81            if (isIncremental && it.atEnd()) {
82                t.transliterate(text, run);
83            } else {
84                t.finishTransliteration(text, run);
85            }
86            // adjust the offsets in line with the changes
87            it.adjust(run.limit);
88
89            if (DEBUG) {
90                System.out.println("output: " + hex.transliterate(text.toString())
91                 + ", " + toString(run));
92            }
93        }
94
95        // show how far we got!
96        it.getExpanse(offsets);
97        if (run.start == run.limit) offsets.start = offsets.limit;
98        else offsets.start = run.start;
99        if (DEBUG) {
100            System.out.println("+ handleTransliterate: " + ", " + toString(offsets));
101            System.out.println();
102        }
103    }
104
105    // should be method on Position
106    public static String toString(Position offsets) {
107        return "[cs: " + offsets.contextStart
108                + ", s: " + offsets.start
109                + ", l: " + offsets.limit
110                + ", cl: " + offsets.contextLimit
111                + "]";
112    }
113
114    public interface RunIterator {
115        public void reset(Replaceable text, Position expanse);
116        public void getExpanse(Position run);
117        public void reset();
118        public boolean next(Position run);
119        public void getCurrent(Position run);
120        public String getName();
121        public void adjust(int newCurrentLimit);
122        public boolean atEnd();
123    }
124
125    /**
126     * Returns a series of ranges corresponding to scripts. They will be of the form:
127     * ccccSScSSccccTTcTcccc    - where c is common, S is the first script and T is the second
128     *|            |            - first run
129     *         |            |    - second run
130     * That is, the runs will overlap. The reason for this is so that a transliterator can
131     * consider common characters both before and after the scripts.
132     * The only time that contextStart != start is for the first run
133     *    (the context is the start context of the entire expanse)
134     * The only time that contextLimit != limit is for the last run
135     *    (the context is the end context of the entire expanse)
136     */
137    public static class ScriptRunIterator implements RunIterator {
138        private Replaceable text;
139        private Position expanse = new Position();
140        private Position current = new Position();
141        private int script;
142        private boolean done = true;
143
144
145        public void reset(Replaceable repText, Position expansePos) {
146            set(this.expanse, expansePos);
147            this.text = repText;
148            reset();
149        }
150
151        public void reset() {
152            done = false;
153            //this.expanse = expanse;
154            script = UScript.INVALID_CODE;
155            // set up first range to be empty, at beginning
156            current.contextStart = expanse.contextStart;
157            current.start = current.limit = current.contextLimit = expanse.start;
158        }
159
160        public boolean next(Position run) {
161            if (done) return false;
162            if (DEBUG) {
163                System.out.println("+cs: " + current.contextStart
164                    + ", s: " + current.start
165                    + ", l: " + current.limit
166                    + ", cl: " + current.contextLimit);
167            }
168            // reset start context run to the last end
169            current.start = current.limit;
170
171            // Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script.
172            int i, cp;
173            int limit = expanse.start;
174            for (i = current.start; i > limit; i -= UTF16.getCharCount(cp)) {
175                cp = text.char32At(i);
176                int scrpt = UScript.getScript(cp);
177                if (scrpt != UScript.COMMON && scrpt != UScript.INHERITED) break;
178            }
179            current.start = i;
180            current.contextStart = (i == limit) ? expanse.contextStart : i; // extend at start
181
182            // PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit
183            int lastScript = UScript.COMMON;
184            //int veryLastScript = UScript.COMMON;
185            limit = expanse.limit;
186            for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) {
187                cp = text.char32At(i);
188                int scrpt = UScript.getScript(cp);
189                if (scrpt == UScript.INHERITED) scrpt = UScript.COMMON;
190                if (scrpt != UScript.COMMON) {
191                    // if we find a real script:
192                    //   if we already had a script, bail
193                    //   otherwise set our script
194                    if (lastScript == UScript.COMMON) lastScript = scrpt;
195                    else if (lastScript != scrpt) break;
196                }
197            }
198            current.limit = i;
199            current.contextLimit = (i == limit) ? expanse.contextLimit : i; // extend at end
200            done = (i == limit);
201            script = lastScript;
202
203            if (DEBUG) {
204                System.out.println("-cs: " + current.contextStart
205                    + ", s: " + current.start
206                    + ", l: " + current.limit
207                    + ", cl: " + current.contextLimit);
208            }
209
210            set(run, current);
211            return true;
212        }
213
214        // SHOULD BE METHOD ON POSITION
215        public static void set(Position run, Position current) {
216            run.contextStart = current.contextStart;
217            run.start = current.start;
218            run.limit = current.limit;
219            run.contextLimit = current.contextLimit;
220        }
221
222        public boolean atEnd() {
223            return current.limit == expanse.limit;
224        }
225
226        public void getCurrent(Position run) {
227            set(run, current);
228        }
229
230        public void getExpanse(Position run) {
231            set(run, expanse);
232        }
233
234        public String getName() {
235            return UScript.getName(script);
236        }
237
238        public void adjust(int newCurrentLimit) {
239            if (expanse == null) {
240                throw new IllegalArgumentException("Must reset() before calling");
241            }
242            int delta = newCurrentLimit - current.limit;
243            current.limit += delta;
244            current.contextLimit += delta;
245            expanse.limit += delta;
246            expanse.contextLimit += delta;
247        }
248
249        // register Any-Script for every script.
250
251        private static Set scriptList = new HashSet();
252
253        public static void registerAnyToScript() {
254            synchronized (scriptList) {
255                Enumeration sources = Transliterator.getAvailableSources();
256                while(sources.hasMoreElements()) {
257                    String source = (String) sources.nextElement();
258                    if (source.equals("Any")) continue; // to keep from looping
259
260                    Enumeration targets = Transliterator.getAvailableTargets(source);
261                    while(targets.hasMoreElements()) {
262                        String target = (String) targets.nextElement();
263                        if (UScript.getCode(target) == null) continue; // SKIP unless we have a script (or locale)
264                        if (scriptList.contains(target)) continue; // already encountered
265                        scriptList.add(target); // otherwise add for later testing
266
267                        Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
268                        if (variantSet.size() < 2) {
269                            AnyTransliterator at = new AnyTransliterator(target, null);
270                            DummyFactory.add(at.getID(), at);
271                        } else {
272                            Iterator variants = variantSet.iterator();
273                            while(variants.hasNext()) {
274                                String variant = (String) variants.next();
275                                AnyTransliterator at = new AnyTransliterator(
276                                    (variant.length() > 0) ? target + "/" + variant : target, null);
277                                DummyFactory.add(at.getID(), at);
278                            }
279                        }
280                    }
281                }
282            }
283        }
284
285        static class DummyFactory implements Transliterator.Factory {
286            static DummyFactory singleton = new DummyFactory();
287            static HashMap m = new HashMap();
288
289            // Since Transliterators are immutable, we don't have to clone on set & get
290            static void add(String ID, Transliterator t) {
291                m.put(ID, t);
292                System.out.println("Registering: " + ID + ", " + t.toRules(true));
293                Transliterator.registerFactory(ID, singleton);
294            }
295            public Transliterator getInstance(String ID) {
296                return (Transliterator) m.get(ID);
297            }
298        }
299
300        // Nice little Utility for converting Enumeration to collection
301        static Set add(Set s, Enumeration enumeration) {
302            while(enumeration.hasMoreElements()) {
303                s.add(enumeration.nextElement());
304            }
305            return s;
306        }
307
308
309    }
310}
311