1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 2010-2011, Google, International Business Machines            *
6 * Corporation and others. All Rights Reserved.                                *
7 *******************************************************************************
8 */
9package com.ibm.icu.text;
10
11import java.util.HashSet;
12import java.util.Set;
13
14import com.ibm.icu.lang.CharSequences;
15
16/**
17 * Simple internal utility class for helping with getSource/TargetSet
18 */
19class SourceTargetUtility {
20    final Transform<String, String> transform;
21    final UnicodeSet sourceCache;
22    final Set<String> sourceStrings;
23    static final UnicodeSet NON_STARTERS = new UnicodeSet("[:^ccc=0:]").freeze();
24    static Normalizer2 NFC = Normalizer2.getNFCInstance();
25    //static final UnicodeSet TRAILING_COMBINING = new UnicodeSet();
26
27    public SourceTargetUtility(Transform<String, String> transform) {
28        this(transform, null);
29    }
30
31    public SourceTargetUtility(Transform<String, String> transform, Normalizer2 normalizer) {
32        this.transform = transform;
33        if (normalizer != null) {
34//            synchronized (SourceTargetUtility.class) {
35//                if (NFC == null) {
36//                    NFC = Normalizer2.getInstance(null, "nfc", Mode.COMPOSE);
37//                    for (int i = 0; i <= 0x10FFFF; ++i) {
38//                        String d = NFC.getDecomposition(i);
39//                        if (d == null) {
40//                            continue;
41//                        }
42//                        String s = NFC.normalize(d);
43//                        if (!CharSequences.equals(i, s)) {
44//                            continue;
45//                        }
46//                        // composes
47//                        boolean first = false;
48//                        for (int trailing : CharSequences.codePoints(d)) {
49//                            if (first) {
50//                                first = false;
51//                            } else {
52//                                TRAILING_COMBINING.add(trailing);
53//                            }
54//                        }
55//                    }
56//                }
57//            }
58            sourceCache = new UnicodeSet("[:^ccc=0:]");
59        } else {
60            sourceCache = new UnicodeSet();
61        }
62        sourceStrings = new HashSet<String>();
63        for (int i = 0; i <= 0x10FFFF; ++i) {
64            String s = transform.transform(UTF16.valueOf(i));
65            boolean added = false;
66            if (!CharSequences.equals(i, s)) {
67                sourceCache.add(i);
68                added = true;
69            }
70            if (normalizer == null) {
71                continue;
72            }
73            String d = NFC.getDecomposition(i);
74            if (d == null) {
75                continue;
76            }
77            s = transform.transform(d);
78            if (!d.equals(s)) {
79                sourceStrings.add(d);
80            }
81            if (added) {
82                continue;
83            }
84            if (!normalizer.isInert(i)) {
85                sourceCache.add(i);
86                continue;
87            }
88            // see if any of the non-starters change s; if so, add i
89//            for (String ns : TRAILING_COMBINING) {
90//                String s2 = transform.transform(s + ns);
91//                if (!s2.startsWith(s)) {
92//                    sourceCache.add(i);
93//                    break;
94//                }
95//            }
96
97            // int endOfFirst = CharSequences.onCharacterBoundary(d, 1) ? 1 : 2;
98            // if (endOfFirst >= d.length()) {
99            // continue;
100            // }
101            // // now add all initial substrings
102            // for (int j = 1; j < d.length(); ++j) {
103            // if (!CharSequences.onCharacterBoundary(d, j)) {
104            // continue;
105            // }
106            // String dd = d.substring(0,j);
107            // s = transform.transform(dd);
108            // if (!dd.equals(s)) {
109            // sourceStrings.add(dd);
110            // }
111            // }
112        }
113        sourceCache.freeze();
114    }
115
116    public void addSourceTargetSet(Transliterator transliterator, UnicodeSet inputFilter, UnicodeSet sourceSet,
117            UnicodeSet targetSet) {
118        UnicodeSet myFilter = transliterator.getFilterAsUnicodeSet(inputFilter);
119        UnicodeSet affectedCharacters = new UnicodeSet(sourceCache).retainAll(myFilter);
120        sourceSet.addAll(affectedCharacters);
121        for (String s : affectedCharacters) {
122            targetSet.addAll(transform.transform(s));
123        }
124        for (String s : sourceStrings) {
125            if (myFilter.containsAll(s)) {
126                String t = transform.transform(s);
127                if (!s.equals(t)) {
128                    targetSet.addAll(t);
129                    sourceSet.addAll(s);
130                }
131            }
132        }
133    }
134}
135