1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5*******************************************************************************
6*   Copyright (C) 2009-2014, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*******************************************************************************
9*/
10package android.icu.text;
11
12import java.io.IOException;
13
14import android.icu.util.ICUUncheckedIOException;
15
16/**
17 * Normalization filtered by a UnicodeSet.
18 * Normalizes portions of the text contained in the filter set and leaves
19 * portions not contained in the filter set unchanged.
20 * Filtering is done via UnicodeSet.span(..., UnicodeSet.SpanCondition.SIMPLE).
21 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
22 * This class implements all of (and only) the Normalizer2 API.
23 * An instance of this class is unmodifiable/immutable.
24 * @author Markus W. Scherer
25 * @hide Only a subset of ICU is exposed in Android
26 */
27public class FilteredNormalizer2 extends Normalizer2 {
28    /**
29     * Constructs a filtered normalizer wrapping any Normalizer2 instance
30     * and a filter set.
31     * Both are aliased and must not be modified or deleted while this object
32     * is used.
33     * The filter set should be frozen; otherwise the performance will suffer greatly.
34     * @param n2 wrapped Normalizer2 instance
35     * @param filterSet UnicodeSet which determines the characters to be normalized
36     */
37    public FilteredNormalizer2(Normalizer2 n2, UnicodeSet filterSet) {
38        norm2=n2;
39        set=filterSet;
40    }
41
42    /**
43     * {@inheritDoc}
44     */
45    @Override
46    public StringBuilder normalize(CharSequence src, StringBuilder dest) {
47        if(dest==src) {
48            throw new IllegalArgumentException();
49        }
50        dest.setLength(0);
51        normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE);
52        return dest;
53    }
54    /**
55     * {@inheritDoc}
56     */
57    @Override
58    public Appendable normalize(CharSequence src, Appendable dest) {
59        if(dest==src) {
60            throw new IllegalArgumentException();
61        }
62        return normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE);
63    }
64
65    /**
66     * {@inheritDoc}
67     */
68    @Override
69    public StringBuilder normalizeSecondAndAppend(
70            StringBuilder first, CharSequence second) {
71        return normalizeSecondAndAppend(first, second, true);
72    }
73    /**
74     * {@inheritDoc}
75     */
76    @Override
77    public StringBuilder append(StringBuilder first, CharSequence second) {
78        return normalizeSecondAndAppend(first, second, false);
79    }
80
81    /**
82     * {@inheritDoc}
83     */
84    @Override
85    public String getDecomposition(int c) {
86        return set.contains(c) ? norm2.getDecomposition(c) : null;
87    }
88
89    /**
90     * {@inheritDoc}
91     */
92    @Override
93    public String getRawDecomposition(int c) {
94        return set.contains(c) ? norm2.getRawDecomposition(c) : null;
95    }
96
97    /**
98     * {@inheritDoc}
99     */
100    @Override
101    public int composePair(int a, int b) {
102        return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : -1;
103    }
104
105    /**
106     * {@inheritDoc}
107     */
108    @Override
109    public int getCombiningClass(int c) {
110        return set.contains(c) ? norm2.getCombiningClass(c) : 0;
111    }
112
113    /**
114     * {@inheritDoc}
115     */
116    @Override
117    public boolean isNormalized(CharSequence s) {
118        UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
119        for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
120            int spanLimit=set.span(s, prevSpanLimit, spanCondition);
121            if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
122                spanCondition=UnicodeSet.SpanCondition.SIMPLE;
123            } else {
124                if(!norm2.isNormalized(s.subSequence(prevSpanLimit, spanLimit))) {
125                    return false;
126                }
127                spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
128            }
129            prevSpanLimit=spanLimit;
130        }
131        return true;
132    }
133
134    /**
135     * {@inheritDoc}
136     */
137    @Override
138    public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
139        Normalizer.QuickCheckResult result=Normalizer.YES;
140        UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
141        for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
142            int spanLimit=set.span(s, prevSpanLimit, spanCondition);
143            if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
144                spanCondition=UnicodeSet.SpanCondition.SIMPLE;
145            } else {
146                Normalizer.QuickCheckResult qcResult=
147                    norm2.quickCheck(s.subSequence(prevSpanLimit, spanLimit));
148                if(qcResult==Normalizer.NO) {
149                    return qcResult;
150                } else if(qcResult==Normalizer.MAYBE) {
151                    result=qcResult;
152                }
153                spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
154            }
155            prevSpanLimit=spanLimit;
156        }
157        return result;
158    }
159    /**
160     * {@inheritDoc}
161     */
162    @Override
163    public int spanQuickCheckYes(CharSequence s) {
164        UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
165        for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
166            int spanLimit=set.span(s, prevSpanLimit, spanCondition);
167            if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
168                spanCondition=UnicodeSet.SpanCondition.SIMPLE;
169            } else {
170                int yesLimit=
171                    prevSpanLimit+
172                    norm2.spanQuickCheckYes(s.subSequence(prevSpanLimit, spanLimit));
173                if(yesLimit<spanLimit) {
174                    return yesLimit;
175                }
176                spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
177            }
178            prevSpanLimit=spanLimit;
179        }
180        return s.length();
181    }
182
183    /**
184     * {@inheritDoc}
185     */
186    @Override
187    public boolean hasBoundaryBefore(int c) {
188        return !set.contains(c) || norm2.hasBoundaryBefore(c);
189    }
190
191    /**
192     * {@inheritDoc}
193     */
194    @Override
195    public boolean hasBoundaryAfter(int c) {
196        return !set.contains(c) || norm2.hasBoundaryAfter(c);
197    }
198
199    /**
200     * {@inheritDoc}
201     */
202    @Override
203    public boolean isInert(int c) {
204        return !set.contains(c) || norm2.isInert(c);
205    }
206
207    // Internal: No argument checking, and appends to dest.
208    // Pass as input spanCondition the one that is likely to yield a non-zero
209    // span length at the start of src.
210    // For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
211    // UnicodeSet.SpanCondition.SIMPLE should be passed in for the start of src
212    // and UnicodeSet.SpanCondition.NOT_CONTAINED should be passed in if we continue after
213    // an in-filter prefix.
214    private Appendable normalize(CharSequence src, Appendable dest,
215                                 UnicodeSet.SpanCondition spanCondition) {
216        // Don't throw away destination buffer between iterations.
217        StringBuilder tempDest=new StringBuilder();
218        try {
219            for(int prevSpanLimit=0; prevSpanLimit<src.length();) {
220                int spanLimit=set.span(src, prevSpanLimit, spanCondition);
221                int spanLength=spanLimit-prevSpanLimit;
222                if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
223                    if(spanLength!=0) {
224                        dest.append(src, prevSpanLimit, spanLimit);
225                    }
226                    spanCondition=UnicodeSet.SpanCondition.SIMPLE;
227                } else {
228                    if(spanLength!=0) {
229                        // Not norm2.normalizeSecondAndAppend() because we do not want
230                        // to modify the non-filter part of dest.
231                        dest.append(norm2.normalize(src.subSequence(prevSpanLimit, spanLimit), tempDest));
232                    }
233                    spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
234                }
235                prevSpanLimit=spanLimit;
236            }
237        } catch(IOException e) {
238            throw new ICUUncheckedIOException(e);
239        }
240        return dest;
241    }
242
243    private StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second,
244                                                   boolean doNormalize) {
245        if(first==second) {
246            throw new IllegalArgumentException();
247        }
248        if(first.length()==0) {
249            if(doNormalize) {
250                return normalize(second, first);
251            } else {
252                return first.append(second);
253            }
254        }
255        // merge the in-filter suffix of the first string with the in-filter prefix of the second
256        int prefixLimit=set.span(second, 0, UnicodeSet.SpanCondition.SIMPLE);
257        if(prefixLimit!=0) {
258            CharSequence prefix=second.subSequence(0, prefixLimit);
259            int suffixStart=set.spanBack(first, 0x7fffffff, UnicodeSet.SpanCondition.SIMPLE);
260            if(suffixStart==0) {
261                if(doNormalize) {
262                    norm2.normalizeSecondAndAppend(first, prefix);
263                } else {
264                    norm2.append(first, prefix);
265                }
266            } else {
267                StringBuilder middle=new StringBuilder(
268                        first.subSequence(suffixStart, first.length()));
269                if(doNormalize) {
270                    norm2.normalizeSecondAndAppend(middle, prefix);
271                } else {
272                    norm2.append(middle, prefix);
273                }
274                first.delete(suffixStart, 0x7fffffff).append(middle);
275            }
276        }
277        if(prefixLimit<second.length()) {
278            CharSequence rest=second.subSequence(prefixLimit, second.length());
279            if(doNormalize) {
280                normalize(rest, first, UnicodeSet.SpanCondition.NOT_CONTAINED);
281            } else {
282                first.append(rest);
283            }
284        }
285        return first;
286    }
287
288    private Normalizer2 norm2;
289    private UnicodeSet set;
290};
291