/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* FCDIterCollationIterator.java, ported from uitercollationiterator.h/.cpp
*
* C++ version created on: 2012sep23 (from utf16collationiterator.h)
* created by: Markus W. Scherer
*/
14
package android.icu.impl.coll;

import android.icu.impl.Normalizer2Impl;
import android.icu.text.UCharacterIterator;
19
20/**
21 * Incrementally checks the input text for FCD and normalizes where necessary.
22 * @hide Only a subset of ICU is exposed in Android
23 */
24public final class FCDIterCollationIterator extends IterCollationIterator {
25    public FCDIterCollationIterator(CollationData data, boolean numeric,
26            UCharacterIterator ui, int startIndex) {
27        super(data, numeric, ui);
28        state = State.ITER_CHECK_FWD;
29        start = startIndex;
30        nfcImpl = data.nfcImpl;
31    }
32
33    @Override
34    public void resetToOffset(int newOffset) {
35        super.resetToOffset(newOffset);
36        start = newOffset;
37        state = State.ITER_CHECK_FWD;
38    }
39
40    @Override
41    public int getOffset() {
42        if(state.compareTo(State.ITER_CHECK_BWD) <= 0) {
43            return iter.getIndex();
44        } else if(state == State.ITER_IN_FCD_SEGMENT) {
45            return pos;
46        } else if(pos == 0) {
47            return start;
48        } else {
49            return limit;
50        }
51    }
52
53    @Override
54    public int nextCodePoint() {
55        int c;
56        for(;;) {
57            if(state == State.ITER_CHECK_FWD) {
58                c = iter.next();
59                if(c < 0) {
60                    return c;
61                }
62                if(CollationFCD.hasTccc(c)) {
63                    if(CollationFCD.maybeTibetanCompositeVowel(c) ||
64                            CollationFCD.hasLccc(iter.current())) {
65                        iter.previous();
66                        if(!nextSegment()) {
67                            return Collation.SENTINEL_CP;
68                        }
69                        continue;
70                    }
71                }
72                if(isLeadSurrogate(c)) {
73                    int trail = iter.next();
74                    if(isTrailSurrogate(trail)) {
75                        return Character.toCodePoint((char)c, (char)trail);
76                    } else if(trail >= 0) {
77                        iter.previous();
78                    }
79                }
80                return c;
81            } else if(state == State.ITER_IN_FCD_SEGMENT && pos != limit) {
82                c = iter.nextCodePoint();
83                pos += Character.charCount(c);
84                assert(c >= 0);
85                return c;
86            } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 &&
87                    pos != normalized.length()) {
88                c = normalized.codePointAt(pos);
89                pos += Character.charCount(c);
90                return c;
91            } else {
92                switchToForward();
93            }
94        }
95    }
96
97    @Override
98    public int previousCodePoint() {
99        int c;
100        for(;;) {
101            if(state == State.ITER_CHECK_BWD) {
102                c = iter.previous();
103                if(c < 0) {
104                    start = pos = 0;
105                    state = State.ITER_IN_FCD_SEGMENT;
106                    return Collation.SENTINEL_CP;
107                }
108                if(CollationFCD.hasLccc(c)) {
109                    int prev = Collation.SENTINEL_CP;
110                    if(CollationFCD.maybeTibetanCompositeVowel(c) ||
111                            CollationFCD.hasTccc(prev = iter.previous())) {
112                        iter.next();
113                        if(prev >= 0) {
114                            iter.next();
115                        }
116                        if(!previousSegment()) {
117                            return Collation.SENTINEL_CP;
118                        }
119                        continue;
120                    }
121                    // hasLccc(trail)=true for all trail surrogates
122                    if(isTrailSurrogate(c)) {
123                        if(prev < 0) {
124                            prev = iter.previous();
125                        }
126                        if(isLeadSurrogate(prev)) {
127                            return Character.toCodePoint((char)prev, (char)c);
128                        }
129                    }
130                    if(prev >= 0) {
131                        iter.next();
132                    }
133                }
134                return c;
135            } else if(state == State.ITER_IN_FCD_SEGMENT && pos != start) {
136                c = iter.previousCodePoint();
137                pos -= Character.charCount(c);
138                assert(c >= 0);
139                return c;
140            } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos != 0) {
141                c = normalized.codePointBefore(pos);
142                pos -= Character.charCount(c);
143                return c;
144            } else {
145                switchToBackward();
146            }
147        }
148    }
149
150    @Override
151    protected long handleNextCE32() {
152        int c;
153        for(;;) {
154            if(state == State.ITER_CHECK_FWD) {
155                c = iter.next();
156                if(c < 0) {
157                    return NO_CP_AND_CE32;
158                }
159                if(CollationFCD.hasTccc(c)) {
160                    if(CollationFCD.maybeTibetanCompositeVowel(c) ||
161                            CollationFCD.hasLccc(iter.current())) {
162                        iter.previous();
163                        if(!nextSegment()) {
164                            c = Collation.SENTINEL_CP;
165                            return Collation.FALLBACK_CE32;
166                        }
167                        continue;
168                    }
169                }
170                break;
171            } else if(state == State.ITER_IN_FCD_SEGMENT && pos != limit) {
172                c = iter.next();
173                ++pos;
174                assert(c >= 0);
175                break;
176            } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 &&
177                    pos != normalized.length()) {
178                c = normalized.charAt(pos++);
179                break;
180            } else {
181                switchToForward();
182            }
183        }
184        return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead((char)c));
185    }
186
187    @Override
188    protected char handleGetTrailSurrogate() {
189        if(state.compareTo(State.ITER_IN_FCD_SEGMENT) <= 0) {
190            int trail = iter.next();
191            if(isTrailSurrogate(trail)) {
192                if(state == State.ITER_IN_FCD_SEGMENT) { ++pos; }
193            } else if(trail >= 0) {
194                iter.previous();
195            }
196            return (char)trail;
197        } else {
198            assert(pos < normalized.length());
199            char trail;
200            if(Character.isLowSurrogate(trail = normalized.charAt(pos))) { ++pos; }
201            return trail;
202        }
203    }
204
205    @Override
206    protected void forwardNumCodePoints(int num) {
207        // Specify the class to avoid a virtual-function indirection.
208        // In Java, we would declare this class final.
209        while(num > 0 && nextCodePoint() >= 0) {
210            --num;
211        }
212    }
213
214    @Override
215    protected void backwardNumCodePoints(int num) {
216        // Specify the class to avoid a virtual-function indirection.
217        // In Java, we would declare this class final.
218        while(num > 0 && previousCodePoint() >= 0) {
219            --num;
220        }
221    }
222
223    /**
224     * Switches to forward checking if possible.
225     */
226    private void switchToForward() {
227        assert(state == State.ITER_CHECK_BWD ||
228                (state == State.ITER_IN_FCD_SEGMENT && pos == limit) ||
229                (state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos == normalized.length()));
230        if(state == State.ITER_CHECK_BWD) {
231            // Turn around from backward checking.
232            start = pos = iter.getIndex();
233            if(pos == limit) {
234                state = State.ITER_CHECK_FWD;  // Check forward.
235            } else {  // pos < limit
236                state = State.ITER_IN_FCD_SEGMENT;  // Stay in FCD segment.
237            }
238        } else {
239            // Reached the end of the FCD segment.
240            if(state == State.ITER_IN_FCD_SEGMENT) {
241                // The input text segment is FCD, extend it forward.
242            } else {
243                // The input text segment needed to be normalized.
244                // Switch to checking forward from it.
245                if(state == State.IN_NORM_ITER_AT_START) {
246                    iter.moveIndex(limit - start);
247                }
248                start = limit;
249            }
250            state = State.ITER_CHECK_FWD;
251        }
252    }
253
254    /**
255     * Extends the FCD text segment forward or normalizes around pos.
256     * @return true if success
257     */
258    private boolean nextSegment() {
259        assert(state == State.ITER_CHECK_FWD);
260        // The input text [start..(iter index)[ passes the FCD check.
261        pos = iter.getIndex();
262        // Collect the characters being checked, in case they need to be normalized.
263        if(s == null) {
264            s = new StringBuilder();
265        } else {
266            s.setLength(0);
267        }
268        int prevCC = 0;
269        for(;;) {
270            // Fetch the next character and its fcd16 value.
271            int c = iter.nextCodePoint();
272            if(c < 0) { break; }
273            int fcd16 = nfcImpl.getFCD16(c);
274            int leadCC = fcd16 >> 8;
275            if(leadCC == 0 && s.length() != 0) {
276                // FCD boundary before this character.
277                iter.previousCodePoint();
278                break;
279            }
280            s.appendCodePoint(c);
281            if(leadCC != 0 && (prevCC > leadCC || CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
282                // Fails FCD check. Find the next FCD boundary and normalize.
283                for(;;) {
284                    c = iter.nextCodePoint();
285                    if(c < 0) { break; }
286                    if(nfcImpl.getFCD16(c) <= 0xff) {
287                        iter.previousCodePoint();
288                        break;
289                    }
290                    s.appendCodePoint(c);
291                }
292                normalize(s);
293                start = pos;
294                limit = pos + s.length();
295                state = State.IN_NORM_ITER_AT_LIMIT;
296                pos = 0;
297                return true;
298            }
299            prevCC = fcd16 & 0xff;
300            if(prevCC == 0) {
301                // FCD boundary after the last character.
302                break;
303            }
304        }
305        limit = pos + s.length();
306        assert(pos != limit);
307        iter.moveIndex(-s.length());
308        state = State.ITER_IN_FCD_SEGMENT;
309        return true;
310    }
311
312    /**
313     * Switches to backward checking.
314     */
315    private void switchToBackward() {
316        assert(state == State.ITER_CHECK_FWD ||
317                (state == State.ITER_IN_FCD_SEGMENT && pos == start) ||
318                (state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos == 0));
319        if(state == State.ITER_CHECK_FWD) {
320            // Turn around from forward checking.
321            limit = pos = iter.getIndex();
322            if(pos == start) {
323                state = State.ITER_CHECK_BWD;  // Check backward.
324            } else {  // pos > start
325                state = State.ITER_IN_FCD_SEGMENT;  // Stay in FCD segment.
326            }
327        } else {
328            // Reached the start of the FCD segment.
329            if(state == State.ITER_IN_FCD_SEGMENT) {
330                // The input text segment is FCD, extend it backward.
331            } else {
332                // The input text segment needed to be normalized.
333                // Switch to checking backward from it.
334                if(state == State.IN_NORM_ITER_AT_LIMIT) {
335                    iter.moveIndex(start - limit);
336                }
337                limit = start;
338            }
339            state = State.ITER_CHECK_BWD;
340        }
341    }
342
343    /**
344     * Extends the FCD text segment backward or normalizes around pos.
345     * @return true if success
346     */
347    private boolean previousSegment() {
348        assert(state == State.ITER_CHECK_BWD);
349        // The input text [(iter index)..limit[ passes the FCD check.
350        pos = iter.getIndex();
351        // Collect the characters being checked, in case they need to be normalized.
352        if(s == null) {
353            s = new StringBuilder();
354        } else {
355            s.setLength(0);
356        }
357        int nextCC = 0;
358        for(;;) {
359            // Fetch the previous character and its fcd16 value.
360            int c = iter.previousCodePoint();
361            if(c < 0) { break; }
362            int fcd16 = nfcImpl.getFCD16(c);
363            int trailCC = fcd16 & 0xff;
364            if(trailCC == 0 && s.length() != 0) {
365                // FCD boundary after this character.
366                iter.nextCodePoint();
367                break;
368            }
369            s.appendCodePoint(c);
370            if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) ||
371                                CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
372                // Fails FCD check. Find the previous FCD boundary and normalize.
373                while(fcd16 > 0xff) {
374                    c = iter.previousCodePoint();
375                    if(c < 0) { break; }
376                    fcd16 = nfcImpl.getFCD16(c);
377                    if(fcd16 == 0) {
378                        iter.nextCodePoint();
379                        break;
380                    }
381                    s.appendCodePoint(c);
382                }
383                s.reverse();
384                normalize(s);
385                limit = pos;
386                start = pos - s.length();
387                state = State.IN_NORM_ITER_AT_START;
388                pos = normalized.length();
389                return true;
390            }
391            nextCC = fcd16 >> 8;
392            if(nextCC == 0) {
393                // FCD boundary before the following character.
394                break;
395            }
396        }
397        start = pos - s.length();
398        assert(pos != start);
399        iter.moveIndex(s.length());
400        state = State.ITER_IN_FCD_SEGMENT;
401        return true;
402    }
403
404    private void normalize(CharSequence s) {
405        if(normalized == null) {
406            normalized = new StringBuilder();
407        }
408        // NFD without argument checking.
409        nfcImpl.decompose(s, normalized);
410    }
411
412    private enum State {
413        /**
414         * The input text [start..(iter index)[ passes the FCD check.
415         * Moving forward checks incrementally.
416         * pos & limit are undefined.
417         */
418        ITER_CHECK_FWD,
419        /**
420         * The input text [(iter index)..limit[ passes the FCD check.
421         * Moving backward checks incrementally.
422         * start & pos are undefined.
423         */
424        ITER_CHECK_BWD,
425        /**
426         * The input text [start..limit[ passes the FCD check.
427         * pos tracks the current text index.
428         */
429        ITER_IN_FCD_SEGMENT,
430        /**
431         * The input text [start..limit[ failed the FCD check and was normalized.
432         * pos tracks the current index in the normalized string.
433         * The text iterator is at the limit index.
434         */
435        IN_NORM_ITER_AT_LIMIT,
436        /**
437         * The input text [start..limit[ failed the FCD check and was normalized.
438         * pos tracks the current index in the normalized string.
439         * The text iterator is at the start index.
440         */
441        IN_NORM_ITER_AT_START
442    }
443
444    private State state;
445
446    private int start;
447    private int pos;
448    private int limit;
449
450    private final Normalizer2Impl nfcImpl;
451    private StringBuilder s;
452    private StringBuilder normalized;
453}
454