17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 2010-2014, International Business Machines
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others.  All Rights Reserved.
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* FCDUTF16CollationIterator.java, ported from utf16collationiterator.h/.cpp
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* C++ version created on: 2010oct27
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created by: Markus W. Scherer
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Normalizer2Impl;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Incrementally checks the input text for FCD and normalizes where necessary.
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class FCDUTF16CollationIterator extends UTF16CollationIterator {
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Partial constructor, see {@link CollationIterator#CollationIterator(CollationData)}.
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public FCDUTF16CollationIterator(CollationData d) {
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super(d);
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nfcImpl = d.nfcImpl;
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public FCDUTF16CollationIterator(CollationData data, boolean numeric, CharSequence s, int p) {
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super(data, numeric, s, p);
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rawSeq = s;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        segmentStart = p;
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rawLimit = s.length();
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nfcImpl = data.nfcImpl;
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkDir = 1;
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean equals(Object other) {
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Skip the UTF16CollationIterator and call its parent.
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!((CollationIterator)this).equals(other)) { return false; }
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        FCDUTF16CollationIterator o = (FCDUTF16CollationIterator)other;
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Compare the iterator state but not the text: Assume that the caller does that.
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(checkDir != o.checkDir) { return false; }
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(checkDir == 0 && (seq == rawSeq) != (o.seq == o.rawSeq)) { return false; }
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(checkDir != 0 || seq == rawSeq) {
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (pos - rawStart) == (o.pos - /*o.*/ rawStart);
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (segmentStart - rawStart) == (o.segmentStart - /*o.*/ rawStart) &&
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (pos - start) == (o.pos - o.start);
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int hashCode() {
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert false : "hashCode not designed";
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 42; // any arbitrary constant will do
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void resetToOffset(int newOffset) {
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset();
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        seq = rawSeq;
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        start = segmentStart = pos = rawStart + newOffset;
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        limit = rawLimit;
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkDir = 1;
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getOffset() {
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(checkDir != 0 || seq == rawSeq) {
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return pos - rawStart;
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(pos == start) {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return segmentStart - rawStart;
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return segmentLimit - rawStart;
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(boolean numeric, CharSequence s, int p) {
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super.setText(numeric, s, p);
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rawSeq = s;
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        segmentStart = p;
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rawLimit = limit = s.length();
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkDir = 1;
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int nextCodePoint() {
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c;
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(checkDir > 0) {
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(pos == limit) {
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return Collation.SENTINEL_CP;
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(pos++);
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(CollationFCD.hasTccc(c)) {
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(CollationFCD.maybeTibetanCompositeVowel(c) ||
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) {
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --pos;
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        nextSegment();
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c = seq.charAt(pos++);
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(checkDir == 0 && pos != limit) {
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(pos++);
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                switchToForward();
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char trail;
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(Character.isHighSurrogate(c) && pos != limit &&
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Character.isLowSurrogate(trail = seq.charAt(pos))) {
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++pos;
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return Character.toCodePoint(c, trail);
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return c;
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int previousCodePoint() {
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c;
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(checkDir < 0) {
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(pos == start) {
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return Collation.SENTINEL_CP;
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(--pos);
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(CollationFCD.hasLccc(c)) {
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(CollationFCD.maybeTibetanCompositeVowel(c) ||
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            (pos != start && CollationFCD.hasTccc(seq.charAt(pos - 1)))) {
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++pos;
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        previousSegment();
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c = seq.charAt(--pos);
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(checkDir == 0 && pos != start) {
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(--pos);
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                switchToBackward();
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char lead;
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(Character.isLowSurrogate(c) && pos != start &&
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Character.isHighSurrogate(lead = seq.charAt(pos - 1))) {
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --pos;
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return Character.toCodePoint(lead, c);
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return c;
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected long handleNextCE32() {
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c;
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(checkDir > 0) {
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(pos == limit) {
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return NO_CP_AND_CE32;
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(pos++);
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(CollationFCD.hasTccc(c)) {
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(CollationFCD.maybeTibetanCompositeVowel(c) ||
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) {
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --pos;
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        nextSegment();
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c = seq.charAt(pos++);
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(checkDir == 0 && pos != limit) {
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(pos++);
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                switchToForward();
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead(c));
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* boolean foundNULTerminator(); */
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void forwardNumCodePoints(int num) {
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Specify the class to avoid a virtual-function indirection.
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // In Java, we would declare this class final.
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(num > 0 && nextCodePoint() >= 0) {
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --num;
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void backwardNumCodePoints(int num) {
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Specify the class to avoid a virtual-function indirection.
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // In Java, we would declare this class final.
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(num > 0 && previousCodePoint() >= 0) {
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --num;
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Switches to forward checking if possible.
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void switchToForward() {
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert((checkDir < 0 && seq == rawSeq) || (checkDir == 0 && pos == limit));
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(checkDir < 0) {
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Turn around from backward checking.
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            start = segmentStart = pos;
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(pos == segmentLimit) {
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit = rawLimit;
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                checkDir = 1;  // Check forward.
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {  // pos < segmentLimit
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                checkDir = 0;  // Stay in FCD segment.
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Reached the end of the FCD segment.
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(seq == rawSeq) {
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The input text segment is FCD, extend it forward.
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The input text segment needed to be normalized.
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Switch to checking forward from it.
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                seq = rawSeq;
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                pos = start = segmentStart = segmentLimit;
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Note: If this segment is at the end of the input text,
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // then it might help to return false to indicate that, so that
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // we do not have to re-check and normalize when we turn around and go backwards.
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // However, that would complicate the call sites for an optimization of an unusual case.
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limit = rawLimit;
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            checkDir = 1;
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Extend the FCD text segment forward or normalize around pos.
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * To be called when checkDir > 0 && pos != limit.
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns with checkDir == 0 and pos != limit.
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void nextSegment() {
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(checkDir > 0 && seq == rawSeq && pos != limit);
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The input text [segmentStart..pos[ passes the FCD check.
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int p = pos;
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevCC = 0;
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Fetch the next character's fcd16 value.
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int q = p;
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c = Character.codePointAt(seq, p);
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            p += Character.charCount(c);
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int fcd16 = nfcImpl.getFCD16(c);
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int leadCC = fcd16 >> 8;
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(leadCC == 0 && q != pos) {
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // FCD boundary before the [q, p[ character.
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit = segmentLimit = q;
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(leadCC != 0 && (prevCC > leadCC || CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Fails FCD check. Find the next FCD boundary and normalize.
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                do {
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    q = p;
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(p == rawLimit) { break; }
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = Character.codePointAt(seq, p);
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    p += Character.charCount(c);
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } while(nfcImpl.getFCD16(c) > 0xff);
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                normalize(pos, q);
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                pos = start;
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prevCC = fcd16 & 0xff;
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(p == rawLimit || prevCC == 0) {
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // FCD boundary after the last character.
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit = segmentLimit = p;
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(pos != limit);
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkDir = 0;
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Switches to backward checking.
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void switchToBackward() {
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert((checkDir > 0 && seq == rawSeq) || (checkDir == 0 && pos == start));
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(checkDir > 0) {
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Turn around from forward checking.
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limit = segmentLimit = pos;
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(pos == segmentStart) {
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start = rawStart;
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                checkDir = -1;  // Check backward.
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {  // pos > segmentStart
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                checkDir = 0;  // Stay in FCD segment.
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Reached the start of the FCD segment.
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(seq == rawSeq) {
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The input text segment is FCD, extend it backward.
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The input text segment needed to be normalized.
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Switch to checking backward from it.
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                seq = rawSeq;
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                pos = limit = segmentLimit = segmentStart;
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            start = rawStart;
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            checkDir = -1;
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Extend the FCD text segment backward or normalize around pos.
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * To be called when checkDir < 0 && pos != start.
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns with checkDir == 0 and pos != start.
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void previousSegment() {
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(checkDir < 0 && seq == rawSeq && pos != start);
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The input text [pos..segmentLimit[ passes the FCD check.
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int p = pos;
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int nextCC = 0;
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Fetch the previous character's fcd16 value.
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int q = p;
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c = Character.codePointBefore(seq, p);
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            p -= Character.charCount(c);
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int fcd16 = nfcImpl.getFCD16(c);
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int trailCC = fcd16 & 0xff;
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(trailCC == 0 && q != pos) {
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // FCD boundary after the [p, q[ character.
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start = segmentStart = q;
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) ||
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Fails FCD check. Find the previous FCD boundary and normalize.
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                do {
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    q = p;
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(fcd16 <= 0xff || p == rawStart) { break; }
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = Character.codePointBefore(seq, p);
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    p -= Character.charCount(c);
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } while((fcd16 = nfcImpl.getFCD16(c)) != 0);
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                normalize(q, pos);
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                pos = limit;
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            nextCC = fcd16 >> 8;
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(p == rawStart || nextCC == 0) {
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // FCD boundary before the following character.
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start = segmentStart = p;
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(pos != start);
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkDir = 0;
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void normalize(int from, int to) {
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(normalized == null) {
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            normalized = new StringBuilder();
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // NFD without argument checking.
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nfcImpl.decompose(rawSeq, from, to, normalized, to - from);
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Switch collation processing into the FCD buffer
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // with the result of normalizing [segmentStart, segmentLimit[.
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        segmentStart = from;
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        segmentLimit = to;
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        seq = normalized;
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        start = 0;
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        limit = start + normalized.length();
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Text pointers: The input text is rawSeq[rawStart, rawLimit[.
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // (In C++, these are const UChar * pointers.
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // In Java, we use CharSequence rawSeq and the parent class' seq
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // together with int indexes.)
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // checkDir > 0:
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // The input text rawSeq[segmentStart..pos[ passes the FCD check.
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Moving forward checks incrementally.
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // segmentLimit is undefined. seq == rawSeq. limit == rawLimit.
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // checkDir < 0:
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // The input text rawSeq[pos..segmentLimit[ passes the FCD check.
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Moving backward checks incrementally.
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // segmentStart is undefined. seq == rawSeq. start == rawStart.
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // checkDir == 0:
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // The input text rawSeq[segmentStart..segmentLimit[ is being processed.
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // These pointers are at FCD boundaries.
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Either this text segment already passes the FCD check
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // and seq==rawSeq && segmentStart==start<=pos<=limit==segmentLimit,
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // or the current segment had to be normalized so that
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // rawSeq[segmentStart..segmentLimit[ turned into the normalized string,
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // corresponding to seq==normalized && 0==start<=pos<=limit==start+normalized.length().
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CharSequence rawSeq;
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int rawStart = 0;
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int segmentStart;
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int segmentLimit;
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int rawLimit;
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final Normalizer2Impl nfcImpl;
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private StringBuilder normalized;
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Direction of incremental FCD check. See comments before rawStart.
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int checkDir;
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
414