12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others.
22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 2010-2014, International Business Machines
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others.  All Rights Reserved.
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* FCDUTF16CollationIterator.java, ported from utf16collationiterator.h/.cpp
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* C++ version created on: 2010oct27
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created by: Markus W. Scherer
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Normalizer2Impl;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Incrementally checks the input text for FCD and normalizes where necessary.
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class FCDUTF16CollationIterator extends UTF16CollationIterator {
/**
 * Partial constructor, see {@link CollationIterator#CollationIterator(CollationData)}.
 * Only the normalization data is captured; this constructor does not assign
 * rawSeq, segmentStart, rawLimit or checkDir, which
 * {@link #setText(boolean, CharSequence, int)} assigns.
 *
 * @param d the collation data, passed to the superclass; its {@code nfcImpl}
 *          is kept for the FCD checks
 */
public FCDUTF16CollationIterator(CollationData d) {
    super(d);
    this.nfcImpl = d.nfcImpl;
}
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Creates an iterator over {@code s} that starts checking forward at index {@code p}.
 *
 * @param data    the collation data, passed to the superclass; its
 *                {@code nfcImpl} is kept for the FCD checks
 * @param numeric forwarded unchanged to the superclass constructor
 * @param s       the raw input text; its length becomes the raw limit
 * @param p       the initial index, also recorded as the segment start
 */
public FCDUTF16CollationIterator(CollationData data, boolean numeric, CharSequence s, int p) {
    super(data, numeric, s, p);
    this.nfcImpl = data.nfcImpl;
    this.rawSeq = s;
    this.rawLimit = s.length();
    this.segmentStart = p;
    // A fresh iterator starts out checking forward.
    this.checkDir = 1;
}
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean equals(Object other) {
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Skip the UTF16CollationIterator and call its parent.
422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        if (!(other instanceof CollationIterator)
432d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            || !((CollationIterator)this).equals(other)
442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            || !(other instanceof FCDUTF16CollationIterator))
452d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        {
462d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            return false;
472d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        }
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        FCDUTF16CollationIterator o = (FCDUTF16CollationIterator)other;
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Compare the iterator state but not the text: Assume that the caller does that.
502d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        if (checkDir != o.checkDir) {
512d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            return false;
522d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        }
532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        if (checkDir == 0 && (seq == rawSeq) != (o.seq == o.rawSeq)) {
542d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            return false;
552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        }
562d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        if (checkDir != 0 || seq == rawSeq) {
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (pos - rawStart) == (o.pos - /*o.*/ rawStart);
582d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        }
592d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        else {
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (segmentStart - rawStart) == (o.segmentStart - /*o.*/ rawStart) &&
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (pos - start) == (o.pos - o.start);
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int hashCode() {
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert false : "hashCode not designed";
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 42; // any arbitrary constant will do
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void resetToOffset(int newOffset) {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset();
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        seq = rawSeq;
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        start = segmentStart = pos = rawStart + newOffset;
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        limit = rawLimit;
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkDir = 1;
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getOffset() {
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(checkDir != 0 || seq == rawSeq) {
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return pos - rawStart;
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(pos == start) {
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return segmentStart - rawStart;
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return segmentLimit - rawStart;
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(boolean numeric, CharSequence s, int p) {
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super.setText(numeric, s, p);
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rawSeq = s;
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        segmentStart = p;
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rawLimit = limit = s.length();
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkDir = 1;
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int nextCodePoint() {
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c;
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(checkDir > 0) {
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(pos == limit) {
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return Collation.SENTINEL_CP;
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(pos++);
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(CollationFCD.hasTccc(c)) {
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(CollationFCD.maybeTibetanCompositeVowel(c) ||
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) {
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --pos;
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        nextSegment();
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c = seq.charAt(pos++);
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(checkDir == 0 && pos != limit) {
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(pos++);
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                switchToForward();
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char trail;
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(Character.isHighSurrogate(c) && pos != limit &&
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Character.isLowSurrogate(trail = seq.charAt(pos))) {
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++pos;
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return Character.toCodePoint(c, trail);
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return c;
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int previousCodePoint() {
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c;
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(checkDir < 0) {
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(pos == start) {
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return Collation.SENTINEL_CP;
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(--pos);
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(CollationFCD.hasLccc(c)) {
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(CollationFCD.maybeTibetanCompositeVowel(c) ||
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            (pos != start && CollationFCD.hasTccc(seq.charAt(pos - 1)))) {
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++pos;
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        previousSegment();
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c = seq.charAt(--pos);
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(checkDir == 0 && pos != start) {
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(--pos);
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                switchToBackward();
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char lead;
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(Character.isLowSurrogate(c) && pos != start &&
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Character.isHighSurrogate(lead = seq.charAt(pos - 1))) {
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --pos;
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return Character.toCodePoint(lead, c);
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return c;
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected long handleNextCE32() {
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c;
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(checkDir > 0) {
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(pos == limit) {
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return NO_CP_AND_CE32;
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(pos++);
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(CollationFCD.hasTccc(c)) {
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(CollationFCD.maybeTibetanCompositeVowel(c) ||
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) {
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --pos;
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        nextSegment();
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c = seq.charAt(pos++);
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(checkDir == 0 && pos != limit) {
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = seq.charAt(pos++);
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                switchToForward();
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead(c));
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* boolean foundNULTerminator(); */
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void forwardNumCodePoints(int num) {
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Specify the class to avoid a virtual-function indirection.
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // In Java, we would declare this class final.
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(num > 0 && nextCodePoint() >= 0) {
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --num;
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void backwardNumCodePoints(int num) {
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Specify the class to avoid a virtual-function indirection.
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // In Java, we would declare this class final.
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(num > 0 && previousCodePoint() >= 0) {
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --num;
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Switches to forward checking if possible.
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void switchToForward() {
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert((checkDir < 0 && seq == rawSeq) || (checkDir == 0 && pos == limit));
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(checkDir < 0) {
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Turn around from backward checking.
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            start = segmentStart = pos;
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(pos == segmentLimit) {
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit = rawLimit;
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                checkDir = 1;  // Check forward.
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {  // pos < segmentLimit
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                checkDir = 0;  // Stay in FCD segment.
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Reached the end of the FCD segment.
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(seq == rawSeq) {
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The input text segment is FCD, extend it forward.
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The input text segment needed to be normalized.
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Switch to checking forward from it.
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                seq = rawSeq;
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                pos = start = segmentStart = segmentLimit;
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Note: If this segment is at the end of the input text,
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // then it might help to return false to indicate that, so that
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // we do not have to re-check and normalize when we turn around and go backwards.
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // However, that would complicate the call sites for an optimization of an unusual case.
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limit = rawLimit;
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            checkDir = 1;
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Extend the FCD text segment forward or normalize around pos.
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * To be called when checkDir > 0 && pos != limit.
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns with checkDir == 0 and pos != limit.
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void nextSegment() {
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(checkDir > 0 && seq == rawSeq && pos != limit);
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The input text [segmentStart..pos[ passes the FCD check.
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int p = pos;
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevCC = 0;
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Fetch the next character's fcd16 value.
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int q = p;
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c = Character.codePointAt(seq, p);
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            p += Character.charCount(c);
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int fcd16 = nfcImpl.getFCD16(c);
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int leadCC = fcd16 >> 8;
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(leadCC == 0 && q != pos) {
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // FCD boundary before the [q, p[ character.
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit = segmentLimit = q;
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(leadCC != 0 && (prevCC > leadCC || CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Fails FCD check. Find the next FCD boundary and normalize.
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                do {
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    q = p;
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(p == rawLimit) { break; }
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = Character.codePointAt(seq, p);
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    p += Character.charCount(c);
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } while(nfcImpl.getFCD16(c) > 0xff);
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                normalize(pos, q);
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                pos = start;
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prevCC = fcd16 & 0xff;
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(p == rawLimit || prevCC == 0) {
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // FCD boundary after the last character.
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit = segmentLimit = p;
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(pos != limit);
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkDir = 0;
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Switches to backward checking.
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void switchToBackward() {
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert((checkDir > 0 && seq == rawSeq) || (checkDir == 0 && pos == start));
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(checkDir > 0) {
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Turn around from forward checking.
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limit = segmentLimit = pos;
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(pos == segmentStart) {
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start = rawStart;
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                checkDir = -1;  // Check backward.
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {  // pos > segmentStart
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                checkDir = 0;  // Stay in FCD segment.
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Reached the start of the FCD segment.
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(seq == rawSeq) {
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The input text segment is FCD, extend it backward.
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The input text segment needed to be normalized.
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Switch to checking backward from it.
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                seq = rawSeq;
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                pos = limit = segmentLimit = segmentStart;
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            start = rawStart;
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            checkDir = -1;
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Extend the FCD text segment backward or normalize around pos.
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * To be called when checkDir < 0 && pos != start.
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns with checkDir == 0 and pos != start.
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void previousSegment() {
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(checkDir < 0 && seq == rawSeq && pos != start);
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The input text [pos..segmentLimit[ passes the FCD check.
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int p = pos;
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int nextCC = 0;
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Fetch the previous character's fcd16 value.
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int q = p;
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c = Character.codePointBefore(seq, p);
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            p -= Character.charCount(c);
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int fcd16 = nfcImpl.getFCD16(c);
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int trailCC = fcd16 & 0xff;
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(trailCC == 0 && q != pos) {
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // FCD boundary after the [p, q[ character.
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start = segmentStart = q;
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) ||
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Fails FCD check. Find the previous FCD boundary and normalize.
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                do {
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    q = p;
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(fcd16 <= 0xff || p == rawStart) { break; }
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = Character.codePointBefore(seq, p);
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    p -= Character.charCount(c);
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } while((fcd16 = nfcImpl.getFCD16(c)) != 0);
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                normalize(q, pos);
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                pos = limit;
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            nextCC = fcd16 >> 8;
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(p == rawStart || nextCC == 0) {
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // FCD boundary before the following character.
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start = segmentStart = p;
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(pos != start);
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkDir = 0;
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void normalize(int from, int to) {
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(normalized == null) {
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            normalized = new StringBuilder();
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // NFD without argument checking.
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nfcImpl.decompose(rawSeq, from, to, normalized, to - from);
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Switch collation processing into the FCD buffer
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // with the result of normalizing [segmentStart, segmentLimit[.
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        segmentStart = from;
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        segmentLimit = to;
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        seq = normalized;
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        start = 0;
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        limit = start + normalized.length();
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Text pointers: The input text is rawSeq[rawStart, rawLimit[.
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // (In C++, these are const UChar * pointers.
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // In Java, we use CharSequence rawSeq and the parent class' seq
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // together with int indexes.)
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // checkDir > 0:
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // The input text rawSeq[segmentStart..pos[ passes the FCD check.
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Moving forward checks incrementally.
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // segmentLimit is undefined. seq == rawSeq. limit == rawLimit.
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // checkDir < 0:
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // The input text rawSeq[pos..segmentLimit[ passes the FCD check.
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Moving backward checks incrementally.
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // segmentStart is undefined. seq == rawSeq. start == rawStart.
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // checkDir == 0:
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // The input text rawSeq[segmentStart..segmentLimit[ is being processed.
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // These pointers are at FCD boundaries.
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Either this text segment already passes the FCD check
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // and seq==rawSeq && segmentStart==start<=pos<=limit==segmentLimit,
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // or the current segment had to be normalized so that
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // rawSeq[segmentStart..segmentLimit[ turned into the normalized string,
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // corresponding to seq==normalized && 0==start<=pos<=limit==start+normalized.length().
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CharSequence rawSeq;
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int rawStart = 0;
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int segmentStart;
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int segmentLimit;
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int rawLimit;
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final Normalizer2Impl nfcImpl;
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private StringBuilder normalized;
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Direction of incremental FCD check. See comments before rawStart.
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int checkDir;
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
426