// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
* Copyright (C) 2010-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* UTF16CollationIterator.java, ported from utf16collationiterator.h/.cpp
*
* C++ version created on: 2010oct27
* created by: Markus W. Scherer
*/
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * UTF-16 collation element and character iterator.
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Handles normalized UTF-16 text, with length or NUL-terminated.
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Unnormalized text is handled by a subclass.
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class UTF16CollationIterator extends CollationIterator {
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Partial constructor, see {@link CollationIterator#CollationIterator(CollationData)}.
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public UTF16CollationIterator(CollationData d) {
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super(d);
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public UTF16CollationIterator(CollationData d, boolean numeric, CharSequence s, int p) {
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super(d, numeric);
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        seq = s;
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        start = 0;
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pos = p;
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        limit = s.length();
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean equals(Object other) {
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!super.equals(other)) { return false; }
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UTF16CollationIterator o = (UTF16CollationIterator)other;
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Compare the iterator state but not the text: Assume that the caller does that.
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (pos - start) == (o.pos - o.start);
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int hashCode() {
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert false : "hashCode not designed";
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 42; // any arbitrary constant will do
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void resetToOffset(int newOffset) {
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset();
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pos = start + newOffset;
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getOffset() {
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return pos - start;
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(boolean numeric, CharSequence s, int p) {
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset(numeric);
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        seq = s;
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        start = 0;
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pos = p;
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        limit = s.length();
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int nextCodePoint() {
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(pos == limit) {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return Collation.SENTINEL_CP;
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c = seq.charAt(pos++);
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char trail;
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(Character.isHighSurrogate(c) && pos != limit &&
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Character.isLowSurrogate(trail = seq.charAt(pos))) {
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++pos;
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return Character.toCodePoint(c, trail);
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return c;
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int previousCodePoint() {
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(pos == start) {
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return Collation.SENTINEL_CP;
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c = seq.charAt(--pos);
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char lead;
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(Character.isLowSurrogate(c) && pos != start &&
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Character.isHighSurrogate(lead = seq.charAt(pos - 1))) {
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --pos;
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return Character.toCodePoint(lead, c);
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return c;
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected long handleNextCE32() {
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(pos == limit) {
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return NO_CP_AND_CE32;
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c = seq.charAt(pos++);
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead(c));
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected char handleGetTrailSurrogate() {
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(pos == limit) { return 0; }
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char trail;
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(Character.isLowSurrogate(trail = seq.charAt(pos))) { ++pos; }
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return trail;
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* boolean foundNULTerminator(); */
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void forwardNumCodePoints(int num) {
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(num > 0 && pos != limit) {
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c = seq.charAt(pos++);
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --num;
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(Character.isHighSurrogate(c) && pos != limit &&
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    Character.isLowSurrogate(seq.charAt(pos))) {
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++pos;
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void backwardNumCodePoints(int num) {
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(num > 0 && pos != start) {
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c = seq.charAt(--pos);
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --num;
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(Character.isLowSurrogate(c) && pos != start &&
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    Character.isHighSurrogate(seq.charAt(pos-1))) {
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                --pos;
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected CharSequence seq;
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected int start;
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected int pos;
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected int limit;
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
150