17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2013, International Business Machines Corporation and         *
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.                                                *
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UTF16;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Static helpers for reading a {@link CharacterIterator} one code point at a time
 * instead of one UTF-16 code unit at a time.
 *
 * <p>A well-formed surrogate pair is reported as a single supplementary code point;
 * an unpaired surrogate is reported as its own 16-bit value. Running off either end
 * of the iteration range is reported as {@link #DONE32}.
 */
public final class CharacterIteration {
    // disallow instantiation
    private CharacterIteration() { }

    /**
     * 32-bit value returned when an iterator has run out of range.
     * It is a positive value so that the fast case (not end, not surrogate) can be
     * checked with a single comparison.
     */
    public static final int DONE32 = 0x7fffffff;

    /**
     * Move the iterator forward to the next code point, and return that code point,
     * leaving the iterator positioned at the char returned.
     * For supplementary chars, the iterator is left positioned at the lead surrogate.
     *
     * @param ci  The character iterator
     * @return    The next code point, or {@link #DONE32} if the step ran past the end.
     */
    public static int next32(CharacterIterator ci) {
        // If the current position is the lead of a surrogate pair, step onto the trail
        // so that the underlying iterator's next() below lands on the following code point.
        if (Character.isHighSurrogate(ci.current())) {
            if (!Character.isLowSurrogate(ci.next())) {
                // Unpaired lead: undo the step so only a single code unit is skipped.
                ci.previous();
            }
        }

        // For BMP chars, this next() is the real deal.
        int c = ci.next();

        // If we might have a lead surrogate, we need to peek ahead to get the trail
        // even though we don't want to really be positioned there.
        // (CharacterIterator.DONE, 0xFFFF, also takes this path and is resolved there.)
        if (c >= Character.MIN_HIGH_SURROGATE) {
            c = nextTrail32(ci, c);
        }

        if (c >= Character.MIN_SUPPLEMENTARY_CODE_POINT && c != DONE32) {
            // We got a supplementary char.  Back the iterator up to the position
            // of the lead surrogate.
            ci.previous();
        }
        return c;
    }

    /**
     * Out-of-line portion of the in-line next32 code.
     * The call site does an initial {@code ci.next()} and calls this function
     * if the 16-bit value it gets is &gt;= the minimum lead surrogate value (0xD800).
     *
     * <p>NOTE: when a surrogate pair is combined, the underlying iterator is left
     * positioned in the middle of the pair (on the trail surrogate).
     * {@code ci.next()} will work correctly from there, but {@code ci.getIndex()}
     * will be wrong, and needs adjustment by the caller.
     *
     * @param ci    The character iterator, positioned on {@code lead}
     * @param lead  The UTF-16 code unit just returned by {@code ci.next()}
     * @return      {@link #DONE32} if {@code lead} is {@link CharacterIterator#DONE} and
     *              the iterator is at or past its end index; the combined supplementary
     *              code point if {@code lead} is a lead surrogate followed by a trail
     *              surrogate; otherwise {@code lead} unchanged.
     */
    public static int nextTrail32(CharacterIterator ci, int lead) {
        // 0xFFFF is only "end of text" when the index confirms it; otherwise it is a
        // real U+FFFF in the text and falls through to be returned as-is.
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        if (lead > Character.MAX_HIGH_SURROGATE) {
            // Trail surrogate or other BMP char above the lead range: nothing to combine.
            return lead;
        }
        char trail = ci.next();
        if (Character.isLowSurrogate(trail)) {
            return Character.toCodePoint((char) lead, trail);
        }
        // Unpaired lead: restore the position and report the lead on its own.
        ci.previous();
        return lead;
    }

    /**
     * Move the iterator backward to the previous code point, and return that code point,
     * leaving the iterator positioned at the char returned.
     * For supplementary chars, the iterator is left positioned at the lead surrogate.
     *
     * @param ci  The character iterator
     * @return    The previous code point, or {@link #DONE32} if the iterator is already
     *            at or before its begin index (the iterator is not moved in that case).
     */
    public static int previous32(CharacterIterator ci) {
        if (ci.getIndex() <= ci.getBeginIndex()) {
            return DONE32;
        }
        char trail = ci.previous();
        if (!Character.isLowSurrogate(trail) || ci.getIndex() <= ci.getBeginIndex()) {
            // Not a trail surrogate, or nothing precedes it that could be its lead.
            return trail;
        }
        char lead = ci.previous();
        if (Character.isHighSurrogate(lead)) {
            return Character.toCodePoint(lead, trail);
        }
        // Unpaired trail: step forward again so the iterator rests on the trail.
        ci.next();
        return trail;
    }

    /**
     * Return the code point at the iterator's current position.
     * The iterator may be stepped forward one char to look for a trail surrogate,
     * but it is stepped back again before returning.
     *
     * @param ci  The character iterator
     * @return    The code point at the current position (an unpaired surrogate is
     *            returned as its own 16-bit value), or {@link #DONE32} if the iterator
     *            is at or past its end index.
     */
    public static int current32(CharacterIterator ci) {
        char lead = ci.current();
        if (lead < Character.MIN_HIGH_SURROGATE) {
            // Fast path: plain BMP char below the surrogate range.
            return lead;
        }
        if (Character.isHighSurrogate(lead)) {
            // Peek at the following char, then restore the position.
            char trail = ci.next();
            ci.previous();
            if (Character.isLowSurrogate(trail)) {
                return Character.toCodePoint(lead, trail);
            }
            return lead;
        }
        // 0xFFFF is only "end of text" when the index confirms it.
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        return lead;
    }
}
126