12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************
62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   Copyright (C) 2009-2015, International Business Machines
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   Corporation and others.  All Rights Reserved.
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl;
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet.SpanCondition;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.OutputInt;
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/**
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Helper class for frozen UnicodeSets, implements contains() and span() optimized for BMP code points.
2005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert *
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Latin-1: Look up bytes.
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 2-byte characters: Bits organized vertically.
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, with mixed for illegal ranges.
2405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Supplementary characters: Binary search over
2505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * the supplementary part of the parent set's inversion list.
26836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide Only a subset of ICU is exposed in Android
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class BMPSet {
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000);
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * One boolean ('true' or 'false') per Latin-1 character.
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean[] latin1Contains;
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * One bit per code point from U+0000..U+07FF. The bits are organized vertically; consecutive code points
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * correspond to the same bit positions in consecutive table words. With code point parts lead=c{10..6}
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * trail=c{5..0} it is set.contains(c)==(table7FF[trail] bit lead)
4005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
4105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Bits for 0..FF are unused (0).
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int[] table7FF;
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * One bit per 64 BMP code points. The bits are organized vertically; consecutive 64-code point blocks
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * correspond to the same bit position in consecutive table words. With code point parts lead=c{15..12}
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * t1=c{11..6} test bits (lead+16) and lead in bmpBlockBits[t1]. If the upper bit is 0, then the lower bit
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * indicates if contains(c) for all code points in the 64-block. If the upper bit is 1, then the block is mixed
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and set.contains(c) must be called.
5105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
5205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Bits for 0..7FF are unused (0).
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int[] bmpBlockBits;
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Inversion list indexes for restricted binary searches in findCodePoint(), from findCodePoint(U+0800, U+1000,
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * U+2000, .., U+F000, U+10000). U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * always looked up in the bit tables. The last pair of indexes is for finding supplementary code points.
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int[] list4kStarts;
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The inversion list of the parent set, for the slower contains() implementation for mixed BMP blocks and for
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * supplementary code points. The list is terminated with list[listLength-1]=0x110000.
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private final int[] list;
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private final int listLength; // length used; list may be longer to minimize reallocs
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public BMPSet(final int[] parentList, int parentListLength) {
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list = parentList;
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        listLength = parentListLength;
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        latin1Contains = new boolean[0x100];
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        table7FF = new int[64];
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bmpBlockBits = new int[64];
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list4kStarts = new int[18];
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Set the list indexes for binary searches for U+0800, U+1000, U+2000, .., U+F000, U+10000. U+0800 is the
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * first 3-byte-UTF-8 code point. Lower code points are looked up in the bit tables. The last pair of
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * indexes is for finding supplementary code points.
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list4kStarts[0] = findCodePoint(0x800, 0, listLength - 1);
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i;
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (i = 1; i <= 0x10; ++i) {
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            list4kStarts[i] = findCodePoint(i << 12, list4kStarts[i - 1], listLength - 1);
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list4kStarts[0x11] = listLength - 1;
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        initBits();
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public BMPSet(final BMPSet otherBMPSet, final int[] newParentList, int newParentListLength) {
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list = newParentList;
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        listLength = newParentListLength;
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        latin1Contains = otherBMPSet.latin1Contains.clone();
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        table7FF = otherBMPSet.table7FF.clone();
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bmpBlockBits = otherBMPSet.bmpBlockBits.clone();
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list4kStarts = otherBMPSet.list4kStarts.clone();
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean contains(int c) {
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c <= 0xff) {
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return (latin1Contains[c]);
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (c <= 0x7ff) {
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0);
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (c < 0xd800 || (c >= 0xe000 && c <= 0xffff)) {
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int lead = c >> 12;
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (twoBits <= 1) {
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // All 64 code points with the same bits 15..6
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // are either in the set or not.
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return (0 != twoBits);
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Look up the code point in its 4k block of code points.
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1]);
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (c <= 0x10ffff) {
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // surrogate or supplementary code point
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Out-of-range code points get false, consistent with long-standing
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // behavior of UnicodeSet.contains(c).
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Span the initial substring for which each character c has spanCondition==contains(c). It must be
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * spanCondition==0 or 1.
13105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start The start index
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param outCount If not null: Receives the number of code points in the span.
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the limit (exclusive end) of the span
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * NOTE: to reduce the overhead of function call to contains(c), it is manually inlined here. Check for
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * sufficient length for trail unit for each surrogate pair. Handle single surrogates as surrogate code points
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * as usual in ICU.
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final int span(CharSequence s, int start, SpanCondition spanCondition,
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            OutputInt outCount) {
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char c, c2;
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = start;
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int limit = s.length();
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int numSupplementary = 0;
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (SpanCondition.NOT_CONTAINED != spanCondition) {
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // span
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (i < limit) {
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(i);
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c <= 0xff) {
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (!latin1Contains[c]) {
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c <= 0x7ff) {
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) {
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c < 0xd800 ||
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) {
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int lead = c >> 12;
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (twoBits <= 1) {
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // All 64 code points with the same bits 15..6
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // are either in the set or not.
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (twoBits == 0) {
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Look up the code point in its 4k block of code points.
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // surrogate pair
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int supplementary = Character.toCodePoint(c, c2);
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++numSupplementary;
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++i;
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++i;
1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // span not
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (i < limit) {
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(i);
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c <= 0xff) {
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (latin1Contains[c]) {
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c <= 0x7ff) {
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) {
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c < 0xd800 ||
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) {
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int lead = c >> 12;
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (twoBits <= 1) {
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // All 64 code points with the same bits 15..6
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // are either in the set or not.
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (twoBits != 0) {
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Look up the code point in its 4k block of code points.
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // surrogate pair
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int supplementary = Character.toCodePoint(c, c2);
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++numSupplementary;
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++i;
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++i;
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (outCount != null) {
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int spanLength = i - start;
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            outCount.value = spanLength - numSupplementary;  // number of code points
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return i;
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Symmetrical with span().
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Span the trailing substring for which each character c has spanCondition==contains(c). It must be s.length >=
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * limit and spanCondition==0 or 1.
23605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The string index which starts the span (i.e. inclusive).
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final int spanBack(CharSequence s, int limit, SpanCondition spanCondition) {
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char c, c2;
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (SpanCondition.NOT_CONTAINED != spanCondition) {
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // span
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (;;) {
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(--limit);
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c <= 0xff) {
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (!latin1Contains[c]) {
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c <= 0x7ff) {
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) {
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c < 0xd800 ||
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) {
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int lead = c >> 12;
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (twoBits <= 1) {
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // All 64 code points with the same bits 15..6
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // are either in the set or not.
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (twoBits == 0) {
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Look up the code point in its 4k block of code points.
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // surrogate pair
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int supplementary = Character.toCodePoint(c2, c);
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    --limit;
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (0 == limit) {
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return 0;
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // span not
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (;;) {
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(--limit);
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c <= 0xff) {
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (latin1Contains[c]) {
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c <= 0x7ff) {
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) {
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c < 0xd800 ||
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) {
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int lead = c >> 12;
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (twoBits <= 1) {
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // All 64 code points with the same bits 15..6
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // are either in the set or not.
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (twoBits != 0) {
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Look up the code point in its 4k block of code points.
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // surrogate pair
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int supplementary = Character.toCodePoint(c2, c);
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    --limit;
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (0 == limit) {
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return 0;
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return limit + 1;
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Set bits in a bit rectangle in "vertical" bit organization. start<limit<=0x800
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static void set32x64Bits(int[] table, int start, int limit) {
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert (64 == table.length);
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int lead = start >> 6;  // Named for UTF-8 2-byte lead byte with upper 5 bits.
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int trail = start & 0x3f;  // Named for UTF-8 2-byte trail byte with lower 6 bits.
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Set one bit indicating an all-one block.
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int bits = 1 << lead;
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((start + 1) == limit) { // Single-character shortcut.
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            table[trail] |= bits;
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int limitLead = limit >> 6;
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int limitTrail = limit & 0x3f;
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (lead == limitLead) {
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Partial vertical bit column.
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (trail < limitTrail) {
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                table[trail++] |= bits;
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Partial vertical bit column,
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // followed by a bit rectangle,
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // followed by another partial vertical bit column.
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (trail > 0) {
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                do {
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    table[trail++] |= bits;
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } while (trail < 64);
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++lead;
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (lead < limitLead) {
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                bits = ~((1 << lead) - 1);
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (limitLead < 0x20) {
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    bits &= (1 << limitLead) - 1;
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (trail = 0; trail < 64; ++trail) {
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    table[trail] |= bits;
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // In that case, bits=1<<limitLead == 1<<0 == 1
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // (because Java << uses only the lower 5 bits of the shift operand)
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // but the bits value is not used because trail<limitTrail is already false.
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            bits = 1 << limitLead;
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (trail = 0; trail < limitTrail; ++trail) {
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                table[trail] |= bits;
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void initBits() {
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int start, limit;
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int listIndex = 0;
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Set latin1Contains[].
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        do {
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            start = list[listIndex++];
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (listIndex < listLength) {
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limit = list[listIndex++];
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limit = 0x110000;
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (start >= 0x100) {
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            do {
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                latin1Contains[start++] = true;
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } while (start < limit && start < 0x100);
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } while (limit <= 0x100);
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Set table7FF[].
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (start < 0x800) {
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set32x64Bits(table7FF, start, limit <= 0x800 ? limit : 0x800);
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (limit > 0x800) {
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                start = 0x800;
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            start = list[listIndex++];
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (listIndex < listLength) {
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limit = list[listIndex++];
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limit = 0x110000;
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Set bmpBlockBits[].
4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int minStart = 0x800;
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (start < 0x10000) {
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (limit > 0x10000) {
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limit = 0x10000;
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (start < minStart) {
4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                start = minStart;
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (start < limit) { // Else: Another range entirely in a known mixed-value block.
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (0 != (start & 0x3f)) {
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Mixed-value block of 64 code points.
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    start >>= 6;
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    bmpBlockBits[start & 0x3f] |= 0x10001 << (start >> 6);
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    start = (start + 1) << 6; // Round up to the next block boundary.
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    minStart = start; // Ignore further ranges in this block.
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (start < limit) {
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (start < (limit & ~0x3f)) {
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Multiple all-ones blocks of 64 code points each.
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        set32x64Bits(bmpBlockBits, start >> 6, limit >> 6);
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (0 != (limit & 0x3f)) {
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Mixed-value block of 64 code points.
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        limit >>= 6;
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        bmpBlockBits[limit & 0x3f] |= 0x10001 << (limit >> 6);
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        limit = (limit + 1) << 6; // Round up to the next block boundary.
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        minStart = limit; // Ignore further ranges in this block.
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (limit == 0x10000) {
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            start = list[listIndex++];
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (listIndex < listLength) {
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limit = list[listIndex++];
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limit = 0x110000;
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Same as UnicodeSet.findCodePoint(int c) except that the binary search is restricted for finding code
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * points in a certain range.
46605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * For restricting the search for finding in the range start..end, pass in lo=findCodePoint(start) and
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * hi=findCodePoint(end) with 0<=lo<=hi<len. findCodePoint(c) defaults to lo=0 and hi=len-1.
46905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *            a character in a subrange of MIN_VALUE..MAX_VALUE
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param lo
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *            The lowest index to be returned.
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param hi
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *            The highest index to be returned.
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the smallest integer i in the range lo..hi, inclusive, such that c < list[i]
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findCodePoint(int c, int lo, int hi) {
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Examples:
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                           findCodePoint(c)
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           set              list[]         c=0 1 3 4 7 8
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           ===              ==============   ===========
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           []               [110000]         0 0 0 0 0 0
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           [:Any:]          [0, 110000]      1 1 1 1 1 1
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Return the smallest i such that c < list[i]. Assume
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c < list[lo])
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return lo;
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // High runner test. c is often after the last range, so an
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // initial check for this condition pays off.
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (lo >= hi || c >= list[hi - 1])
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return hi;
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // invariant: c >= list[lo]
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // invariant: c < list[hi]
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (;;) {
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int i = (lo + hi) >>> 1;
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (i == lo) {
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break; // Found!
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (c < list[i]) {
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                hi = i;
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lo = i;
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return hi;
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private final boolean containsSlow(int c, int lo, int hi) {
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (0 != (findCodePoint(c, lo, hi) & 1));
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
516