12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ****************************************************************************** 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Copyright (C) 2009-2015, International Business Machines 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Corporation and others. All Rights Reserved. 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ****************************************************************************** 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl; 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet.SpanCondition; 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.OutputInt; 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/** 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Helper class for frozen UnicodeSets, implements contains() and span() optimized for BMP code points. 2005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Latin-1: Look up bytes. 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 2-byte characters: Bits organized vertically. 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, with mixed for illegal ranges. 2405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Supplementary characters: Binary search over 2505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * the supplementary part of the parent set's inversion list. 26836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide Only a subset of ICU is exposed in Android 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class BMPSet { 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000); 302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * One boolean ('true' or 'false') per Latin-1 character. 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean[] latin1Contains; 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * One bit per code point from U+0000..U+07FF. The bits are organized vertically; consecutive code points 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * correspond to the same bit positions in consecutive table words. With code point parts lead=c{10..6} 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * trail=c{5..0} it is set.contains(c)==(table7FF[trail] bit lead) 4005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 4105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Bits for 0..FF are unused (0). 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int[] table7FF; 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * One bit per 64 BMP code points. The bits are organized vertically; consecutive 64-code point blocks 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * correspond to the same bit position in consecutive table words. With code point parts lead=c{15..12} 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * t1=c{11..6} test bits (lead+16) and lead in bmpBlockBits[t1]. If the upper bit is 0, then the lower bit 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * indicates if contains(c) for all code points in the 64-block. If the upper bit is 1, then the block is mixed 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and set.contains(c) must be called. 5105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 5205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Bits for 0..7FF are unused (0). 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int[] bmpBlockBits; 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Inversion list indexes for restricted binary searches in findCodePoint(), from findCodePoint(U+0800, U+1000, 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * U+2000, .., U+F000, U+10000). U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * always looked up in the bit tables. The last pair of indexes is for finding supplementary code points. 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int[] list4kStarts; 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The inversion list of the parent set, for the slower contains() implementation for mixed BMP blocks and for 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * supplementary code points. The list is terminated with list[listLength-1]=0x110000. 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final int[] list; 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final int listLength; // length used; list may be longer to minimize reallocs 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public BMPSet(final int[] parentList, int parentListLength) { 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = parentList; 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller listLength = parentListLength; 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller latin1Contains = new boolean[0x100]; 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table7FF = new int[64]; 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bmpBlockBits = new int[64]; 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list4kStarts = new int[18]; 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Set the list indexes for binary searches for U+0800, U+1000, U+2000, .., U+F000, U+10000. U+0800 is the 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * first 3-byte-UTF-8 code point. Lower code points are looked up in the bit tables. The last pair of 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * indexes is for finding supplementary code points. 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list4kStarts[0] = findCodePoint(0x800, 0, listLength - 1); 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i; 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (i = 1; i <= 0x10; ++i) { 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list4kStarts[i] = findCodePoint(i << 12, list4kStarts[i - 1], listLength - 1); 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list4kStarts[0x11] = listLength - 1; 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller initBits(); 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public BMPSet(final BMPSet otherBMPSet, final int[] newParentList, int newParentListLength) { 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = newParentList; 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller listLength = newParentListLength; 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller latin1Contains = otherBMPSet.latin1Contains.clone(); 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table7FF = otherBMPSet.table7FF.clone(); 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bmpBlockBits = otherBMPSet.bmpBlockBits.clone(); 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list4kStarts = otherBMPSet.list4kStarts.clone(); 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean contains(int c) { 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c <= 0xff) { 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (latin1Contains[c]); 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c <= 0x7ff) { 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0); 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c < 0xd800 || (c >= 0xe000 && c <= 0xffff)) { 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lead = c >> 12; 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (twoBits <= 1) { 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // All 64 code points with the same bits 15..6 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // are either in the set or not. 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (0 != twoBits); 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look up the code point in its 4k block of code points. 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1]); 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c <= 0x10ffff) { 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // surrogate or supplementary code point 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]); 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Out-of-range code points get false, consistent with long-standing 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // behavior of UnicodeSet.contains(c). 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Span the initial substring for which each character c has spanCondition==contains(c). It must be 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * spanCondition==0 or 1. 13105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start The start index 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param outCount If not null: Receives the number of code points in the span. 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the limit (exclusive end) of the span 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * NOTE: to reduce the overhead of function call to contains(c), it is manually inlined here. Check for 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * sufficient length for trail unit for each surrogate pair. Handle single surrogates as surrogate code points 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * as usual in ICU. 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int span(CharSequence s, int start, SpanCondition spanCondition, 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller OutputInt outCount) { 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c, c2; 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = start; 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit = s.length(); 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int numSupplementary = 0; 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (SpanCondition.NOT_CONTAINED != spanCondition) { 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // span 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (i < limit) { 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = s.charAt(i); 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c <= 0xff) { 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!latin1Contains[c]) { 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c <= 0x7ff) { 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) { 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c < 0xd800 || 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) { 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lead = c >> 12; 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (twoBits <= 1) { 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // All 64 code points with the same bits 15..6 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // are either in the set or not. 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (twoBits == 0) { 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look up the code point in its 4k block of code points. 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // surrogate pair 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int supplementary = Character.toCodePoint(c, c2); 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++numSupplementary; 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; 1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // span not 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (i < limit) { 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = s.charAt(i); 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c <= 0xff) { 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (latin1Contains[c]) { 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c <= 0x7ff) { 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) { 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c < 0xd800 || 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) { 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lead = c >> 12; 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (twoBits <= 1) { 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // All 64 code points with the same bits 15..6 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // are either in the set or not. 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (twoBits != 0) { 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look up the code point in its 4k block of code points. 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // surrogate pair 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int supplementary = Character.toCodePoint(c, c2); 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++numSupplementary; 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (outCount != null) { 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int spanLength = i - start; 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller outCount.value = spanLength - numSupplementary; // number of code points 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Symmetrical with span(). 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Span the trailing substring for which each character c has spanCondition==contains(c). It must be s.length >= 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * limit and spanCondition==0 or 1. 23605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The string index which starts the span (i.e. inclusive). 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int spanBack(CharSequence s, int limit, SpanCondition spanCondition) { 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c, c2; 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (SpanCondition.NOT_CONTAINED != spanCondition) { 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // span 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = s.charAt(--limit); 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c <= 0xff) { 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!latin1Contains[c]) { 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c <= 0x7ff) { 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) { 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c < 0xd800 || 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) { 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lead = c >> 12; 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (twoBits <= 1) { 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // All 64 code points with the same bits 15..6 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // are either in the set or not. 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (twoBits == 0) { 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look up the code point in its 4k block of code points. 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // surrogate pair 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int supplementary = Character.toCodePoint(c2, c); 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --limit; 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (0 == limit) { 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // span not 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = s.charAt(--limit); 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c <= 0xff) { 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (latin1Contains[c]) { 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c <= 0x7ff) { 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) { 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c < 0xd800 || 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) { 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lead = c >> 12; 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (twoBits <= 1) { 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // All 64 code points with the same bits 15..6 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // are either in the set or not. 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (twoBits != 0) { 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look up the code point in its 4k block of code points. 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // surrogate pair 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int supplementary = Character.toCodePoint(c2, c); 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --limit; 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (0 == limit) { 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return limit + 1; 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Set bits in a bit rectangle in "vertical" bit organization. start<limit<=0x800 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static void set32x64Bits(int[] table, int start, int limit) { 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert (64 == table.length); 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lead = start >> 6; // Named for UTF-8 2-byte lead byte with upper 5 bits. 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int trail = start & 0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits. 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set one bit indicating an all-one block. 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int bits = 1 << lead; 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((start + 1) == limit) { // Single-character shortcut. 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table[trail] |= bits; 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limitLead = limit >> 6; 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limitTrail = limit & 0x3f; 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lead == limitLead) { 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Partial vertical bit column. 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (trail < limitTrail) { 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table[trail++] |= bits; 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Partial vertical bit column, 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // followed by a bit rectangle, 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // followed by another partial vertical bit column. 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (trail > 0) { 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table[trail++] |= bits; 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while (trail < 64); 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++lead; 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lead < limitLead) { 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bits = ~((1 << lead) - 1); 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (limitLead < 0x20) { 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bits &= (1 << limitLead) - 1; 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (trail = 0; trail < 64; ++trail) { 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table[trail] |= bits; 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0. 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In that case, bits=1<<limitLead == 1<<0 == 1 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (because Java << uses only the lower 5 bits of the shift operand) 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // but the bits value is not used because trail<limitTrail is already false. 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bits = 1 << limitLead; 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (trail = 0; trail < limitTrail; ++trail) { 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table[trail] |= bits; 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void initBits() { 3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start, limit; 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int listIndex = 0; 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set latin1Contains[]. 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = list[listIndex++]; 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (listIndex < listLength) { 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = list[listIndex++]; 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = 0x110000; 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start >= 0x100) { 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller latin1Contains[start++] = true; 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while (start < limit && start < 0x100); 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while (limit <= 0x100); 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set table7FF[]. 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (start < 0x800) { 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set32x64Bits(table7FF, start, limit <= 0x800 ? limit : 0x800); 4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (limit > 0x800) { 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = 0x800; 4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = list[listIndex++]; 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (listIndex < listLength) { 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = list[listIndex++]; 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = 0x110000; 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set bmpBlockBits[]. 4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int minStart = 0x800; 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (start < 0x10000) { 4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (limit > 0x10000) { 4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = 0x10000; 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < minStart) { 4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = minStart; 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < limit) { // Else: Another range entirely in a known mixed-value block. 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (0 != (start & 0x3f)) { 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Mixed-value block of 64 code points. 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start >>= 6; 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bmpBlockBits[start & 0x3f] |= 0x10001 << (start >> 6); 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = (start + 1) << 6; // Round up to the next block boundary. 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller minStart = start; // Ignore further ranges in this block. 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < limit) { 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < (limit & ~0x3f)) { 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Multiple all-ones blocks of 64 code points each. 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set32x64Bits(bmpBlockBits, start >> 6, limit >> 6); 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (0 != (limit & 0x3f)) { 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Mixed-value block of 64 code points. 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit >>= 6; 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bmpBlockBits[limit & 0x3f] |= 0x10001 << (limit >> 6); 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = (limit + 1) << 6; // Round up to the next block boundary. 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller minStart = limit; // Ignore further ranges in this block. 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (limit == 0x10000) { 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = list[listIndex++]; 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (listIndex < listLength) { 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = list[listIndex++]; 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = 0x110000; 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Same as UnicodeSet.findCodePoint(int c) except that the binary search is restricted for finding code 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * points in a certain range. 46605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For restricting the search for finding in the range start..end, pass in lo=findCodePoint(start) and 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * hi=findCodePoint(end) with 0<=lo<=hi<len. findCodePoint(c) defaults to lo=0 and hi=len-1. 46905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a character in a subrange of MIN_VALUE..MAX_VALUE 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param lo 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The lowest index to be returned. 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param hi 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The highest index to be returned. 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the smallest integer i in the range lo..hi, inclusive, such that c < list[i] 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findCodePoint(int c, int lo, int hi) { 4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Examples: 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller findCodePoint(c) 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set list[] c=0 1 3 4 7 8 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller === ============== =========== 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller [] [110000] 0 0 0 0 0 0 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller [:Any:] [0, 110000] 1 1 1 1 1 1 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Return the smallest i such that c < list[i]. Assume 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // list[len - 1] == HIGH and that c is legal (0..HIGH-1). 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < list[lo]) 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return lo; 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // High runner test. c is often after the last range, so an 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // initial check for this condition pays off. 4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lo >= hi || c >= list[hi - 1]) 4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return hi; 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // invariant: c >= list[lo] 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // invariant: c < list[hi] 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = (lo + hi) >>> 1; 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i == lo) { 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; // Found! 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c < list[i]) { 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller hi = i; 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lo = i; 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return hi; 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final boolean containsSlow(int c, int lo, int hi) { 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (0 != (findCodePoint(c, lo, hi) & 1)); 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 516