12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller******************************************************************************* 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Copyright (C) 2010-2015, International Business Machines 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Corporation and others. All Rights Reserved. 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller******************************************************************************* 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* CollationData.java, ported from collationdata.h/.cpp 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* C++ version created on: 2010oct27 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* created by: Markus W. Scherer 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/ 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl.coll; 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Normalizer2Impl; 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Trie2_32; 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UScript; 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Collator; 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet; 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ICUException; 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/** 252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Collation data container. 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Immutable data created by a CollationDataBuilder, or loaded from a file, 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or deserialized from API-provided binary data. 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Includes data for the collation base (root/default), aliased if this is not the base. 30836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide Only a subset of ICU is exposed in Android 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class CollationData { 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: The ucadata.icu loader could discover the reserved ranges by setting an array 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // parallel with the ranges, and resetting ranges that are indexed. 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The reordering builder code could clone the resulting template array. 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final int REORDER_RESERVED_BEFORE_LATIN = Collator.ReorderCodes.FIRST + 14; 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final int REORDER_RESERVED_AFTER_LATIN = Collator.ReorderCodes.FIRST + 15; 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final int MAX_NUM_SPECIAL_REORDER_CODES = 8; 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationData(Normalizer2Impl nfc) { 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nfcImpl = nfc; 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getCE32(int c) { 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return trie.get(c); 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int getCE32FromSupplementary(int c) { 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return trie.get(c); // TODO: port UTRIE2_GET32_FROM_SUPP(trie, c) to Java? 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isDigit(int c) { 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c < 0x660 ? c <= 0x39 && 0x30 <= c : 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Collation.hasCE32Tag(getCE32(c), Collation.DIGIT_TAG); 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isUnsafeBackward(int c, boolean numeric) { 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return unsafeBackwardSet.contains(c) || (numeric && isDigit(c)); 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isCompressibleLeadByte(int b) { 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return compressibleBytes[b]; 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isCompressiblePrimary(long p) { 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return isCompressibleLeadByte((int)p >>> 24); 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the CE32 from two contexts words. 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Access to the defaultCE32 for contraction and prefix matching. 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int getCE32FromContexts(int index) { 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((int)contexts.charAt(index) << 16) | contexts.charAt(index + 1); 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG). 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Requires that ce32 is special. 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int getIndirectCE32(int ce32) { 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(Collation.isSpecialCE32(ce32)); 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int tag = Collation.tagFromCE32(ce32); 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(tag == Collation.DIGIT_TAG) { 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fetch the non-numeric-collation CE32. 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = ce32s[Collation.indexFromCE32(ce32)]; 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(tag == Collation.LEAD_SURROGATE_TAG) { 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = Collation.UNASSIGNED_CE32; 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(tag == Collation.U0000_TAG) { 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fetch the normal ce32 for U+0000. 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = ce32s[0]; 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG), 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if ce32 is special. 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int getFinalCE32(int ce32) { 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(Collation.isSpecialCE32(ce32)) { 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = getIndirectCE32(ce32); 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Computes a CE from c's ce32 which has the OFFSET_TAG. 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long getCEFromOffsetCE32(int c, int ce32) { 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long dataCE = ces[Collation.indexFromCE32(ce32)]; 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collation.makeCE(Collation.getThreeBytePrimaryForOffsetData(c, dataCE)); 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the single CE that c maps to. 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Throws UnsupportedOperationException if c does not map to a single CE. 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long getSingleCE(int c) { 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationData d; 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ce32 = getCE32(c); 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ce32 == Collation.FALLBACK_CE32) { 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d = base; 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = base.getCE32(c); 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller d = this; 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(Collation.isSpecialCE32(ce32)) { 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch(Collation.tagFromCE32(ce32)) { 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.LATIN_EXPANSION_TAG: 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.BUILDER_DATA_TAG: 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.PREFIX_TAG: 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.CONTRACTION_TAG: 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.HANGUL_TAG: 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.LEAD_SURROGATE_TAG: 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException(String.format( 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "there is not exactly one collation element for U+%04X (CE32 0x%08x)", 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c, ce32)); 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.FALLBACK_TAG: 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.RESERVED_TAG_3: 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new AssertionError(String.format( 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "unexpected CE32 tag for U+%04X (CE32 0x%08x)", c, ce32)); 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.LONG_PRIMARY_TAG: 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collation.ceFromLongPrimaryCE32(ce32); 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.LONG_SECONDARY_TAG: 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collation.ceFromLongSecondaryCE32(ce32); 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.EXPANSION32_TAG: 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(Collation.lengthFromCE32(ce32) == 1) { 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.ce32s[Collation.indexFromCE32(ce32)]; 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException(String.format( 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "there is not exactly one collation element for U+%04X (CE32 0x%08x)", 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c, ce32)); 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.EXPANSION_TAG: { 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(Collation.lengthFromCE32(ce32) == 1) { 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return d.ces[Collation.indexFromCE32(ce32)]; 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException(String.format( 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "there is not exactly one collation element for U+%04X (CE32 0x%08x)", 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c, ce32)); 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.DIGIT_TAG: 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fetch the non-numeric-collation CE32 and continue. 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.ce32s[Collation.indexFromCE32(ce32)]; 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.U0000_TAG: 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(c == 0); 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fetch the normal ce32 for U+0000 and continue. 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = d.ce32s[0]; 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.OFFSET_TAG: 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return d.getCEFromOffsetCE32(c, ce32); 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case Collation.IMPLICIT_TAG: 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collation.unassignedCEFromCodePoint(c); 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collation.ceFromSimpleCE32(ce32); 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the FCD16 value for code point c. c must be >= 0. 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int getFCD16(int c) { 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return nfcImpl.getFCD16(c); 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the first primary for the script's reordering group. 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the primary with only the first primary lead byte of the group 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (not necessarily an actual root collator primary weight), 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or 0 if the script is unknown 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long getFirstPrimaryForGroup(int script) { 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = getScriptIndex(script); 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return index == 0 ? 0 : (long)scriptStarts[index] << 16; 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the last primary for the script's reordering group. 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the last primary of the group 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (not an actual root collator primary weight), 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or 0 if the script is unknown 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public long getLastPrimaryForGroup(int script) { 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = getScriptIndex(script); 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(index == 0) { 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long limit = scriptStarts[index + 1]; 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (limit << 16) - 1; 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Finds the reordering group which contains the primary weight. 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the first script of the group, or -1 if the weight is beyond the last group 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getGroupForPrimary(long p) { 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p >>= 16; 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p < scriptStarts[1] || scriptStarts[scriptStarts.length - 1] <= p) { 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = 1; 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(p >= scriptStarts[index + 1]) { ++index; } 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < numScripts; ++i) { 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(scriptsIndex[i] == index) { 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) { 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(scriptsIndex[numScripts + i] == index) { 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collator.ReorderCodes.FIRST + i; 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getScriptIndex(int script) { 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(script < 0) { 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(script < numScripts) { 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return scriptsIndex[script]; 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(script < Collator.ReorderCodes.FIRST) { 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller script -= Collator.ReorderCodes.FIRST; 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(script < MAX_NUM_SPECIAL_REORDER_CODES) { 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return scriptsIndex[numScripts + script]; 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int[] getEquivalentScripts(int script) { 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = getScriptIndex(script); 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(index == 0) { return EMPTY_INT_ARRAY; } 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(script >= Collator.ReorderCodes.FIRST) { 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Special groups have no aliases. 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return new int[] { script }; 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = 0; 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < numScripts; ++i) { 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(scriptsIndex[i] == index) { 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++length; 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] dest = new int[length]; 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length == 1) { 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dest[0] = script; 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return dest; 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length = 0; 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < numScripts; ++i) { 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(scriptsIndex[i] == index) { 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dest[length++] = i; 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return dest; 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Writes the permutation of primary-weight ranges 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for the given reordering of scripts and groups. 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The caller checks for illegal arguments and 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * takes care of [DEFAULT] and memory allocation. 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>Each list element will be a (limit, offset) pair as described 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for the CollationSettings.reorderRanges. 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The list will be empty if no ranges are reordered. 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void makeReorderRanges(int[] reorder, UVector32 ranges) { 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller makeReorderRanges(reorder, false, ranges); 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void makeReorderRanges(int[] reorder, boolean latinMustMove, UVector32 ranges) { 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ranges.removeAllElements(); 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = reorder.length; 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length == 0 || (length == 1 && reorder[0] == UScript.UNKNOWN)) { 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Maps each script-or-group range to a new lead byte. 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller short[] table = new short[scriptStarts.length - 1]; // C++: uint8_t[] 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set "don't care" values for reserved ranges. 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = scriptsIndex[ 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numScripts + REORDER_RESERVED_BEFORE_LATIN - Collator.ReorderCodes.FIRST]; 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(index != 0) { 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table[index] = 0xff; 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = scriptsIndex[ 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numScripts + REORDER_RESERVED_AFTER_LATIN - Collator.ReorderCodes.FIRST]; 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(index != 0) { 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table[index] = 0xff; 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Never reorder special low and high primary lead bytes. 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(scriptStarts.length >= 2); 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(scriptStarts[0] == 0); 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lowStart = scriptStarts[1]; 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(lowStart == ((Collation.MERGE_SEPARATOR_BYTE + 1) << 8)); 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int highLimit = scriptStarts[scriptStarts.length - 1]; 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(highLimit == (Collation.TRAIL_WEIGHT_BYTE << 8)); 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Get the set of special reorder codes in the input list. 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This supports a fixed number of special reorder codes; 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // it works for data with codes beyond Collator.ReorderCodes.LIMIT. 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int specials = 0; 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < length; ++i) { 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int reorderCode = reorder[i] - Collator.ReorderCodes.FIRST; 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(0 <= reorderCode && reorderCode < MAX_NUM_SPECIAL_REORDER_CODES) { 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller specials |= 1 << reorderCode; 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Start the reordering with the special low reorder codes that do not occur in the input. 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) { 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = scriptsIndex[numScripts + i]; 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(index != 0 && (specials & (1 << i)) == 0) { 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lowStart = addLowScriptRange(table, index, lowStart); 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Skip the reserved range before Latin if Latin is the first script, 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // so that we do not move it unnecessarily. 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int skippedReserved = 0; 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(specials == 0 && reorder[0] == UScript.LATIN && !latinMustMove) { 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = scriptsIndex[UScript.LATIN]; 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(index != 0); 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = scriptStarts[index]; 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(lowStart <= start); 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller skippedReserved = start - lowStart; 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lowStart = start; 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reorder according to the input scripts, continuing from the bottom of the primary range. 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean hasReorderToEnd = false; 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < length;) { 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int script = reorder[i++]; 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(script == UScript.UNKNOWN) { 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Put the remaining scripts at the top. 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller hasReorderToEnd = true; 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(i < length) { 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller script = reorder[--length]; 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(script == UScript.UNKNOWN) { // Must occur at most once. 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException( 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "setReorderCodes(): duplicate UScript.UNKNOWN"); 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(script == Collator.ReorderCodes.DEFAULT) { 3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException( 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "setReorderCodes(): UScript.DEFAULT together with other scripts"); 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = getScriptIndex(script); 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(index == 0) { continue; } 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(table[index] != 0) { // Duplicate or equivalent script. 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException( 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "setReorderCodes(): duplicate or equivalent script " + 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller scriptCodeString(script)); 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller highLimit = addHighScriptRange(table, index, highLimit); 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(script == Collator.ReorderCodes.DEFAULT) { 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The default code must be the only one in the list, and that is handled by the caller. 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Otherwise it must not be used. 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException( 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "setReorderCodes(): UScript.DEFAULT together with other scripts"); 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = getScriptIndex(script); 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(index == 0) { continue; } 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(table[index] != 0) { // Duplicate or equivalent script. 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException( 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "setReorderCodes(): duplicate or equivalent script " + 4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller scriptCodeString(script)); 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lowStart = addLowScriptRange(table, index, lowStart); 4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Put all remaining scripts into the middle. 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 1; i < scriptStarts.length - 1; ++i) { 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int leadByte = table[i]; 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(leadByte != 0) { continue; } 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = scriptStarts[i]; 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!hasReorderToEnd && start > lowStart) { 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No need to move this script. 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lowStart = start; 4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lowStart = addLowScriptRange(table, i, lowStart); 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(lowStart > highLimit) { 4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((lowStart - (skippedReserved & 0xff00)) <= highLimit) { 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Try not skipping the before-Latin reserved range. 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller makeReorderRanges(reorder, true, ranges); 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We need more primary lead bytes than available, despite the reserved ranges. 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUException( 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "setReorderCodes(): reordering too many partial-primary-lead-byte scripts"); 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Turn lead bytes into a list of (limit, offset) pairs. 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Encode each pair in one list element: 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Upper 16 bits = limit, lower 16 = signed lead byte offset. 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int offset = 0; 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 1;; ++i) { 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextOffset = offset; 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(i < scriptStarts.length - 1) { 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int newLeadByte = table[i]; 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(newLeadByte == 0xff) { 4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // "Don't care" lead byte for reserved range, continue with current offset. 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextOffset = newLeadByte - (scriptStarts[i] >> 8); 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextOffset != offset) { break; } 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(offset != 0 || i < scriptStarts.length - 1) { 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ranges.addElement(((int)scriptStarts[i] << 16) | (offset & 0xffff)); 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(i == scriptStarts.length - 1) { break; } 4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset = nextOffset; 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int addLowScriptRange(short[] table, int index, int lowStart) { 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = scriptStarts[index]; 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((start & 0xff) < (lowStart & 0xff)) { 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lowStart += 0x100; 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table[index] = (short)(lowStart >> 8); 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit = scriptStarts[index + 1]; 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lowStart = ((lowStart & 0xff00) + ((limit & 0xff00) - (start & 0xff00))) | (limit & 0xff); 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return lowStart; 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int addHighScriptRange(short[] table, int index, int highLimit) { 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit = scriptStarts[index + 1]; 4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((limit & 0xff) > (highLimit & 0xff)) { 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller highLimit -= 0x100; 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = scriptStarts[index]; 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller highLimit = ((highLimit & 0xff00) - ((limit & 0xff00) - (start & 0xff00))) | (start & 0xff); 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller table[index] = (short)(highLimit >> 8); 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return highLimit; 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static String scriptCodeString(int script) { 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Do not use the script name here: We do not want to depend on that data. 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (script < Collator.ReorderCodes.FIRST) ? 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Integer.toString(script) : "0x" + Integer.toHexString(script); 4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int[] EMPTY_INT_ARRAY = new int[0]; 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** @see jamoCE32s */ 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final int JAMO_CE32S_LENGTH = 19 + 21 + 27; 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Main lookup trie. */ 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Trie2_32 trie; 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Array of CE32 values. 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * At index 0 there must be CE32(U+0000) 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to support U+0000's special-tag for NUL-termination handling. 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] ce32s; 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Array of CE values for expansions and OFFSET_TAG. */ 4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long[] ces; 4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Array of prefix and contraction-suffix matching data. */ 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String contexts; 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Base collation data, or null if this data itself is a base. */ 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public CollationData base; 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Simple array of JAMO_CE32S_LENGTH=19+21+27 CE32s, one per canonical Jamo L/V/T. 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * They are normally simple CE32s, rarely expansions. 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For fast handling of HANGUL_TAG. 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] jamoCE32s = new int[JAMO_CE32S_LENGTH]; 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public Normalizer2Impl nfcImpl; 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** The single-byte primary weight (xx000000) for numeric collation. */ 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long numericPrimary = 0x12000000; 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 256 flags for which primary-weight lead bytes are compressible. */ 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean[] compressibleBytes; 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Set of code points that are unsafe for starting string comparison after an identical prefix, 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or in backwards CE iteration. 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet unsafeBackwardSet; 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Fast Latin table for common-Latin-text string comparisons. 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Data structure see class CollationFastLatin. 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public char[] fastLatinTable; 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Header portion of the fastLatinTable. 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * In C++, these are one array, and the header is skipped for mapping characters. 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * In Java, two arrays work better. 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char[] fastLatinTableHeader; 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Data for scripts and reordering groups. 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Uses include building a reordering permutation table and 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * providing script boundaries to AlphabeticIndex. 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int numScripts; 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The length of scriptsIndex is numScripts+16. 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * It maps from a UScriptCode or a special reorder code to an entry in scriptStarts. 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 16 special reorder codes (not all used) are mapped starting at numScripts. 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Up to MAX_NUM_SPECIAL_REORDER_CODES are codes for special groups like space/punct/digit. 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * There are special codes at the end for reorder-reserved primary ranges. 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>Multiple scripts may share a range and index, for example Hira & Kana. 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char[] scriptsIndex; 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Start primary weight (top 16 bits only) for a group/script/reserved range 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * indexed by scriptsIndex. 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The first range (separators & terminators) and the last range (trailing weights) 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * are not reorderable, and no scriptsIndex entry points to them. 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char[] scriptStarts; 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Collation elements in the root collator. 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Used by the CollationRootElements class. The data structure is described there. 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * null in a tailoring. 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public long[] rootElements; 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 561