12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller******************************************************************************* 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Copyright (C) 2013-2015, International Business Machines 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Corporation and others. All Rights Reserved. 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller******************************************************************************* 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* CollationBuilder.java, ported from collationbuilder.h/.cpp 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* C++ version created on: 2013may06 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* created by: Markus W. Scherer 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/ 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl.coll; 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.ParseException; 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Norm2AllModes; 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Normalizer2Impl; 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Normalizer2Impl.Hangul; 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UScript; 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.CanonicalIterator; 242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Collator; 252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Normalizer2; 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet; 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSetIterator; 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ULocale; 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 301537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/** 311537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android 32836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */ 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class CollationBuilder extends CollationRuleParser.Sink { 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final boolean DEBUG = false; 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final class BundleImporter implements CollationRuleParser.Importer { 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller BundleImporter() {} 37f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String getRules(String localeID, String collationType) { 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return CollationLoader.loadRules(new ULocale(localeID), collationType); 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public CollationBuilder(CollationTailoring b) { 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nfd = Normalizer2.getNFDInstance(); 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fcd = Norm2AllModes.getFCDNormalizer2(); 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nfcImpl = Norm2AllModes.getNFCInstance().impl; 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller base = b; 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller baseData = b.data; 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rootElements = new CollationRootElements(b.data.rootElements); 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller variableTop = 0; 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dataBuilder = new CollationDataBuilder(); 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fastLatinEnabled = true; 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesLength = 0; 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rootPrimaryIndexes = new UVector32(); 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nodes = new UVector64(); 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nfcImpl.ensureCanonIterData(); 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dataBuilder.initForTailoring(baseData); 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public CollationTailoring parseAndBuild(String ruleString) throws ParseException { 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(baseData.rootElements == null) { 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // C++ U_MISSING_RESOURCE_ERROR 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "missing root elements data, tailoring not supported"); 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationTailoring tailoring = new CollationTailoring(base.settings); 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationRuleParser parser = new CollationRuleParser(baseData); 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: This always bases &[last variable] and &[first regular] 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // on the root collator's maxVariable/variableTop. 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If we wanted this to change after [maxVariable x], then we would keep 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the tailoring.settings pointer here and read its variableTop when we need it. 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // See http://unicode.org/cldr/trac/ticket/6070 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller variableTop = base.settings.readOnly().variableTop; 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller parser.setSink(this); 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In Java, there is only one Importer implementation. 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In C++, the importer is a parameter for this method. 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller parser.setImporter(new BundleImporter()); 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationSettings ownedSettings = tailoring.settings.copyOnWrite(); 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller parser.parse(ruleString, ownedSettings); 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(dataBuilder.hasMappings()) { 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller makeTailoredCEs(); 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller closeOverComposites(); 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller finalizeCEs(); 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Copy all of ASCII, and Latin-1 letters, into each tailoring. 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller optimizeSet.add(0, 0x7f); 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller optimizeSet.add(0xc0, 0xff); 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Hangul is decomposed on the fly during collation, 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and the tailoring data is always built with HANGUL_TAG specials. 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller optimizeSet.remove(Hangul.HANGUL_BASE, Hangul.HANGUL_END); 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dataBuilder.optimize(optimizeSet); 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tailoring.ensureOwnedData(); 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(fastLatinEnabled) { dataBuilder.enableFastLatin(); } 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dataBuilder.build(tailoring.ownedData); 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // C++ tailoring.builder = dataBuilder; 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dataBuilder = null; 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tailoring.data = baseData; 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ownedSettings.fastLatinOptions = CollationFastLatin.getOptions( 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tailoring.data, ownedSettings, 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ownedSettings.fastLatinPrimaries); 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tailoring.setRules(ruleString); 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In Java, we do not have a rules version. 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In C++, the genrb build tool reads and supplies one, 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and the rulesVersion is a parameter for this method. 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tailoring.setVersion(base.version, 0 /* rulesVersion */); 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return tailoring; 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Implements CollationRuleParser.Sink. */ 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Override 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void addReset(int strength, CharSequence str) { 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(str.length() != 0); 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(str.charAt(0) == CollationRuleParser.POS_LEAD) { 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ces[0] = getSpecialResetPosition(str); 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesLength = 1; 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert((ces[0] & Collation.CASE_AND_QUATERNARY_MASK) == 0); 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // normal reset to a character or string 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String nfdString = nfd.normalize(str); 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesLength = dataBuilder.getCEs(nfdString, ces, 0); 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cesLength > Collation.MAX_EXPANSION_LENGTH) { 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException( 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "reset position maps to too many collation elements (more than 31)"); 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength == Collator.IDENTICAL) { return; } // simple reset-at-position 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // &[before strength]position 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(Collator.PRIMARY <= strength && strength <= Collator.TERTIARY); 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = findOrInsertNodeForCEs(strength); 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long node = nodes.elementAti(index); 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the index is for a "weaker" node, 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // then skip backwards over this and further "weaker" nodes. 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(strengthFromNode(node) > strength) { 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = previousIndexFromNode(node); 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find or insert a node whose index we will put into a temporary CE. 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strengthFromNode(node) == strength && isTailoredNode(node)) { 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reset to just before this same-strength tailored node. 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = previousIndexFromNode(node); 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(strength == Collator.PRIMARY) { 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // root primary node (has no previous index) 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long p = weight32FromNode(node); 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p == 0) { 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "reset primary-before ignorable not possible"); 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p <= rootElements.getFirstPrimary()) { 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // There is no primary gap between ignorables and the space-first-primary. 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "reset primary-before first non-ignorable not supported"); 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p == Collation.FIRST_TRAILING_PRIMARY) { 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We do not support tailoring to an unassigned-implicit CE. 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "reset primary-before [first trailing] not supported"); 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p = rootElements.getPrimaryBefore(p, baseData.isCompressiblePrimary(p)); 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = findOrInsertNodeForPrimary(p); 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Go to the last node in this list: 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Tailor after the last node between adjacent root nodes. 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextIndex = nextIndexFromNode(node); 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextIndex == 0) { break; } 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = nextIndex; 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // &[before 2] or &[before 3] 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = findCommonNode(index, Collator.SECONDARY); 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength >= Collator.TERTIARY) { 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = findCommonNode(index, Collator.TERTIARY); 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // findCommonNode() stayed on the stronger node or moved to 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // an explicit common-weight node of the reset-before strength. 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strengthFromNode(node) == strength) { 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Found a same-strength node with an explicit weight. 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int weight16 = weight16FromNode(node); 1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(weight16 == 0) { 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (strength == Collator.SECONDARY) ? 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "reset secondary-before secondary ignorable not possible" : 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "reset tertiary-before completely ignorable not possible"); 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(weight16 > Collation.BEFORE_WEIGHT16); 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reset to just before this node. 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Insert the preceding same-level explicit weight if it is not there already. 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Which explicit weight immediately precedes this one? 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller weight16 = getWeight16Before(index, node, strength); 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Does this preceding weight have a node? 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int previousWeight16; 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int previousIndex = previousIndexFromNode(node); 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = previousIndex;; i = previousIndexFromNode(node)) { 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(i); 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int previousStrength = strengthFromNode(node); 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(previousStrength < strength) { 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(weight16 >= Collation.COMMON_WEIGHT16 || i == previousIndex); 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Either the reset element has an above-common weight and 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the parent node provides the implied common weight, 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // or the reset element has a weight<=common in the node 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // right after the parent, and we need to insert the preceding weight. 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller previousWeight16 = Collation.COMMON_WEIGHT16; 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(previousStrength == strength && !isTailoredNode(node)) { 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller previousWeight16 = weight16FromNode(node); 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Skip weaker nodes and same-level tailored nodes. 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(previousWeight16 == weight16) { 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The preceding weight has a node, 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // maybe with following weaker or tailored nodes. 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reset to the last of them. 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = previousIndex; 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Insert a node with the preceding weight, reset to that. 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodeFromWeight16(weight16) | nodeFromStrength(strength); 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = insertNodeBetween(previousIndex, index, node); 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Found a stronger node with implied strength-common weight. 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int weight16 = getWeight16Before(index, node, strength); 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = findOrInsertWeakNode(index, weight16, strength); 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Strength of the temporary CE = strength of its reset position. 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Code above raises an error if the before-strength is stronger. 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strength = ceStrength(ces[cesLength - 1]); 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength); 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the secondary or tertiary weight preceding the current node's weight. 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * node=nodes[index]. 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getWeight16Before(int index, long node, int level) { 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(strengthFromNode(node) < level || !isTailoredNode(node)); 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Collect the root CE weights if this node is for a root CE. 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If it is not, then return the low non-primary boundary for a tailored CE. 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int t; 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strengthFromNode(node) == Collator.TERTIARY) { 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller t = weight16FromNode(node); 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller t = Collation.COMMON_WEIGHT16; // Stronger node with implied common weight. 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(strengthFromNode(node) > Collator.SECONDARY) { 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = previousIndexFromNode(node); 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTailoredNode(node)) { 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collation.BEFORE_WEIGHT16; 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int s; 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strengthFromNode(node) == Collator.SECONDARY) { 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller s = weight16FromNode(node); 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller s = Collation.COMMON_WEIGHT16; // Stronger node with implied common weight. 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(strengthFromNode(node) > Collator.PRIMARY) { 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = previousIndexFromNode(node); 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTailoredNode(node)) { 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collation.BEFORE_WEIGHT16; 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [p, s, t] is a root CE. Return the preceding weight for the requested level. 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long p = weight32FromNode(node); 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int weight16; 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(level == Collator.SECONDARY) { 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller weight16 = rootElements.getSecondaryBefore(p, s); 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller weight16 = rootElements.getTertiaryBefore(p, s, t); 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert((weight16 & ~Collation.ONLY_TERTIARY_MASK) == 0); 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return weight16; 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private long getSpecialResetPosition(CharSequence str) { 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(str.length() == 2); 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long ce; 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int strength = Collator.PRIMARY; 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isBoundary = false; 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationRuleParser.Position pos = 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationRuleParser.POSITION_VALUES[str.charAt(1) - CollationRuleParser.POS_BASE]; 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch(pos) { 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case FIRST_TERTIARY_IGNORABLE: 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Quaternary CEs are not supported. 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Non-zero quaternary weights are possible only on tertiary or stronger CEs. 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case LAST_TERTIARY_IGNORABLE: 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case FIRST_SECONDARY_IGNORABLE: { 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look for a tailored tertiary node after [0, 0, 0]. 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = findOrInsertNodeForRootCE(0, Collator.TERTIARY); 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long node = nodes.elementAti(index); 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((index = nextIndexFromNode(node)) != 0) { 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(strengthFromNode(node) <= Collator.TERTIARY); 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTailoredNode(node) && strengthFromNode(node) == Collator.TERTIARY) { 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return tempCEFromIndexAndStrength(index, Collator.TERTIARY); 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return rootElements.getFirstTertiaryCE(); 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No need to look for nodeHasAnyBefore() on a tertiary node. 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case LAST_SECONDARY_IGNORABLE: 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = rootElements.getLastTertiaryCE(); 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strength = Collator.TERTIARY; 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case FIRST_PRIMARY_IGNORABLE: { 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look for a tailored secondary node after [0, 0, *]. 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = findOrInsertNodeForRootCE(0, Collator.SECONDARY); 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long node = nodes.elementAti(index); 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while((index = nextIndexFromNode(node)) != 0) { 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strength = strengthFromNode(node); 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength < Collator.SECONDARY) { break; } 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength == Collator.SECONDARY) { 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTailoredNode(node)) { 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nodeHasBefore3(node)) { 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node))); 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(isTailoredNode(nodes.elementAti(index))); 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return tempCEFromIndexAndStrength(index, Collator.SECONDARY); 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = rootElements.getFirstSecondaryCE(); 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strength = Collator.SECONDARY; 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case LAST_PRIMARY_IGNORABLE: 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = rootElements.getLastSecondaryCE(); 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strength = Collator.SECONDARY; 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case FIRST_VARIABLE: 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = rootElements.getFirstPrimaryCE(); 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isBoundary = true; // FractionalUCA.txt: FDD1 00A0, SPACE first primary 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case LAST_VARIABLE: 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = rootElements.lastCEWithPrimaryBefore(variableTop + 1); 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case FIRST_REGULAR: 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = rootElements.firstCEWithPrimaryAtLeast(variableTop + 1); 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isBoundary = true; // FractionalUCA.txt: FDD1 263A, SYMBOL first primary 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case LAST_REGULAR: 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Use the Hani-first-primary rather than the actual last "regular" CE before it, 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for backward compatibility with behavior before the introduction of 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // script-first-primary CEs in the root collator. 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = rootElements.firstCEWithPrimaryAtLeast( 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller baseData.getFirstPrimaryForGroup(UScript.HAN)); 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case FIRST_IMPLICIT: 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = baseData.getSingleCE(0x4e00); 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case LAST_IMPLICIT: 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We do not support tailoring to an unassigned-implicit CE. 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "reset to [last implicit] not supported"); 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case FIRST_TRAILING: 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = Collation.makeCE(Collation.FIRST_TRAILING_PRIMARY); 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isBoundary = true; // trailing first primary (there is no mapping for it) 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case LAST_TRAILING: 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("LDML forbids tailoring to U+FFFF"); 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(false); 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = findOrInsertNodeForRootCE(ce, strength); 3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long node = nodes.elementAti(index); 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((pos.ordinal() & 1) == 0) { 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // even pos = [first xyz] 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!nodeHasAnyBefore(node) && isBoundary) { 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // A <group> first primary boundary is artificially added to FractionalUCA.txt. 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // It is reachable via its special contraction, but is not normally used. 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find the first character tailored after the boundary CE, 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // or the first real root CE after it. 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((index = nextIndexFromNode(node)) != 0) { 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If there is a following node, then it must be tailored 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // because there are no root CEs with a boundary primary 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and non-common secondary/tertiary weights. 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(isTailoredNode(node)); 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = tempCEFromIndexAndStrength(index, strength); 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(strength == Collator.PRIMARY); 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long p = ce >>> 32; 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int pIndex = rootElements.findPrimary(p); 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isCompressible = baseData.isCompressiblePrimary(p); 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p = rootElements.getPrimaryAfter(p, pIndex, isCompressible); 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = Collation.makeCE(p); 4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = findOrInsertNodeForRootCE(ce, Collator.PRIMARY); 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nodeHasAnyBefore(node)) { 4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Get the first node that was tailored before this one at a weaker strength. 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nodeHasBefore2(node)) { 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node))); 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nodeHasBefore3(node)) { 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node))); 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(isTailoredNode(nodes.elementAti(index))); 4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = tempCEFromIndexAndStrength(index, strength); 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // odd pos = [last xyz] 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find the last node that was tailored after the [last xyz] 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // at a strength no greater than the position's strength. 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextIndex = nextIndexFromNode(node); 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextIndex == 0) { break; } 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long nextNode = nodes.elementAti(nextIndex); 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strengthFromNode(nextNode) < strength) { break; } 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = nextIndex; 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nextNode; 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Do not make a temporary CE for a root node. 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This last node might be the node for the root CE itself, 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // or a node with a common secondary or tertiary weight. 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTailoredNode(node)) { 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = tempCEFromIndexAndStrength(index, strength); 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce; 4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Implements CollationRuleParser.Sink. */ 441f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void addRelation(int strength, CharSequence prefix, CharSequence str, CharSequence extension) { 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String nfdPrefix; 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prefix.length() == 0) { 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nfdPrefix = ""; 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nfdPrefix = nfd.normalize(prefix); 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String nfdString = nfd.normalize(str); 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The runtime code decomposes Hangul syllables on the fly, 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // with recursive processing but without making the Jamo pieces visible for matching. 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // It does not work with certain types of contextual mappings. 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nfdLength = nfdString.length(); 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nfdLength >= 2) { 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = nfdString.charAt(0); 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(Hangul.isJamoL(c) || Hangul.isJamoV(c)) { 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // While handling a Hangul syllable, contractions starting with Jamo L or V 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // would not see the following Jamo of that syllable. 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "contractions starting with conjoining Jamo L or V not supported"); 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = nfdString.charAt(nfdLength - 1); 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(Hangul.isJamoL(c) || 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (Hangul.isJamoV(c) && Hangul.isJamoL(nfdString.charAt(nfdLength - 2)))) { 4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // A contraction ending with Jamo L or L+V would require 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // generating Hangul syllables in addTailComposites() (588 for a Jamo L), 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // or decomposing a following Hangul syllable on the fly, during contraction matching. 4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "contractions ending with conjoining Jamo L or L+V not supported"); 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // A Hangul syllable completely inside a contraction is ok. 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: If there is a prefix, then the parser checked that 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // both the prefix and the string beging with NFC boundaries (not Jamo V or T). 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Therefore: prefix.isEmpty() || !isJamoVOrT(nfdString.charAt(0)) 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (While handling a Hangul syllable, prefixes on Jamo V or T 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // would not see the previous Jamo of that syllable.) 4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength != Collator.IDENTICAL) { 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find the node index after which we insert the new tailored node. 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = findOrInsertNodeForCEs(strength); 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(cesLength > 0); 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long ce = ces[cesLength - 1]; 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength == Collator.PRIMARY && !isTempCE(ce) && (ce >>> 32) == 0) { 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // There is no primary gap between ignorables and the space-first-primary. 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "tailoring primary after ignorables not supported"); 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength == Collator.QUATERNARY && ce == 0) { 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The CE data structure does not support non-zero quaternary weights 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // on tertiary ignorables. 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "tailoring quaternary after tertiary ignorables not supported"); 4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Insert the new tailored node. 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = insertTailoredNodeAfter(index, strength); 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Strength of the temporary CE: 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The new relation may yield a stronger CE but not a weaker one. 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int tempStrength = ceStrength(ce); 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength < tempStrength) { tempStrength = strength; } 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ces[cesLength - 1] = tempCEFromIndexAndStrength(index, tempStrength); 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setCaseBits(nfdString); 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cesLengthBeforeExtension = cesLength; 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(extension.length() != 0) { 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String nfdExtension = nfd.normalize(extension); 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesLength = dataBuilder.getCEs(nfdExtension, ces, cesLength); 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cesLength > Collation.MAX_EXPANSION_LENGTH) { 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException( 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "extension string adds too many collation elements (more than 31 total)"); 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ce32 = Collation.UNASSIGNED_CE32; 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((!nfdPrefix.contentEquals(prefix) || !nfdString.contentEquals(str)) && 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller !ignorePrefix(prefix) && !ignoreString(str)) { 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Map from the original input to the CEs. 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We do this in case the canonical closure is incomplete, 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // so that it is possible to explicitly provide the missing mappings. 5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = addIfDifferent(prefix, str, ces, cesLength, ce32); 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addWithClosure(nfdPrefix, nfdString, ces, cesLength, ce32); 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesLength = cesLengthBeforeExtension; 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Picks one of the current CEs and finds or inserts a node in the graph 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for the CE + strength. 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findOrInsertNodeForCEs(int strength) { 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(Collator.PRIMARY <= strength && strength <= Collator.QUATERNARY); 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find the last CE that is at least as "strong" as the requested difference. 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: Stronger is smaller (Collator.PRIMARY=0). 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long ce; 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;; --cesLength) { 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cesLength == 0) { 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = ces[0] = 0; 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesLength = 1; 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce = ces[cesLength - 1]; 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ceStrength(ce) <= strength) { break; } 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTempCE(ce)) { 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No need to findCommonNode() here for lower levels 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // because insertTailoredNodeAfter() will do that anyway. 5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return indexFromTempCE(ce); 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // root CE 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((int)(ce >>> 56) == Collation.UNASSIGNED_IMPLICIT_BYTE) { 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException( 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "tailoring relative to an unassigned code point not supported"); 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return findOrInsertNodeForRootCE(ce, strength); 5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findOrInsertNodeForRootCE(long ce, int strength) { 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert((int)(ce >>> 56) != Collation.UNASSIGNED_IMPLICIT_BYTE); 5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find or insert the node for each of the root CE's weights, 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // down to the requested level/strength. 5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Root CEs must have common=zero quaternary weights (for which we never insert any nodes). 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert((ce & 0xc0) == 0); 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = findOrInsertNodeForPrimary(ce >>> 32); 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength >= Collator.SECONDARY) { 5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lower32 = (int)ce; 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = findOrInsertWeakNode(index, lower32 >>> 16, Collator.SECONDARY); 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength >= Collator.TERTIARY) { 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = findOrInsertWeakNode(index, lower32 & Collation.ONLY_TERTIARY_MASK, 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Collator.TERTIARY); 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return index; 5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Like Java Collections.binarySearch(List, key, Comparator). 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the index>=0 where the item was found, 5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or the index<0 for inserting the string at ~index in sorted order 5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (index into rootPrimaryIndexes) 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int binarySearchForRootPrimaryNode( 5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] rootPrimaryIndexes, int length, long[] nodes, long p) { 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length == 0) { return ~0; } 5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = 0; 5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit = length; 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 595f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert int i = (int)(((long)start + (long)limit) / 2); 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long node = nodes[rootPrimaryIndexes[i]]; 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long nodePrimary = node >>> 32; // weight32FromNode(node) 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (p == nodePrimary) { 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (p < nodePrimary) { 6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i == start) { 6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ~start; // insert s before i 6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = i; 6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i == start) { 6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ~(start + 1); // insert s after i 6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = i; 6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Finds or inserts the node for a root CE's primary weight. */ 6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findOrInsertNodeForPrimary(long p) { 6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int rootIndex = binarySearchForRootPrimaryNode( 6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rootPrimaryIndexes.getBuffer(), rootPrimaryIndexes.size(), nodes.getBuffer(), p); 6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(rootIndex >= 0) { 6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return rootPrimaryIndexes.elementAti(rootIndex); 6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Start a new list of nodes with this primary. 6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = nodes.size(); 6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nodes.addElement(nodeFromWeight32(p)); 6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rootPrimaryIndexes.insertElementAt(index, ~rootIndex); 6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return index; 6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Finds or inserts the node for a secondary or tertiary weight. */ 6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findOrInsertWeakNode(int index, int weight16, int level) { 6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(0 <= index && index < nodes.size()); 6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(Collator.SECONDARY <= level && level <= Collator.TERTIARY); 6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(weight16 == Collation.COMMON_WEIGHT16) { 6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return findCommonNode(index, level); 6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If this will be the first below-common weight for the parent node, 6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // then we will also need to insert a common weight after it. 6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long node = nodes.elementAti(index); 6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(strengthFromNode(node) < level); // parent node is stronger 6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(weight16 != 0 && weight16 < Collation.COMMON_WEIGHT16) { 6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int hasThisLevelBefore = level == Collator.SECONDARY ? HAS_BEFORE2 : HAS_BEFORE3; 6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((node & hasThisLevelBefore) == 0) { 6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The parent node has an implied level-common weight. 6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long commonNode = 6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nodeFromWeight16(Collation.COMMON_WEIGHT16) | nodeFromStrength(level); 6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(level == Collator.SECONDARY) { 6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Move the HAS_BEFORE3 flag from the parent node 6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to the new secondary common node. 6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller commonNode |= node & HAS_BEFORE3; 6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node &= ~(long)HAS_BEFORE3; 6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nodes.setElementAt(node | hasThisLevelBefore, index); 6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Insert below-common-weight node. 6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextIndex = nextIndexFromNode(node); 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodeFromWeight16(weight16) | nodeFromStrength(level); 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = insertNodeBetween(index, nextIndex, node); 6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Insert common-weight node. 6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller insertNodeBetween(index, nextIndex, commonNode); 6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Return index of below-common-weight node. 6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return index; 6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find the root CE's weight for this level. 6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Postpone insertion if not found: 6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Insert the new root node before the next stronger node, 6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // or before the next root node with the same strength and a larger weight. 6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextIndex; 6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while((nextIndex = nextIndexFromNode(node)) != 0) { 6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(nextIndex); 6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextStrength = strengthFromNode(node); 6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextStrength <= level) { 6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Insert before a stronger node. 6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextStrength < level) { break; } 6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // nextStrength == level 6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!isTailoredNode(node)) { 6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextWeight16 = weight16FromNode(node); 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextWeight16 == weight16) { 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Found the node for the root CE up to this level. 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return nextIndex; 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Insert before a node with a larger same-strength weight. 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextWeight16 > weight16) { break; } 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Skip the next node. 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = nextIndex; 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodeFromWeight16(weight16) | nodeFromStrength(level); 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return insertNodeBetween(index, nextIndex, node); 6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Makes and inserts a new tailored node into the list, after the one at index. 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Skips over nodes of weaker strength to maintain collation order 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ("postpone insertion"). 6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the new node's index 7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int insertTailoredNodeAfter(int index, int strength) { 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(0 <= index && index < nodes.size()); 7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength >= Collator.SECONDARY) { 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = findCommonNode(index, Collator.SECONDARY); 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength >= Collator.TERTIARY) { 7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = findCommonNode(index, Collator.TERTIARY); 7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Postpone insertion: 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Insert the new node before the next one with a strength at least as strong. 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long node = nodes.elementAti(index); 7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextIndex; 7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while((nextIndex = nextIndexFromNode(node)) != 0) { 7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(nextIndex); 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strengthFromNode(node) <= strength) { break; } 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Skip the next node which has a weaker (larger) strength than the new one. 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = nextIndex; 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = IS_TAILORED | nodeFromStrength(strength); 7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return insertNodeBetween(index, nextIndex, node); 7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Inserts a new node into the list, between list-adjacent items. 7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The node's previous and next indexes must not be set yet. 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the new node's index 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int insertNodeBetween(int index, int nextIndex, long node) { 7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(previousIndexFromNode(node) == 0); 7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(nextIndexFromNode(node) == 0); 7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(nextIndexFromNode(nodes.elementAti(index)) == nextIndex); 7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Append the new node and link it to the existing nodes. 7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int newIndex = nodes.size(); 7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node |= nodeFromPreviousIndex(index) | nodeFromNextIndex(nextIndex); 7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nodes.addElement(node); 7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // nodes[index].nextIndex = newIndex 7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nodes.setElementAt(changeNodeNextIndex(node, newIndex), index); 7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // nodes[nextIndex].previousIndex = newIndex 7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextIndex != 0) { 7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(nextIndex); 7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nodes.setElementAt(changeNodePreviousIndex(node, newIndex), nextIndex); 7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return newIndex; 7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Finds the node which implies or contains a common=05 weight of the given strength 7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (secondary or tertiary), if the current node is stronger. 7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Skips weaker nodes and tailored nodes if the current node is stronger 7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and is followed by an explicit-common-weight node. 7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Always returns the input index if that node is no stronger than the given strength. 7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findCommonNode(int index, int strength) { 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(Collator.SECONDARY <= strength && strength <= Collator.TERTIARY); 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long node = nodes.elementAti(index); 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strengthFromNode(node) >= strength) { 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The current node is no stronger. 7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return index; 7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength == Collator.SECONDARY ? !nodeHasBefore2(node) : !nodeHasBefore3(node)) { 7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The current node implies the strength-common weight. 7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return index; 7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = nextIndexFromNode(node); 7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(!isTailoredNode(node) && strengthFromNode(node) == strength && 7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller weight16FromNode(node) < Collation.COMMON_WEIGHT16); 7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Skip to the explicit common node. 7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = nextIndexFromNode(node); 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodes.elementAti(index); 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(strengthFromNode(node) >= strength); 7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while(isTailoredNode(node) || strengthFromNode(node) > strength || 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller weight16FromNode(node) < Collation.COMMON_WEIGHT16); 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(weight16FromNode(node) == Collation.COMMON_WEIGHT16); 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return index; 7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void setCaseBits(CharSequence nfdString) { 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int numTailoredPrimaries = 0; 7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < cesLength; ++i) { 7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ceStrength(ces[i]) == Collator.PRIMARY) { ++numTailoredPrimaries; } 7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We should not be able to get too many case bits because 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // cesLength<=31==MAX_EXPANSION_LENGTH. 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 31 pairs of case bits fit into an long without setting its sign bit. 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(numTailoredPrimaries <= 31); 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long cases = 0; 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(numTailoredPrimaries > 0) { 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CharSequence s = nfdString; 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UTF16CollationIterator baseCEs = new UTF16CollationIterator(baseData, false, s, 0); 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int baseCEsLength = baseCEs.fetchCEs() - 1; 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(baseCEsLength >= 0 && baseCEs.getCE(baseCEsLength) == Collation.NO_CE); 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lastCase = 0; 7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int numBasePrimaries = 0; 7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < baseCEsLength; ++i) { 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long ce = baseCEs.getCE(i); 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((ce >>> 32) != 0) { 8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++numBasePrimaries; 8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = ((int)ce >> 14) & 3; 8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(c == 0 || c == 2); // lowercase or uppercase, no mixed case in any base CE 8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(numBasePrimaries < numTailoredPrimaries) { 8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cases |= (long)c << ((numBasePrimaries - 1) * 2); 8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(numBasePrimaries == numTailoredPrimaries) { 8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCase = c; 8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(c != lastCase) { 8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // There are more base primary CEs than tailored primaries. 8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set mixed case if the case bits of the remainder differ. 8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCase = 1; 8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Nothing more can change. 8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(numBasePrimaries >= numTailoredPrimaries) { 8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cases |= (long)lastCase << ((numTailoredPrimaries - 1) * 2); 8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < cesLength; ++i) { 8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long ce = ces[i] & 0xffffffffffff3fffL; // clear old case bits 8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int strength = ceStrength(ce); 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength == Collator.PRIMARY) { 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce |= (cases & 3) << 14; 8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cases >>>= 2; 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(strength == Collator.TERTIARY) { 8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Tertiary CEs must have uppercase bits. 8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // See the LDML spec, and comments in class CollationCompare. 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce |= 0x8000; 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Tertiary ignorable CEs must have 0 case bits. 8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We set 0 case bits for secondary CEs too 8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // since currently only U+0345 is cased and maps to a secondary CE, 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and it is lowercase. Other secondaries are uncased. 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // See [[:Cased:]&[:uca1=:]] where uca1 queries the root primary weight. 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ces[i] = ce; 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Implements CollationRuleParser.Sink. */ 8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Override 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void suppressContractions(UnicodeSet set) { 8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dataBuilder.suppressContractions(set); 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Implements CollationRuleParser.Sink. */ 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Override 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void optimize(UnicodeSet set) { 8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller optimizeSet.addAll(set); 8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Adds the mapping and its canonical closure. 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Takes ce32=dataBuilder.encodeCEs(...) so that the data builder 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * need not re-encode the CEs multiple times. 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int addWithClosure(CharSequence nfdPrefix, CharSequence nfdString, 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long[] newCEs, int newCEsLength, int ce32) { 8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Map from the NFD input to the CEs. 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = addIfDifferent(nfdPrefix, nfdString, newCEs, newCEsLength, ce32); 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = addOnlyClosure(nfdPrefix, nfdString, newCEs, newCEsLength, ce32); 8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addTailComposites(nfdPrefix, nfdString); 8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int addOnlyClosure(CharSequence nfdPrefix, CharSequence nfdString, 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long[] newCEs, int newCEsLength, int ce32) { 8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Map from canonically equivalent input to the CEs. (But not from the all-NFD input.) 8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: make CanonicalIterator work with CharSequence, or maybe change arguments here to String 8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nfdPrefix.length() == 0) { 8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CanonicalIterator stringIter = new CanonicalIterator(nfdString.toString()); 8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String prefix = ""; 8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String str = stringIter.next(); 8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(str == null) { break; } 8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ignoreString(str) || str.contentEquals(nfdString)) { continue; } 8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32); 8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CanonicalIterator prefixIter = new CanonicalIterator(nfdPrefix.toString()); 8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CanonicalIterator stringIter = new CanonicalIterator(nfdString.toString()); 8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String prefix = prefixIter.next(); 8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prefix == null) { break; } 8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ignorePrefix(prefix)) { continue; } 8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean samePrefix = prefix.contentEquals(nfdPrefix); 8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String str = stringIter.next(); 8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(str == null) { break; } 8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ignoreString(str) || (samePrefix && str.contentEquals(nfdString))) { continue; } 8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32); 8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller stringIter.reset(); 8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void addTailComposites(CharSequence nfdPrefix, CharSequence nfdString) { 9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look for the last starter in the NFD string. 9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lastStarter; 9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int indexAfterLastStarter = nfdString.length(); 9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(indexAfterLastStarter == 0) { return; } // no starter at all 9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastStarter = Character.codePointBefore(nfdString, indexAfterLastStarter); 9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nfd.getCombiningClass(lastStarter) == 0) { break; } 9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller indexAfterLastStarter -= Character.charCount(lastStarter); 9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No closure to Hangul syllables since we decompose them on the fly. 9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(Hangul.isJamoL(lastStarter)) { return; } 9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Are there any composites whose decomposition starts with the lastStarter? 9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: Normalizer2Impl does not currently return start sets for NFC_QC=Maybe characters. 9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We might find some more equivalent mappings here if it did. 9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet composites = new UnicodeSet(); 9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!nfcImpl.getCanonStartSet(lastStarter, composites)) { return; } 9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder newNFDString = new StringBuilder(), newString = new StringBuilder(); 9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long[] newCEs = new long[Collation.MAX_EXPANSION_LENGTH]; 9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSetIterator iter = new UnicodeSetIterator(composites); 9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(iter.next()) { 9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(iter.codepoint != UnicodeSetIterator.IS_STRING); 9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int composite = iter.codepoint; 9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String decomp = nfd.getDecomposition(composite); 9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!mergeCompositeIntoString(nfdString, indexAfterLastStarter, composite, decomp, 9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newNFDString, newString)) { 9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int newCEsLength = dataBuilder.getCEs(nfdPrefix, newNFDString, newCEs, 0); 9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(newCEsLength > Collation.MAX_EXPANSION_LENGTH) { 9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Ignore mappings that we cannot store. 9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: It is possible that the newCEs do not make use of the mapping 9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for which we are adding the tail composites, in which case we might be adding 9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // unnecessary mappings. 9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // For example, when we add tail composites for ae^ (^=combining circumflex), 9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // UCA discontiguous-contraction matching does not find any matches 9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for ae_^ (_=any combining diacritic below) *unless* there is also 9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // a contraction mapping for ae. 9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Thus, if there is no ae contraction, then the ae^ mapping is ignored 9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // while fetching the newCEs for ae_^. 9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Try to detect this effectively. 9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (Alternatively, print a warning when prefix contractions are missing.) 9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We do not need an explicit mapping for the NFD strings. 9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // It is fine if the NFD input collates like this via a sequence of mappings. 9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // It also saves a little bit of space, and may reduce the set of characters with contractions. 9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ce32 = addIfDifferent(nfdPrefix, newString, 9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newCEs, newCEsLength, Collation.UNASSIGNED_CE32); 9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ce32 != Collation.UNASSIGNED_CE32) { 9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // was different, was added 9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addOnlyClosure(nfdPrefix, newNFDString, newCEs, newCEsLength, ce32); 9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean mergeCompositeIntoString(CharSequence nfdString, int indexAfterLastStarter, 9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int composite, CharSequence decomp, 9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder newNFDString, StringBuilder newString) { 9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(Character.codePointBefore(nfdString, indexAfterLastStarter) == 9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Character.codePointAt(decomp, 0)); 9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lastStarterLength = Character.offsetByCodePoints(decomp, 0, 1); 9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(lastStarterLength == decomp.length()) { 9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Singleton decompositions should be found by addWithClosure() 9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and the CanonicalIterator, so we can ignore them here. 9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(equalSubSequences(nfdString, indexAfterLastStarter, decomp, lastStarterLength)) { 9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // same strings, nothing new to be found here 9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Make new FCD strings that combine a composite, or its decomposition, 9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // into the nfdString's last starter and the combining marks following it. 9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Make an NFD version, and a version with the composite. 9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newNFDString.setLength(0); 9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newNFDString.append(nfdString, 0, indexAfterLastStarter); 9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newString.setLength(0); 9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newString.append(nfdString, 0, indexAfterLastStarter - lastStarterLength) 9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller .appendCodePoint(composite); 9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The following is related to discontiguous contraction matching, 9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // but builds only FCD strings (or else returns false). 9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sourceIndex = indexAfterLastStarter; 9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int decompIndex = lastStarterLength; 9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Small optimization: We keep the source character across loop iterations 9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // because we do not always consume it, 9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and then need not fetch it again nor look up its combining class again. 9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sourceChar = Collation.SENTINEL_CP; 9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The cc variables need to be declared before the loop so that at the end 9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // they are set to the last combining classes seen. 9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sourceCC = 0; 9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int decompCC = 0; 9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(sourceChar < 0) { 10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(sourceIndex >= nfdString.length()) { break; } 10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sourceChar = Character.codePointAt(nfdString, sourceIndex); 10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sourceCC = nfd.getCombiningClass(sourceChar); 10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(sourceCC != 0); 10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We consume a decomposition character in each iteration. 10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(decompIndex >= decomp.length()) { break; } 10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int decompChar = Character.codePointAt(decomp, decompIndex); 10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller decompCC = nfd.getCombiningClass(decompChar); 10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Compare the two characters and their combining classes. 10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(decompCC == 0) { 10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Unable to merge because the source contains a non-zero combining mark 10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // but the composite's decomposition contains another starter. 10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The strings would not be equivalent. 10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(sourceCC < decompCC) { 10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Composite + sourceChar would not be FCD. 10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(decompCC < sourceCC) { 10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newNFDString.appendCodePoint(decompChar); 10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller decompIndex += Character.charCount(decompChar); 10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(decompChar != sourceChar) { 10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Blocked because same combining class. 10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { // match: decompChar == sourceChar 10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newNFDString.appendCodePoint(decompChar); 10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller decompIndex += Character.charCount(decompChar); 10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sourceIndex += Character.charCount(decompChar); 10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sourceChar = Collation.SENTINEL_CP; 10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We are at the end of at least one of the two inputs. 10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(sourceChar >= 0) { // more characters from nfdString but not from decomp 10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(sourceCC < decompCC) { 10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Appending the next source character to the composite would not be FCD. 10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newNFDString.append(nfdString, sourceIndex, nfdString.length()); 10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newString.append(nfdString, sourceIndex, nfdString.length()); 10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(decompIndex < decomp.length()) { // more characters from decomp, not from nfdString 10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newNFDString.append(decomp, decompIndex, decomp.length()); 10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(nfd.isNormalized(newNFDString)); 10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(fcd.isNormalized(newString)); 10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(nfd.normalize(newString).equals(newNFDString.toString())); // canonically equivalent 10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean equalSubSequences(CharSequence left, int leftStart, CharSequence right, int rightStart) { 10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // C++ UnicodeString::compare(leftStart, 0x7fffffff, right, rightStart, 0x7fffffff) == 0 10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int leftLength = left.length(); 10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((leftLength - leftStart) != (right.length() - rightStart)) { return false; } 10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(leftStart < leftLength) { 10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(left.charAt(leftStart++) != right.charAt(rightStart++)) { 10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean ignorePrefix(CharSequence s) { 10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Do not map non-FCD prefixes. 10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return !isFCD(s); 10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean ignoreString(CharSequence s) { 10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Do not map non-FCD strings. 10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Do not map strings that start with Hangul syllables: We decompose those on the fly. 10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return !isFCD(s) || Hangul.isHangul(s.charAt(0)); 10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isFCD(CharSequence s) { 10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return fcd.isNormalized(s); 10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final UnicodeSet COMPOSITES = new UnicodeSet("[:NFD_QC=N:]"); 10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static { 10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Hangul is decomposed on the fly during collation. 10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller COMPOSITES.remove(Hangul.HANGUL_BASE, Hangul.HANGUL_END); 10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void closeOverComposites() { 10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String prefix = ""; // empty 10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSetIterator iter = new UnicodeSetIterator(COMPOSITES); 10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(iter.next()) { 10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(iter.codepoint != UnicodeSetIterator.IS_STRING); 10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String nfdString = nfd.getDecomposition(iter.codepoint); 10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cesLength = dataBuilder.getCEs(nfdString, ces, 0); 10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cesLength > Collation.MAX_EXPANSION_LENGTH) { 10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Too many CEs from the decomposition (unusual), ignore this composite. 10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We could add a capacity parameter to getCEs() and reallocate if necessary. 10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // However, this can only really happen in contrived cases. 10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String composite = iter.getString(); 10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addIfDifferent(prefix, composite, ces, cesLength, Collation.UNASSIGNED_CE32); 10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int addIfDifferent(CharSequence prefix, CharSequence str, 10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long[] newCEs, int newCEsLength, int ce32) { 10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long[] oldCEs = new long[Collation.MAX_EXPANSION_LENGTH]; 10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int oldCEsLength = dataBuilder.getCEs(prefix, str, oldCEs, 0); 11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!sameCEs(newCEs, newCEsLength, oldCEs, oldCEsLength)) { 11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ce32 == Collation.UNASSIGNED_CE32) { 11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce32 = dataBuilder.encodeCEs(newCEs, newCEsLength); 11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dataBuilder.addCE32(prefix, str, ce32); 11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ce32; 11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean sameCEs(long[] ces1, int ces1Length, 11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long[] ces2, int ces2Length) { 11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ces1Length != ces2Length) { 11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(ces1Length <= Collation.MAX_EXPANSION_LENGTH); 11152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < ces1Length; ++i) { 11162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ces1[i] != ces2[i]) { return false; } 11172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 11192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int alignWeightRight(int w) { 11222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(w != 0) { 11232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while((w & 0xff) == 0) { w >>>= 8; } 11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return w; 11262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 11292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Walks the tailoring graph and overwrites tailored nodes with new CEs. 11302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * After this, the graph is destroyed. 11312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The nodes array can then be used only as a source of tailored CEs. 11322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 11332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void makeTailoredCEs() { 11342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationWeights primaries = new CollationWeights(); 11352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationWeights secondaries = new CollationWeights(); 11362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationWeights tertiaries = new CollationWeights(); 11372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long[] nodesArray = nodes.getBuffer(); 11382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(DEBUG) { 11392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("\nCollationBuilder.makeTailoredCEs()"); 11402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) { 11432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = rootPrimaryIndexes.elementAti(rpi); 11442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long node = nodesArray[i]; 11452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long p = weight32FromNode(node); 11462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int s = p == 0 ? 0 : Collation.COMMON_WEIGHT16; 11472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int t = s; 11482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int q = 0; 11492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean pIsTailored = false; 11502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean sIsTailored = false; 11512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean tIsTailored = false; 11522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(DEBUG) { 11532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.printf("\nprimary %x\n", alignWeightRight((int)p)); 11542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int pIndex = p == 0 ? 0 : rootElements.findPrimary(p); 11562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextIndex = nextIndexFromNode(node); 11572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(nextIndex != 0) { 11582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = nextIndex; 11592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = nodesArray[i]; 11602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextIndex = nextIndexFromNode(node); 11612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int strength = strengthFromNode(node); 11622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength == Collator.QUATERNARY) { 11632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(isTailoredNode(node)); 11642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(DEBUG) { 11652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.print(" quat+ "); 11662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(q == 3) { 11682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // C++ U_BUFFER_OVERFLOW_ERROR 11692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException("quaternary tailoring gap too small"); 11702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++q; 11722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 11732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength == Collator.TERTIARY) { 11742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTailoredNode(node)) { 11752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(DEBUG) { 11762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.print(" ter+ "); 11772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!tIsTailored) { 11792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // First tailored tertiary node for [p, s]. 11802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int tCount = countTailoredNodes(nodesArray, nextIndex, 11812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Collator.TERTIARY) + 1; 11822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int tLimit; 11832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(t == 0) { 11842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Gap at the beginning of the tertiary CE range. 11852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller t = rootElements.getTertiaryBoundary() - 0x100; 11862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tLimit = (int)rootElements.getFirstTertiaryCE() & Collation.ONLY_TERTIARY_MASK; 11872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(!pIsTailored && !sIsTailored) { 11882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // p and s are root weights. 11892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tLimit = rootElements.getTertiaryAfter(pIndex, s, t); 11902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(t == Collation.BEFORE_WEIGHT16) { 11912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tLimit = Collation.COMMON_WEIGHT16; 11922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 11932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [p, s] is tailored. 11942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(t == Collation.COMMON_WEIGHT16); 11952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tLimit = rootElements.getTertiaryBoundary(); 11962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(tLimit == 0x4000 || (tLimit & ~Collation.ONLY_TERTIARY_MASK) == 0); 11982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tertiaries.initForTertiary(); 11992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!tertiaries.allocWeights(t, tLimit, tCount)) { 12002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // C++ U_BUFFER_OVERFLOW_ERROR 12012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException("tertiary tailoring gap too small"); 12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tIsTailored = true; 12042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller t = (int)tertiaries.nextWeight(); 12062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(t != 0xffffffff); 12072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 12082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller t = weight16FromNode(node); 12092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tIsTailored = false; 12102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(DEBUG) { 12112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.printf(" ter %x\n", alignWeightRight(t)); 12122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 12152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strength == Collator.SECONDARY) { 12162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTailoredNode(node)) { 12172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(DEBUG) { 12182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.print(" sec+ "); 12192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!sIsTailored) { 12212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // First tailored secondary node for p. 12222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sCount = countTailoredNodes(nodesArray, nextIndex, 12232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Collator.SECONDARY) + 1; 12242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sLimit; 12252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(s == 0) { 12262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Gap at the beginning of the secondary CE range. 12272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller s = rootElements.getSecondaryBoundary() - 0x100; 12282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sLimit = (int)(rootElements.getFirstSecondaryCE() >> 16); 12292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(!pIsTailored) { 12302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // p is a root primary. 12312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sLimit = rootElements.getSecondaryAfter(pIndex, s); 12322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(s == Collation.BEFORE_WEIGHT16) { 12332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sLimit = Collation.COMMON_WEIGHT16; 12342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 12352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // p is a tailored primary. 12362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(s == Collation.COMMON_WEIGHT16); 12372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sLimit = rootElements.getSecondaryBoundary(); 12382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(s == Collation.COMMON_WEIGHT16) { 12402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Do not tailor into the getSortKey() range of 12412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compressed common secondaries. 12422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller s = rootElements.getLastCommonSecondary(); 12432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller secondaries.initForSecondary(); 12452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!secondaries.allocWeights(s, sLimit, sCount)) { 12462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // C++ U_BUFFER_OVERFLOW_ERROR 12472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(DEBUG) { 12482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.printf("!secondaries.allocWeights(%x, %x, sCount=%d)\n", 12492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller alignWeightRight(s), alignWeightRight(sLimit), 12502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller alignWeightRight(sCount)); 12512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException("secondary tailoring gap too small"); 12532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sIsTailored = true; 12552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller s = (int)secondaries.nextWeight(); 12572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(s != 0xffffffff); 12582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 12592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller s = weight16FromNode(node); 12602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sIsTailored = false; 12612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(DEBUG) { 12622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.printf(" sec %x\n", alignWeightRight(s)); 12632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else /* Collator.PRIMARY */ { 12662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(isTailoredNode(node)); 12672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(DEBUG) { 12682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.print("pri+ "); 12692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!pIsTailored) { 12712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // First tailored primary node in this list. 12722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int pCount = countTailoredNodes(nodesArray, nextIndex, 12732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Collator.PRIMARY) + 1; 12742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isCompressible = baseData.isCompressiblePrimary(p); 12752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long pLimit = 12762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rootElements.getPrimaryAfter(p, pIndex, isCompressible); 12772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller primaries.initForPrimary(isCompressible); 12782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!primaries.allocWeights(p, pLimit, pCount)) { 12792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // C++ U_BUFFER_OVERFLOW_ERROR // TODO: introduce a more specific UErrorCode? 12802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException("primary tailoring gap too small"); 12812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pIsTailored = true; 12832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p = primaries.nextWeight(); 12852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(p != 0xffffffffL); 12862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller s = Collation.COMMON_WEIGHT16; 12872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sIsTailored = false; 12882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller t = s == 0 ? 0 : Collation.COMMON_WEIGHT16; 12902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tIsTailored = false; 12912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller q = 0; 12932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTailoredNode(node)) { 12952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nodesArray[i] = Collation.makeCE(p, s, t, q); 12962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(DEBUG) { 12972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.printf("%016x\n", nodesArray[i]); 12982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 13052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Counts the tailored nodes of the given strength up to the next node 13062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * which is either stronger or has an explicit weight of this strength. 13072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int countTailoredNodes(long[] nodesArray, int i, int strength) { 13092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int count = 0; 13102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 13112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(i == 0) { break; } 13122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long node = nodesArray[i]; 13132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strengthFromNode(node) < strength) { break; } 13142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(strengthFromNode(node) == strength) { 13152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isTailoredNode(node)) { 13162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++count; 13172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 13182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 13192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = nextIndexFromNode(node); 13222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return count; 13242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final class CEFinalizer implements CollationDataBuilder.CEModifier { 13272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CEFinalizer(long[] ces) { 13282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller finalCEs = ces; 13292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1330f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 13312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public long modifyCE32(int ce32) { 13322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(!Collation.isSpecialCE32(ce32)); 13332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(CollationBuilder.isTempCE32(ce32)) { 13342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // retain case bits 13352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return finalCEs[CollationBuilder.indexFromTempCE32(ce32)] | ((ce32 & 0xc0) << 8); 13362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 13372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collation.NO_CE; 13382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1340f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 13412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public long modifyCE(long ce) { 13422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(CollationBuilder.isTempCE(ce)) { 13432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // retain case bits 13442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return finalCEs[CollationBuilder.indexFromTempCE(ce)] | (ce & 0xc000); 13452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 13462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collation.NO_CE; 13472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private long[] finalCEs; 13512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 13522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Replaces temporary CEs with the final CEs they point to. */ 13542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void finalizeCEs() { 13552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationDataBuilder newBuilder = new CollationDataBuilder(); 13562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newBuilder.initForTailoring(baseData); 13572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CEFinalizer finalizer = new CEFinalizer(nodes.getBuffer()); 13582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newBuilder.copyFrom(dataBuilder, finalizer); 13592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dataBuilder = newBuilder; 13602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 13632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Encodes "temporary CE" data into a CE that fits into the CE32 data structure, 13642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * with 2-byte primary, 1-byte secondary and 6-bit tertiary, 13652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * with valid CE byte values. 13662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 13672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The index must not exceed 20 bits (0xfffff). 13682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The strength must fit into 2 bits (Collator.PRIMARY..Collator.QUATERNARY). 13692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 13702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Temporary CEs are distinguished from real CEs by their use of 13712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * secondary weights 06..45 which are otherwise reserved for compressed sort keys. 13722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 13732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The case bits are unused and available. 13742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static long tempCEFromIndexAndStrength(int index, int strength) { 13762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 13772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // CE byte offsets, to ensure valid CE bytes, and case bits 11 13782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0x4040000006002000L + 13792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // index bits 19..13 -> primary byte 1 = CE bits 63..56 (byte values 40..BF) 13802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((long)(index & 0xfe000) << 43) + 13812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // index bits 12..6 -> primary byte 2 = CE bits 55..48 (byte values 40..BF) 13822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((long)(index & 0x1fc0) << 42) + 13832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // index bits 5..0 -> secondary byte 1 = CE bits 31..24 (byte values 06..45) 13842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((index & 0x3f) << 24) + 13852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // strength bits 1..0 -> tertiary byte 1 = CE bits 13..8 (byte values 20..23) 13862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (strength << 8); 13872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int indexFromTempCE(long tempCE) { 13892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tempCE -= 0x4040000006002000L; 13902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 13912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((int)(tempCE >> 43) & 0xfe000) | 13922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((int)(tempCE >> 42) & 0x1fc0) | 13932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((int)(tempCE >> 24) & 0x3f); 13942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int strengthFromTempCE(long tempCE) { 13962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((int)tempCE >> 8) & 3; 13972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean isTempCE(long ce) { 13992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sec = (int)ce >>> 24; 14002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 6 <= sec && sec <= 0x45; 14012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int indexFromTempCE32(int tempCE32) { 14042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tempCE32 -= 0x40400620; 14052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 14062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((tempCE32 >> 11) & 0xfe000) | 14072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((tempCE32 >> 10) & 0x1fc0) | 14082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((tempCE32 >> 8) & 0x3f); 14092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean isTempCE32(int ce32) { 14112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 14122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (ce32 & 0xff) >= 2 && // not a long-primary/long-secondary CE32 14132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6 <= ((ce32 >> 8) & 0xff) && ((ce32 >> 8) & 0xff) <= 0x45; 14142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int ceStrength(long ce) { 14172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 14182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isTempCE(ce) ? strengthFromTempCE(ce) : 14192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (ce & 0xff00000000000000L) != 0 ? Collator.PRIMARY : 14202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((int)ce & 0xff000000) != 0 ? Collator.SECONDARY : 14212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ce != 0 ? Collator.TERTIARY : 14222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Collator.IDENTICAL; 14232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** At most 1M nodes, limited by the 20 bits in node bit fields. */ 14262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int MAX_INDEX = 0xfffff; 14272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 14282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Node bit 6 is set on a primary node if there are nodes 14292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * with secondary values below the common secondary weight (05). 14302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int HAS_BEFORE2 = 0x40; 14322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 14332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Node bit 5 is set on a primary or secondary node if there are nodes 14342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * with tertiary values below the common tertiary weight (05). 14352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int HAS_BEFORE3 = 0x20; 14372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 14382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Node bit 3 distinguishes a tailored node, which has no weight value, 14392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * from a node with an explicit (root or default) weight. 14402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int IS_TAILORED = 8; 14422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static long nodeFromWeight32(long weight32) { 14442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return weight32 << 32; 14452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static long nodeFromWeight16(int weight16) { 14472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (long)weight16 << 48; 14482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static long nodeFromPreviousIndex(int previous) { 14502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (long)previous << 28; 14512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static long nodeFromNextIndex(int next) { 14532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return next << 8; 14542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static long nodeFromStrength(int strength) { 14562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return strength; 14572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static long weight32FromNode(long node) { 14602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return node >>> 32; 14612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int weight16FromNode(long node) { 14632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (int)(node >> 48) & 0xffff; 14642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int previousIndexFromNode(long node) { 14662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (int)(node >> 28) & MAX_INDEX; 14672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int nextIndexFromNode(long node) { 14692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((int)node >> 8) & MAX_INDEX; 14702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int strengthFromNode(long node) { 14722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (int)node & 3; 14732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean nodeHasBefore2(long node) { 14762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (node & HAS_BEFORE2) != 0; 14772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean nodeHasBefore3(long node) { 14792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (node & HAS_BEFORE3) != 0; 14802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean nodeHasAnyBefore(long node) { 14822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (node & (HAS_BEFORE2 | HAS_BEFORE3)) != 0; 14832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean isTailoredNode(long node) { 14852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (node & IS_TAILORED) != 0; 14862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static long changeNodePreviousIndex(long node, int previous) { 14892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (node & 0xffff00000fffffffL) | nodeFromPreviousIndex(previous); 14902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static long changeNodeNextIndex(long node, int next) { 14922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (node & 0xfffffffff00000ffL) | nodeFromNextIndex(next); 14932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Normalizer2 nfd, fcd; 14962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Normalizer2Impl nfcImpl; 14972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private CollationTailoring base; 14992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private CollationData baseData; 15002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private CollationRootElements rootElements; 15012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private long variableTop; 15022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private CollationDataBuilder dataBuilder; 15042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean fastLatinEnabled; 15052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private UnicodeSet optimizeSet = new UnicodeSet(); 15062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private long[] ces = new long[Collation.MAX_EXPANSION_LENGTH]; 15082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int cesLength; 15092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 15112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Indexes of nodes with root primary weights, sorted by primary. 15122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compact form of a TreeMap from root primary to node index. 15132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is a performance optimization for finding reset positions. 15152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Without this, we would have to search through the entire nodes list. 15162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * It also allows storing root primary weights in list head nodes, 15172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * without previous index, leaving room in root primary nodes for 32-bit primary weights. 15182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private UVector32 rootPrimaryIndexes; 15202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 15212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Data structure for assigning tailored weights and CEs. 15222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Doubly-linked lists of nodes in mostly collation order. 15232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Each list starts with a root primary node and ends with a nextIndex of 0. 15242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * When there are any nodes in the list, then there is always a root primary node at index 0. 15262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This allows some code not to have to check explicitly for nextIndex==0. 15272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Root primary nodes have 32-bit weights but do not have previous indexes. 15292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * All other nodes have at most 16-bit weights and do have previous indexes. 15302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Nodes with explicit weights store root collator weights, 15322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or default weak weights (e.g., secondary 05) for stronger nodes. 15332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "Tailored" nodes, with the IS_TAILORED bit set, 15342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * do not store explicit weights but rather 15352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * create a difference of a certain strength from the preceding node. 15362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A root node is followed by either 15382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - a root/default node of the same strength, or 15392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - a root/default node of the next-weaker strength, or 15402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - a tailored node of the same strength. 15412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A node of a given strength normally implies "common" weights on weaker levels. 15432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A node with HAS_BEFORE2 must be immediately followed by 15452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a secondary node with an explicit below-common weight, then a secondary tailored node, 15462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and later an explicit common-secondary node. 15472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The below-common weight can be a root weight, 15482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or it can be BEFORE_WEIGHT16 for tailoring before an implied common weight 15492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or before the lowest root weight. 15502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (&[before 2] resets to an explicit secondary node so that 15512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the following addRelation(secondary) tailors right after that. 15522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If we did not have this node and instead were to reset on the primary node, 15532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * then addRelation(secondary) would skip forward to the the COMMON_WEIGHT16 node.) 15542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If the flag is not set, then there are no explicit secondary nodes 15562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * with the common or lower weights. 15572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Same for HAS_BEFORE3 for tertiary nodes and weights. 15592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A node must not have both flags set. 15602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Tailored CEs are initially represented in a CollationDataBuilder as temporary CEs 15622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * which point to stable indexes in this list, 15632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and temporary CEs stored in a CollationDataBuilder only point to tailored nodes. 15642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A temporary CE in the ces[] array may point to a non-tailored reset-before-position node, 15662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * until the next relation is added. 15672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * At the end, the tailored weights are allocated as necessary, 15692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * then the tailored nodes are replaced with final CEs, 15702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and the CollationData is rewritten by replacing temporary CEs with final ones. 15712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * We cannot simply insert new nodes in the middle of the array 15732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * because that would invalidate the indexes stored in existing temporary CEs. 15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * We need to use a linked graph with stable indexes to existing nodes. 15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A doubly-linked list seems easiest to maintain. 15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Each node is stored as an long, with its fields stored as bit fields. 15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Root primary node: 15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - primary weight: 32 bits 63..32 15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - reserved/unused/zero: 4 bits 31..28 15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Weaker root nodes & tailored nodes: 15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - a weight: 16 bits 63..48 15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * + a root or default weight for a non-tailored node 15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * + unused/zero for a tailored node 15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - index to the previous node: 20 bits 47..28 15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * All types of nodes: 15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - index to the next node: 20 bits 27..8 15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * + nextIndex=0 in last node per root-primary list 15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - reserved/unused/zero bits: bits 7, 4, 2 15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - HAS_BEFORE2: bit 6 15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - HAS_BEFORE3: bit 5 15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - IS_TAILORED: bit 3 15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - the difference strength (primary/secondary/tertiary/quaternary): 2 bits 1..0 15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * We could allocate structs with pointers, but we would have to store them 15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * in a pointer list so that they can be indexed from temporary CEs, 16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and they would require more memory allocations. 16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private UVector64 nodes; 16032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 1604