12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Copyright (C) 2004-2015, International Business Machines 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Corporation and others. All Rights Reserved. 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * file name: UCaseProps.java 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * encoding: US-ASCII 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * tab size: 8 (not used) 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * indentation:4 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * created on: 2005jan29 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * created by: Markus W. Scherer 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Low-level Unicode character/string case mapping code. 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Java port of ucase.h/.c. 
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl; 242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException; 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.nio.ByteBuffer; 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Iterator; 283ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubertimport java.util.Locale; 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter; 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UProperty; 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16; 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet; 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ICUUncheckedIOException; 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ULocale; 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 371537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/** 381537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android 39836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */ 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class UCaseProps { 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // constructors etc. 
--------------------------------------------------- *** 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // port of ucase_openProps() 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private UCaseProps() throws IOException { 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME); 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller readData(bytes); 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final void readData(ByteBuffer bytes) throws IOException { 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // read the header 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ICUBinary.readHeader(bytes, FMT, new IsAcceptable()); 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // read indexes[] 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int count=bytes.getInt(); 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(count<IX_TOP) { 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IOException("indexes[0] too small in "+DATA_FILE_NAME); 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller indexes=new int[count]; 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller indexes[0]=count; 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i=1; i<count; ++i) { 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller indexes[i]=bytes.getInt(); 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // read the trie 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller trie=Trie2_16.createFromSerialized(bytes); 
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int expectedTrieLength=indexes[IX_TRIE_SIZE]; 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int trieLength=trie.getSerializedLength(); 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(trieLength>expectedTrieLength) { 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie"); 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // skip padding after trie bytes 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ICUBinary.skipBytes(bytes, expectedTrieLength-trieLength); 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // read exceptions[] 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count=indexes[IX_EXC_LENGTH]; 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(count>0) { 793ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert exceptions=ICUBinary.getString(bytes, count, 0); 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // read unfold[] 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count=indexes[IX_UNFOLD_LENGTH]; 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(count>0) { 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller unfold=ICUBinary.getChars(bytes, count, 0); 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // implement ICUBinary.Authenticate 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final static class IsAcceptable implements ICUBinary.Authenticate { 91f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
public boolean isDataVersionAcceptable(byte version[]) { 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return version[0]==3; 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // set of property starts for UnicodeSet ------------------------------- *** 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final void addPropertyStarts(UnicodeSet set) { 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add the start code point of each same-value range of the trie */ 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Iterator<Trie2.Range> trieIterator=trie.iterator(); 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Trie2.Range range; 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(range.startCodePoint); 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add code points with hardcoded properties, plus the ones following them */ 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* (none right now, see comment below) */ 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Omit code points with hardcoded specialcasing properties 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * because we do not build property UnicodeSets for them right now. 
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // data access primitives ---------------------------------------------- *** 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int getExceptionsOffset(int props) { 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return props>>EXC_SHIFT; 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final boolean propsHasException(int props) { 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (props&EXCEPTION)!=0; 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* number of bits in an 8-bit integer value */ 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final byte flagsOffset[/*256*/]={ 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final boolean hasSlot(int flags, int index) { 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (flags&(1<<index))!=0; 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final byte slotOffset(int flags, int index) { 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return flagsOffset[flags&((1<<index)-1)]; 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Get the value of an optional-value slot where hasSlot(excWord, index). 
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param excWord (in) initial exceptions word 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param index (in) desired slot index 1583ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++); 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return bits 31..0: slot value 160f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * 63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final long getSlotValueAndOffset(int excWord, int index, int excOffset) { 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long value; 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((excWord&EXC_DOUBLE_SLOTS)==0) { 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=slotOffset(excWord, index); 1663ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert value=exceptions.charAt(excOffset); 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=2*slotOffset(excWord, index); 1693ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert value=exceptions.charAt(excOffset++); 1703ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert value=(value<<16)|exceptions.charAt(excOffset); 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return value |((long)excOffset<<32); 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* same as getSlotValueAndOffset() but does not return the slot offset */ 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller private final int getSlotValue(int excWord, int index, int excOffset) { 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value; 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((excWord&EXC_DOUBLE_SLOTS)==0) { 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=slotOffset(excWord, index); 1803ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert value=exceptions.charAt(excOffset); 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=2*slotOffset(excWord, index); 1833ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert value=exceptions.charAt(excOffset++); 1843ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert value=(value<<16)|exceptions.charAt(excOffset); 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return value; 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // simple case mappings ------------------------------------------------ *** 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int tolower(int c) { 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int props=trie.get(c); 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!propsHasException(props)) { 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(getTypeFromProps(props)>=UPPER) { 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c+=getDelta(props); 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int excOffset=getExceptionsOffset(props); 1993ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int excWord=exceptions.charAt(excOffset++); 
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasSlot(excWord, EXC_LOWER)) { 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=getSlotValue(excWord, EXC_LOWER, excOffset); 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c; 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int toupper(int c) { 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int props=trie.get(c); 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!propsHasException(props)) { 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(getTypeFromProps(props)==LOWER) { 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c+=getDelta(props); 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int excOffset=getExceptionsOffset(props); 2153ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int excWord=exceptions.charAt(excOffset++); 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasSlot(excWord, EXC_UPPER)) { 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=getSlotValue(excWord, EXC_UPPER, excOffset); 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c; 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int totitle(int c) { 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int props=trie.get(c); 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!propsHasException(props)) { 
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(getTypeFromProps(props)==LOWER) { 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c+=getDelta(props); 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int excOffset=getExceptionsOffset(props); 2313ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int excWord=exceptions.charAt(excOffset++); 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index; 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasSlot(excWord, EXC_TITLE)) { 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index=EXC_TITLE; 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(hasSlot(excWord, EXC_UPPER)) { 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index=EXC_UPPER; 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c; 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=getSlotValue(excWord, index, excOffset); 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c; 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Adds all simple case mappings and the full case folding for c to sa, 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and also adds special case closure mappings. 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * c itself is not added. 
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For example, the mappings 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - for s include long s 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - for sharp s include ss 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - for k include the Kelvin sign 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final void addCaseClosure(int c, UnicodeSet set) { 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Hardcode the case closure of i and its relatives and ignore the 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * data file data for these characters. 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The Turkic dotless i and dotted I with their case mapping conditions 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and case folding option make the related characters behave specially. 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This code matches their closure behavior to their case folding behavior. 
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch(c) { 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 0x49: 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* regular i and I are in one equivalence class */ 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(0x69); 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 0x69: 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(0x49); 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 0x130: 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */ 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(iDot); 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 0x131: 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* dotless i is in a class by itself */ 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* otherwise use the data file data */ 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int props=trie.get(c); 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!propsHasException(props)) { 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(getTypeFromProps(props)!=NONE) { 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add the one simple case mapping, no matter what type it is */ 
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int delta=getDelta(props); 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(delta!=0) { 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(c+delta); 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * c has exceptions, so there may be multiple simple and/or 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * full case mappings. Add them all. 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int excOffset0, excOffset=getExceptionsOffset(props); 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int closureOffset; 2993ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int excWord=exceptions.charAt(excOffset++); 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index, closureLength, fullLength, length; 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset0=excOffset; 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add all simple case mappings */ 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(index=EXC_LOWER; index<=EXC_TITLE; ++index) { 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasSlot(excWord, index)) { 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset=excOffset0; 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=getSlotValue(excWord, index, excOffset); 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(c); 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* get the closure string pointer & length */ 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasSlot(excWord, EXC_CLOSURE)) { 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset=excOffset0; 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset); 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller closureLength=(int)value&CLOSURE_MAX_LENGTH; /* higher bits are reserved */ 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller closureOffset=(int)(value>>32)+1; /* behind this slot, unless there are full case mappings */ 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller closureLength=0; 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller closureOffset=0; 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add the full case folding */ 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasSlot(excWord, EXC_FULL_MAPPINGS)) { 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset=excOffset0; 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset); 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fullLength=(int)value; 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* start of full case mapping strings */ 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset=(int)(value>>32)+1; 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fullLength&=0xffff; /* bits 16 and higher are reserved */ 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* skip the lowercase 
result string */ 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=fullLength&FULL_LOWER; 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fullLength>>=4; 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add the full case folding string */ 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length=fullLength&0xf; 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length!=0) { 3423ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert set.add(exceptions.substring(excOffset, excOffset+length)); 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=length; 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* skip the uppercase and titlecase strings */ 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fullLength>>=4; 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=fullLength&0xf; 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fullLength>>=4; 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=fullLength; 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller closureOffset=excOffset; /* behind full case mappings */ 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add each code point in the closure string */ 3563ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int limit=closureOffset+closureLength; 3573ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) { 3583ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert c=exceptions.codePointAt(index); 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(c); 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller } 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * compare s, which has a length, with t=unfold[unfoldOffset..], which has a maximum length or is NUL-terminated 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * must be s.length()>0 and max>0 and s.length()<=max 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final int strcmpMax(String s, int unfoldOffset, int max) { 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i1, length, c1, c2; 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length=s.length(); 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller max-=length; /* we require length<=max, so no need to decrement max in the loop */ 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i1=0; 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c1=s.charAt(i1++); 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c2=unfold[unfoldOffset++]; 3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c2==0) { 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 1; /* reached the end of t but not of s */ 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c1-=c2; 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c1!=0) { 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c1; /* return difference result */ 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while(--length>0); 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* ends with length==0 */ 
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(max==0 || unfold[unfoldOffset]==0) { 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; /* equal to length of both strings */ 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -max; /* return lengh difference */ 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Maps the string to single code points and adds the associated case closure 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * mappings. 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The string is mapped to code points if it is their full case folding string. 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * In other words, this performs a reverse full case folding and then 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * adds the case closure items of the resulting code points. 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If the string is found and its closure applied, then 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the string itself is added as well as part of its code points' closure. 
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the string was found 4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final boolean addStringCaseClosure(String s, UnicodeSet set) { 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i, length, start, limit, result, unfoldOffset, unfoldRows, unfoldRowWidth, unfoldStringWidth; 4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(unfold==null || s==null) { 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* no reverse case folding data, or no string */ 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length=s.length(); 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length<=1) { 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* the string is too short to find any match */ 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * more precise would be: 4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if(!u_strHasMoreChar32Than(s, length, 1)) 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * but this does not make much practical difference because 4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a single supplementary code point would just not be found 4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller unfoldRows=unfold[UNFOLD_ROWS]; 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller unfoldRowWidth=unfold[UNFOLD_ROW_WIDTH]; 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
unfoldStringWidth=unfold[UNFOLD_STRING_WIDTH]; 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //unfoldCPWidth=unfoldRowWidth-unfoldStringWidth; 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length>unfoldStringWidth) { 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* the string is too long to find any match */ 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* do a binary search for the string */ 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start=0; 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit=unfoldRows; 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(start<limit) { 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i=(start+limit)/2; 4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller unfoldOffset=((i+1)*unfoldRowWidth); // +1 to skip the header values above 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result=strcmpMax(s, unfoldOffset, unfoldStringWidth); 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(result==0) { 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* found the string: add each code point, and its case closure */ 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(i=unfoldStringWidth; i<unfoldRowWidth && unfold[unfoldOffset+i]!=0; i+=UTF16.getCharCount(c)) { 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=UTF16.charAt(unfold, unfoldOffset, unfold.length, i); 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(c); 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addCaseClosure(c, set); 
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(result<0) { 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit=i; 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else /* result>0 */ { 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start=i+1; 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* string not found */ 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** @return NONE, LOWER, UPPER, TITLE */ 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int getType(int c) { 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return getTypeFromProps(trie.get(c)); 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 466f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert /** @return like getType() but also sets IGNORABLE if c is case-ignorable */ 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int getTypeOrIgnorable(int c) { 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return getTypeAndIgnorableFromProps(trie.get(c)); 4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** @return NO_DOT, SOFT_DOTTED, ABOVE, OTHER_ACCENT */ 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int getDotType(int c) { 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int props=trie.get(c); 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!propsHasException(props)) { 
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return props&DOT_MASK; 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 4773ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK; 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final boolean isSoftDotted(int c) { 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return getDotType(c)==SOFT_DOTTED; 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final boolean isCaseSensitive(int c) { 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (trie.get(c)&SENSITIVE)!=0; 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // string casing ------------------------------------------------------- *** 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * These internal functions form the core of string case mappings. 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * They map single code points to result code points or strings and take 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * all necessary conditions (context, locale ID, options) into account. 
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * They do not iterate over the source or write to the destination 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * so that the same functions are useful for non-standard string storage, 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc. 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For the same reason, the "surrounding text" context is passed in as a 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ContextIterator which does not make any assumptions about 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the underlying storage. 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This section contains helper functions that check for conditions 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * in the input text surrounding the current code point 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * according to SpecialCasing.txt. 
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Each helper function gets the index 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - after the current code point if it looks at following text 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - before the current code point if it looks at preceding text 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows: 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Final_Sigma 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * C is preceded by a sequence consisting of 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a cased letter and a case-ignorable sequence, 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and C is not followed by a sequence consisting of 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * an ignorable sequence and then a cased letter. 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * More_Above 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * C is followed by one or more characters of combining class 230 (ABOVE) 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * in the combining character sequence. 5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * After_Soft_Dotted 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The last preceding character with combining class of zero before C 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * was Soft_Dotted, 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and there is no intervening combining character class 230 (ABOVE). 
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Before_Dot 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * C is followed by combining dot above (U+0307). 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Any sequence of characters with a combining class that is neither 0 nor 230 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * may intervene between the current character and the combining dot above. 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The erratum from 2002-10-31 adds the condition 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * After_I 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The last preceding base character was an uppercase I, and there is no 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * intervening combining character class 230 (ABOVE). 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (See Jitterbug 2344 and the comments on After_I below.) 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Helper definitions in Unicode 3.2 UAX 21: 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * D1. 
A character C is defined to be cased 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if it meets any of the following criteria: 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - The general category of C is Titlecase Letter (Lt) 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - In [CoreProps], C has one of the properties Uppercase, or Lowercase 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - Given D = NFD(C), then it is not the case that: 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * D = UCD_lower(D) = UCD_upper(D) = UCD_title(D) 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (This third criterion does not add any characters to the list 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for Unicode 3.2. Ignored.) 5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * D2. A character C is defined to be case-ignorable 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if it meets either of the following criteria: 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * - The general category of C is 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Letter Modifier (Lm), or Symbol Modifier (Sk) 559f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * - C is one of the following characters 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * U+0027 APOSTROPHE 5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * U+00AD SOFT HYPHEN (SHY) 5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * U+2019 RIGHT SINGLE QUOTATION MARK 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (the preferred character for apostrophe) 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * D3. A case-ignorable sequence is a sequence of 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * zero or more case-ignorable characters. 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Iterator for string case mappings, which need to look at the 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * context (surrounding text) of a given character for conditional mappings. 5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The iterator only needs to go backward or forward away from the 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character in question. It does not use any indexes on this interface. 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * It does not support random access or an arbitrary change of 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * iteration direction. 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The code point being case-mapped itself is never returned by 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * this iterator. 5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public interface ContextIterator { 5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Reset the iterator for forward or backward iteration. 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param dir >0: Begin iterating forward from the first code point 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * after the one that is being case-mapped. 
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <0: Begin iterating backward from the first code point 587f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * before the one that is being case-mapped. 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void reset(int dir); 5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Iterate and return the next code point, moving in the direction 5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * determined by the reset() call. 593f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * @return Next code point, or <0 when the iteration is done. 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int next(); 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For string case mappings, a single character (a code point) is mapped 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * either to itself (in which case in-place mapping functions do nothing), 6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or to another single code point, or to a string. 
* Aside from the string contents, these are indicated with a single int
* value as follows:
*
* Mapping to self: Negative values (~self instead of -self to support U+0000)
*
* Mapping to another code point: Positive values >MAX_STRING_LENGTH
*
* Mapping to a string: The string length (0..MAX_STRING_LENGTH) is
* returned. Note that the string result may indeed have zero length.
*/
public static final int MAX_STRING_LENGTH=0x1f;

/* Case locale identifiers, as returned by getCaseLocale(). */
// private static final int LOC_UNKNOWN=0;  (commented out in the original)
public static final int LOC_ROOT=1;
private static final int LOC_TURKISH=2;
private static final int LOC_LITHUANIAN=3;
static final int LOC_GREEK=4;
public static final int LOC_DUTCH=5;

/** @return the LOC_ constant selected by the language subtag of the given locale */
public static final int getCaseLocale(Locale locale) {
    final String language = locale.getLanguage();
    return getCaseLocale(language);
}
6243ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert public static final int getCaseLocale(ULocale locale) { 6253ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return getCaseLocale(locale.getLanguage()); 6263ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } 6273ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert /** Accepts both 2- and 3-letter language subtags. */ 6283ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert private static final int getCaseLocale(String language) { 6293ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert // Check the subtag length to reduce the number of comparisons 6303ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert // for locales without special behavior. 6313ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert // Fastpath for English "en" which is often used for default (=root locale) case mappings, 6323ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert // and for Chinese "zh": Very common but no special case mapping behavior. 
6333ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert if(language.length()==2) { 6343ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert if(language.equals("en") || language.charAt(0)>'t') { 6353ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return LOC_ROOT; 6363ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(language.equals("tr") || language.equals("az")) { 6373ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return LOC_TURKISH; 6383ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(language.equals("el")) { 6393ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return LOC_GREEK; 6403ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(language.equals("lt")) { 6413ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return LOC_LITHUANIAN; 6423ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(language.equals("nl")) { 6433ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return LOC_DUTCH; 6443ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } 6453ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(language.length()==3) { 6463ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert if(language.equals("tur") || language.equals("aze")) { 6473ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return LOC_TURKISH; 6483ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(language.equals("ell")) { 6493ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return LOC_GREEK; 6503ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(language.equals("lit")) { 6513ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return LOC_LITHUANIAN; 6523ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(language.equals("nld")) { 6533ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return LOC_DUTCH; 6543ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } 6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
6563ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return LOC_ROOT; 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */ 6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final boolean isFollowedByCasedLetter(ContextIterator iter, int dir) { 6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(iter==null) { 6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(iter.reset(dir); (c=iter.next())>=0;) { 6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int type=getTypeOrIgnorable(c); 6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((type&4)!=0) { 6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* case-ignorable, continue with the loop */ 6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(type!=NONE) { 6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; /* followed by cased letter */ 6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* uncased and not case-ignorable */ 6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* not followed by cased letter */ 6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Is preceded 
by Soft_Dotted character with no intervening cc=230 ? */ 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final boolean isPrecededBySoftDotted(ContextIterator iter) { 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int dotType; 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(iter==null) { 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(iter.reset(-1); (c=iter.next())>=0;) { 6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dotType=getDotType(c); 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(dotType==SOFT_DOTTED) { 6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; /* preceded by TYPE_i */ 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(dotType!=OTHER_ACCENT) { 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */ 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* not preceded by TYPE_i */ 7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * See Jitterbug 2344: 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The condition After_I for Turkic-lowercasing of U+0307 combining dot above 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because 
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * we made those releases compatible with Unicode 3.2 which had not fixed 7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a related bug in SpecialCasing.txt. 7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * From the Jitterbug 2344 text: 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ... this bug is listed as a Unicode erratum 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html 7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <quote> 7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * There are two errors in SpecialCasing.txt. 7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1. Missing semicolons on two lines. ... [irrelevant for ICU] 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 2. An incorrect context definition. Correct as follows: 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * --- 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * where the context After_I is defined as: 7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The last preceding base character was an uppercase I, and there is no 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * intervening combining character class 230 (ABOVE). 
7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </quote> 7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as: 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. 7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * # This matches the behavior of the canonically equivalent I-dot_above 7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * See also the description in this place in older versions of uchar.c (revision 1.100). 7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Markus W. Scherer 2003-feb-15 7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Is preceded by base character 'I' with no intervening cc=230 ? 
*/ 7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final boolean isPrecededBy_I(ContextIterator iter) { 7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int dotType; 7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(iter==null) { 7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(iter.reset(-1); (c=iter.next())>=0;) { 7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c==0x49) { 7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; /* preceded by I */ 7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dotType=getDotType(c); 7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(dotType!=OTHER_ACCENT) { 7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* preceded by different base character (not I), or intervening cc==230 */ 7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* not preceded by I */ 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Is followed by one or more cc==230 ? 
*/ 7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final boolean isFollowedByMoreAbove(ContextIterator iter) { 7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int dotType; 7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(iter==null) { 7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(iter.reset(1); (c=iter.next())>=0;) { 7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dotType=getDotType(c); 7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(dotType==ABOVE) { 7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; /* at least one cc==230 following */ 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(dotType!=OTHER_ACCENT) { 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* next base character, no more cc==230 following */ 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* no more cc==230 following */ 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Is followed by a dot above (without cc==230 in between) ? 
*/ 7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final boolean isFollowedByDotAbove(ContextIterator iter) { 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int dotType; 7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(iter==null) { 7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(iter.reset(1); (c=iter.next())>=0; ) { 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c==0x307) { 7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dotType=getDotType(c); 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(dotType!=OTHER_ACCENT) { 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* next base character or cc==230 in between */ 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; /* no dot above following */ 7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final String 8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller iDot= "i\u0307", 8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller jDot= "j\u0307", 8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller iOgonekDot= "\u012f\u0307", 8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller iDotGrave= "i\u0307\u0300", 8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller iDotAcute= "i\u0307\u0301", 
8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller iDotTilde= "i\u0307\u0303"; 8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Get the full lowercase mapping for c. 8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c Character to be mapped. 8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param iter Character iterator, used for context-sensitive mappings. 8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * See ContextIterator for details. 8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If iter==null then a context-independent result is returned. 8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param out If the mapping result is a string, then it is appended to out. 8173ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert * @param caseLocale Case locale value from ucase_getCaseLocale(). 8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return Output code point or string length, see MAX_STRING_LENGTH. 
8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see ContextIterator 8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #MAX_STRING_LENGTH 822836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8243ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) { 8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int result, props; 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result=c; 8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller props=trie.get(c); 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!propsHasException(props)) { 8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(getTypeFromProps(props)>=UPPER) { 8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result=c+getDelta(props); 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int excOffset=getExceptionsOffset(props), excOffset2; 8353ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int excWord=exceptions.charAt(excOffset++); 8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int full; 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset2=excOffset; 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) { 8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* use hardcoded conditions and mappings */ 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Test for conditional mappings 
first 8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (otherwise the unconditional default mappings are always taken), 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * then test for characters that have unconditional mappings in SpecialCasing.txt, 8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * then get the UnicodeData.txt mappings. 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8483ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert if( caseLocale==LOC_LITHUANIAN && 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* base characters, find accents above */ 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (((c==0x49 || c==0x4a || c==0x12e) && 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isFollowedByMoreAbove(iter)) || 8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* precomposed with accent above, no need to find one */ 8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (c==0xcc || c==0xcd || c==0x128)) 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ) { 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # Lithuanian 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # Lithuanian retains the dot in a lowercase i when followed by accents. 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # Introduce an explicit dot above when lowercasing capital I's and J's 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # whenever there are more accents above. 
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I 8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J 8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK 8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE 8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8713ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert try { 8723ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert switch(c) { 8733ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert case 0x49: /* LATIN CAPITAL LETTER I */ 8743ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(iDot); 8753ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return 2; 8763ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert case 0x4a: /* LATIN CAPITAL LETTER J */ 8773ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(jDot); 8783ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return 2; 8793ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */ 8803ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(iOgonekDot); 8813ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return 2; 8823ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert case 0xcc: /* LATIN CAPITAL LETTER 
I WITH GRAVE */ 8833ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(iDotGrave); 8843ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return 3; 8853ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */ 8863ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(iDotAcute); 8873ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return 3; 8883ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */ 8893ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(iDotTilde); 8903ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return 3; 8913ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert default: 8923ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return 0; /* will not occur */ 8933ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } 8943ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } catch (IOException e) { 8953ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert throw new ICUUncheckedIOException(e); 8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* # Turkish and Azeri */ 8983ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(caseLocale==LOC_TURKISH && c==0x130) { 8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri 9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # The following rules handle those cases. 
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE 9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE 9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0x69; 9073ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) { 9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. 9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # This matches the behavior of the canonically equivalent I-dot_above 9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; /* remove the dot (continue without output) */ 9163ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) { 9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. 
9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I 9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I 9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0x131; 9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(c==0x130) { 9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # Preserve canonical equivalence for I with dot. Turkic is handled below. 9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE 9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9303ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert try { 9313ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(iDot); 9323ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return 2; 9333ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } catch (IOException e) { 9343ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert throw new ICUUncheckedIOException(e); 9353ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } 9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if( c==0x3a3 && 9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller !isFollowedByCasedLetter(iter, 1) && 9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isFollowedByCasedLetter(iter, -1) /* -1=preceded */ 9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ) { 9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */ 9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # Special case for 
final form of sigma 9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA 9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0x3c2; /* greek small final sigma */ 9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* no known conditional special case mapping, use a normal mapping */ 9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) { 9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset); 9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller full=(int)value&FULL_LOWER; 9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(full!=0) { 9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* start of full case mapping strings */ 9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset=(int)(value>>32)+1; 9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9573ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert try { 9583ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert // append the lowercase mapping 9593ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(exceptions, excOffset, excOffset+full); 9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9613ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert /* return the string length */ 9623ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return full; 9633ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } catch (IOException e) { 9643ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert throw new ICUUncheckedIOException(e); 9653ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } 9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
} 9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasSlot(excWord, EXC_LOWER)) { 9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result=getSlotValue(excWord, EXC_LOWER, excOffset2); 9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (result==c) ? ~result : result; 9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* internal */ 9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final int toUpperOrTitle(int c, ContextIterator iter, 9793ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert Appendable out, 9803ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int loc, 9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean upperNotTitle) { 9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int result; 9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int props; 9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result=c; 9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller props=trie.get(c); 9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!propsHasException(props)) { 9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(getTypeFromProps(props)==LOWER) { 9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result=c+getDelta(props); 9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int excOffset=getExceptionsOffset(props), excOffset2; 9933ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int 
excWord=exceptions.charAt(excOffset++); 9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int full, index; 9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset2=excOffset; 9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) { 9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* use hardcoded conditions and mappings */ 10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(loc==LOC_TURKISH && c==0x69) { 10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # Turkish and Azeri 10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri 10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # The following rules handle those cases. 10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # When uppercasing, i turns into a dotted capital I 10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I 10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I 10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0x130; 10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(loc==LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter)) { 10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # Lithuanian 10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # Lithuanian retains the dot in a lowercase i when followed by 
accents. 10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller # Remove DOT ABOVE after "i" with upper or titlecase 10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE 10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; /* remove the dot (continue without output) */ 10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* no known conditional special case mapping, use a normal mapping */ 10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) { 10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset); 10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller full=(int)value&0xffff; 10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* start of full case mapping strings */ 10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset=(int)(value>>32)+1; 10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* skip the lowercase and case-folding result strings */ 10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=full&FULL_LOWER; 10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller full>>=4; 10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=full&0xf; 10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller full>>=4; 10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(upperNotTitle) { 10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller full&=0xf; 
10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* skip the uppercase result string */ 10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=full&0xf; 10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller full=(full>>4)&0xf; 10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(full!=0) { 10493ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert try { 10503ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert // append the result string 10513ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(exceptions, excOffset, excOffset+full); 10523ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert 10533ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert /* return the string length */ 10543ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return full; 10553ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } catch (IOException e) { 10563ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert throw new ICUUncheckedIOException(e); 10573ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } 10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!upperNotTitle && hasSlot(excWord, EXC_TITLE)) { 10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index=EXC_TITLE; 10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(hasSlot(excWord, EXC_UPPER)) { 10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* here, titlecase is same as uppercase */ 10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index=EXC_UPPER; 10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ~c; 
10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result=getSlotValue(excWord, index, excOffset2); 10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (result==c) ? ~result : result; 10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int toFullUpper(int c, ContextIterator iter, 10763ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert Appendable out, 10773ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int caseLocale) { 10783ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return toUpperOrTitle(c, iter, out, caseLocale, true); 10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final int toFullTitle(int c, ContextIterator iter, 10823ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert Appendable out, 10833ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int caseLocale) { 10843ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return toUpperOrTitle(c, iter, out, caseLocale, false); 10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* case folding ------------------------------------------------------------- */ 10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Case folding is similar to lowercasing. 
     * The result may be a simple mapping, i.e., a single code point, or
     * a full mapping, i.e., a string.
     * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
     * then only the lowercase mapping is stored.
     *
     * Some special cases are hardcoded because their conditions cannot be
     * parsed and processed from CaseFolding.txt.
     *
     * Unicode 3.2 CaseFolding.txt specifies for its status field:

    # C: common case folding, common mappings shared by both simple and full mappings.
    # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
    # S: simple case folding, mappings to single characters where different from F.
    # T: special case for uppercase I and dotted uppercase I
    #    - For non-Turkic languages, this mapping is normally not used.
    #    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
    #
    # Usage:
    #  A. To do a simple case folding, use the mappings with status C + S.
    #  B. To do a full case folding, use the mappings with status C + F.
    #
    # The mappings with status T can be used or omitted depending on the desired case-folding
    # behavior. (The default option is to exclude them.)

     * Unicode 3.2 has 'T' mappings as follows:

    0049; T; 0131; # LATIN CAPITAL LETTER I
    0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE

     * while the default mappings for these code points are:

    0049; C; 0069; # LATIN CAPITAL LETTER I
    0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE

     * U+0130 has no simple case folding (simple-case-folds to itself).
     */

    /**
     * Bit mask for getting just the options from a string compare options word
     * that are relevant for case folding (of a single string or code point).
     *
     * Currently only bit 0 for FOLD_CASE_EXCLUDE_SPECIAL_I.
     * It is conceivable that at some point we might use one more bit for using uppercase sharp s.
     * It is conceivable that at some point we might want the option to use only simple case foldings
     * when operating on strings.
*
     * @hide draft / provisional / internal are hidden on Android
     */
    private static final int FOLD_CASE_OPTIONS_MASK = 7;

    /*
     * Return the simple case folding mapping for c.
     * c is returned unchanged when it has no simple folding; options is masked with
     * FOLD_CASE_OPTIONS_MASK to choose between the default and the Turkic handling
     * of U+0049 and U+0130.
     */
    public final int fold(int c, int options) {
        final int props = trie.get(c);

        if (!propsHasException(props)) {
            // No exception word: either apply the delta stored in the trie value or leave c alone.
            return (getTypeFromProps(props) >= UPPER) ? c + getDelta(props) : c;
        }

        final int excStart = getExceptionsOffset(props);
        final int excWord = exceptions.charAt(excStart);
        final int slotBase = excStart + 1; // first slot follows the exception word

        /* special case folding mappings, hardcoded */
        if ((excWord & EXC_CONDITIONAL_FOLD) != 0 && (c == 0x49 || c == 0x130)) {
            final boolean defaultFolding =
                    (options & FOLD_CASE_OPTIONS_MASK) == UCharacter.FOLD_CASE_DEFAULT;
            if (c == 0x49) {
                /*
                 * default: 0049; C; 0069; # LATIN CAPITAL LETTER I
                 * Turkic:  0049; T; 0131; # LATIN CAPITAL LETTER I
                 */
                return defaultFolding ? 0x69 : 0x131;
            }
            /*
             * c==0x130 here.
             * default: no simple case folding for U+0130
             * Turkic:  0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
             */
            return defaultFolding ? c : 0x69;
        }

        // Prefer the dedicated folding slot, then the lowercase slot.
        if (hasSlot(excWord, EXC_FOLD)) {
            return getSlotValue(excWord, EXC_FOLD, slotBase);
        }
        if (hasSlot(excWord, EXC_LOWER)) {
            return getSlotValue(excWord, EXC_LOWER, slotBase);
        }
        return c;
    }

11862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 11872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Issue for canonical caseless match (UAX #21): 11882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve 11892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * canonical equivalence, unlike default-option casefolding. 11902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For example, I-grave and I + grave fold to strings that are not canonically 11912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * equivalent. 11922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For more details, see the comment in unorm_compare() in unorm.cpp 11932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and the intermediate prototype changes for Jitterbug 2021. 11942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.) 11952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 11962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This did not get fixed because it appears that it is not possible to fix 11972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * it for uppercase and lowercase characters (I-grave vs. i-grave) 11982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * together in a way that they still fold to common result strings. 
11992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 12002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12013ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert public final int toFullFolding(int c, Appendable out, int options) { 12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int result; 12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int props; 12042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result=c; 12062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller props=trie.get(c); 12072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!propsHasException(props)) { 12082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(getTypeFromProps(props)>=UPPER) { 12092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result=c+getDelta(props); 12102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 12122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int excOffset=getExceptionsOffset(props), excOffset2; 12133ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert int excWord=exceptions.charAt(excOffset++); 12142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int full, index; 12152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset2=excOffset; 12172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((excWord&EXC_CONDITIONAL_FOLD)!=0) { 12192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* use hardcoded conditions and mappings */ 12202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) { 12212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* default mappings */ 12222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c==0x49) { 12232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 0049; C; 0069; # LATIN CAPITAL 
LETTER I */ 12242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0x69; 12252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(c==0x130) { 12262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ 12273ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert try { 12283ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(iDot); 12293ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return 2; 12303ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } catch (IOException e) { 12313ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert throw new ICUUncheckedIOException(e); 12323ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } 12332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 12352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Turkic mappings */ 12362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c==0x49) { 12372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ 12382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0x131; 12392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(c==0x130) { 12402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ 12412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0x69; 12422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) { 12452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset); 12462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller full=(int)value&0xffff; 12472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* start 
of full case mapping strings */ 12492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset=(int)(value>>32)+1; 12502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* skip the lowercase result string */ 12522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller excOffset+=full&FULL_LOWER; 12532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller full=(full>>4)&0xf; 12542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(full!=0) { 12563ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert try { 12573ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert // append the result string 12583ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert out.append(exceptions, excOffset, excOffset+full); 12593ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert 12603ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert /* return the string length */ 12613ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return full; 12623ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } catch (IOException e) { 12633ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert throw new ICUUncheckedIOException(e); 12643ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert } 12652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasSlot(excWord, EXC_FOLD)) { 12692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index=EXC_FOLD; 12702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(hasSlot(excWord, EXC_LOWER)) { 12712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index=EXC_LOWER; 12722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 12732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ~c; 12742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
12752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result=getSlotValue(excWord, index, excOffset2); 12762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (result==c) ? ~result : result; 12792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* case mapping properties API ---------------------------------------------- */ 12822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 12842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * We need a StringBuilder for multi-code point output from the 12852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * full case mapping functions. However, we do not actually use that output, 12862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * we just check whether the input character was mapped to anything else. 12872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * We use a shared StringBuilder to avoid allocating a new one in each call. 12882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * We remove its contents each time so that it does not grow large over time. 
12892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 12901fba789ac68efdd9120a7373f49daef42833e674Neil Fuller * @internal 12912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 12922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final StringBuilder dummyStringBuilder = new StringBuilder(); 12932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final boolean hasBinaryProperty(int c, int which) { 12952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch(which) { 12962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.LOWERCASE: 12972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return LOWER==getType(c); 12982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.UPPERCASE: 12992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UPPER==getType(c); 13002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.SOFT_DOTTED: 13012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return isSoftDotted(c); 13022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.CASE_SENSITIVE: 13032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return isCaseSensitive(c); 13042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.CASED: 13052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return NONE!=getType(c); 13062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.CASE_IGNORABLE: 13072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (getTypeOrIgnorable(c)>>2)!=0; 13082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 13092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note: The following Changes_When_Xyz are defined as testing whether 13102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the NFD form of the input changes when Xyz-case-mapped. 
13112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * However, this simpler implementation of these properties, 13122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ignoring NFD, passes the tests. 13132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The implementation needs to be changed if the tests start failing. 13142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * When that happens, optimizations should be used to work with the 13152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * per-single-code point ucase_toFullXyz() functions unless 13162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the NFD form has more than one code point, 13172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and the property starts set needs to be the union of the 13182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * start sets for normalization and case mappings. 13192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.CHANGES_WHEN_LOWERCASED: 13212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dummyStringBuilder.setLength(0); 13223ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0; 13232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.CHANGES_WHEN_UPPERCASED: 13242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dummyStringBuilder.setLength(0); 13253ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0; 13262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.CHANGES_WHEN_TITLECASED: 13272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dummyStringBuilder.setLength(0); 13283ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0; 13292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java 
*/ 13302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.CHANGES_WHEN_CASEMAPPED: 13312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dummyStringBuilder.setLength(0); 13322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 13333ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 || 13343ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 || 13353ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0; 13362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 13372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 13382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // data members -------------------------------------------------------- *** 13422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int indexes[]; 13433ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert private String exceptions; 13442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private char unfold[]; 13452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Trie2_16 trie; 13472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // data format constants ----------------------------------------------- *** 13492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final String DATA_NAME="ucase"; 13502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final String DATA_TYPE="icu"; 13512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final String DATA_FILE_NAME=DATA_NAME+"."+DATA_TYPE; 13522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
/* format "cAsE" (the four ASCII bytes 0x63 0x41 0x53 0x45) */
    private static final int FMT=0x63415345;

    /* indexes into indexes[] */
    //private static final int IX_INDEX_TOP=0;
    //private static final int IX_LENGTH=1;
    private static final int IX_TRIE_SIZE=2;
    private static final int IX_EXC_LENGTH=3;
    private static final int IX_UNFOLD_LENGTH=4;

    //private static final int IX_MAX_FULL_LENGTH=15;
    private static final int IX_TOP=16;

    // definitions for 16-bit case properties word ------------------------- ***

    /* 2-bit constants for types of cased characters */
    public static final int TYPE_MASK=3;
    public static final int NONE=0;
    public static final int LOWER=1;
    public static final int UPPER=2;
    public static final int TITLE=3;

    /**
     * Extracts the two type bits from a case properties word.
     *
     * @return NONE, LOWER, UPPER, TITLE
     */
    private static final int getTypeFromProps(int props) {
        return props&TYPE_MASK;
    }

    /**
     * Extracts the low three bits (TYPE_MASK plus IGNORABLE) from a case properties word.
     *
     * @return like getTypeFromProps() but also sets IGNORABLE if props indicate case-ignorable
     */
    private static final int getTypeAndIgnorableFromProps(int props) {
        return props&7;
    }

    static final int IGNORABLE=4;
    private static final int SENSITIVE=     8;
    private static final int EXCEPTION=     0x10;

    private static final int DOT_MASK=      0x60;
    //private static final int NO_DOT=        0;      /* normal characters with cc=0 */
    private static final int SOFT_DOTTED=   0x20;   /* soft-dotted characters with cc=0 */
    private static final int ABOVE=         0x40;   /* "above" accents with cc=230 */
    private static final int OTHER_ACCENT=  0x60;   /* other accent character (0<cc!=230) */

    /* no exception: bits 15..7 are a 9-bit signed case mapping delta */
    private static final int DELTA_SHIFT=   7;
    //private static final int DELTA_MASK=    0xff80;
    //private static final int MAX_DELTA=     0xff;
    //private static final int MIN_DELTA=     (-MAX_DELTA-1);

    /**
     * Extracts the signed case mapping delta from a properties word without an exception.
     * The cast to short happens before the shift, so bit 15 acts as the sign bit
     * and the shift is sign-extending.
     */
    private static final int getDelta(int props) {
        return (short)props>>DELTA_SHIFT;
    }

    /* exception: bits 15..5 are an unsigned 11-bit index into the exceptions array */
    private static final int EXC_SHIFT=     5;
    //private static final int EXC_MASK=      0xffe0;
    //private static final int MAX_EXCEPTIONS=((EXC_MASK>>EXC_SHIFT)+1);

    /* definitions for 16-bit main exceptions word ------------------------------ */

    /* first 8 bits indicate values in optional slots */
    private static final int EXC_LOWER=0;
    private static final int EXC_FOLD=1;
    private static final int EXC_UPPER=2;
    private static final int EXC_TITLE=3;
    //private static final int EXC_4=4;   /* reserved */
    //private static final int EXC_5=5;   /* reserved */
    private static final int EXC_CLOSURE=6;
    private static final int EXC_FULL_MAPPINGS=7;
    //private static final int EXC_ALL_SLOTS=8;   /* one past the last slot */

    /* each slot is 2 uint16_t instead of 1 */
    private static final int EXC_DOUBLE_SLOTS=          0x100;

    /* reserved: exception bits 11..9 */

    /* EXC_DOT_MASK=DOT_MASK<<EXC_DOT_SHIFT */
    private static final int EXC_DOT_SHIFT=7;

    /* normally stored in the main word, but pushed out for larger exception indexes */
    //private static final int EXC_DOT_MASK=            0x3000;
    //private static final int EXC_NO_DOT=              0;
    //private static final int EXC_SOFT_DOTTED=         0x1000;
    //private static final int EXC_ABOVE=               0x2000; /* "above" accents with cc=230 */
    //private static final int EXC_OTHER_ACCENT=        0x3000; /* other character (0<cc!=230) */

    /* complex/conditional mappings */
    private static final int EXC_CONDITIONAL_SPECIAL=   0x4000;
    private static final int EXC_CONDITIONAL_FOLD=      0x8000;

    /* definitions for lengths word for full case mappings */
    private static final int FULL_LOWER=    0xf;
    //private static final int FULL_FOLDING=  0xf0;
    //private static final int FULL_UPPER=    0xf00;
    //private static final int FULL_TITLE=    0xf000;

    /* maximum lengths */
    //private static final int FULL_MAPPINGS_MAX_LENGTH=4*0xf;
    private static final int CLOSURE_MAX_LENGTH=0xf;

    /* constants for reverse case folding ("unfold") data */
    private static final int UNFOLD_ROWS=0;
    private static final int UNFOLD_ROW_WIDTH=1;
    private static final int UNFOLD_STRING_WIDTH=2;

    /*
     * public singleton instance
     */
    public static final UCaseProps INSTANCE;

    // This static initializer block must be placed after
    // other static member initialization
    static {
        try {
            INSTANCE = new UCaseProps();
        } catch (IOException e) {
            /* rethrow unchecked, keeping the IOException as the cause */
            throw new ICUUncheckedIOException(e);
        }
    }
}