17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   Copyright (C) 2004-2014, International Business Machines
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   Corporation and others.  All Rights Reserved.
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   file name:  UCaseProps.java
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   encoding:   US-ASCII
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   tab size:   8 (not used)
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   indentation:4
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   created on: 2005jan29
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   created by: Markus W. Scherer
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   Low-level Unicode character/string case mapping code.
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   Java port of ucase.h/.c.
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.nio.ByteBuffer;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Iterator;
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter;
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UProperty;
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UTF16;
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet;
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ICUUncheckedIOException;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale;
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class UCaseProps {
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // constructors etc. --------------------------------------------------- ***
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // port of ucase_openProps()
/**
 * Fetches the data named by DATA_FILE_NAME through ICUBinary.getRequiredData()
 * and hands the resulting buffer to readData() to populate this instance.
 *
 * @throws IOException propagated from readData()
 */
private UCaseProps() throws IOException {
    readData(ICUBinary.getRequiredData(DATA_FILE_NAME));
}
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final void readData(ByteBuffer bytes) throws IOException {
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // read the header
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ICUBinary.readHeader(bytes, FMT, new IsAcceptable());
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // read indexes[]
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i, count;
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        count=bytes.getInt();
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(count<IX_TOP) {
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        indexes=new int[count];
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        indexes[0]=count;
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(i=1; i<count; ++i) {
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            indexes[i]=bytes.getInt();
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // read the trie
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        trie=Trie2_16.createFromSerialized(bytes);
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int expectedTrieLength=indexes[IX_TRIE_SIZE];
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int trieLength=trie.getSerializedLength();
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(trieLength>expectedTrieLength) {
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie");
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // skip padding after trie bytes
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ICUBinary.skipBytes(bytes, expectedTrieLength-trieLength);
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // read exceptions[]
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        count=indexes[IX_EXC_LENGTH];
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(count>0) {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            exceptions=new char[count];
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(i=0; i<count; ++i) {
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                exceptions[i]=bytes.getChar();
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // read unfold[]
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        count=indexes[IX_UNFOLD_LENGTH];
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(count>0) {
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            unfold=new char[count];
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(i=0; i<count; ++i) {
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                unfold[i]=bytes.getChar();
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // implement ICUBinary.Authenticate
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final static class IsAcceptable implements ICUBinary.Authenticate {
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // @Override when we switch to Java 6
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public boolean isDataVersionAcceptable(byte version[]) {
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return version[0]==3;
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // set of property starts for UnicodeSet ------------------------------- ***
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final void addPropertyStarts(UnicodeSet set) {
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* add the start code point of each same-value range of the trie */
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Iterator<Trie2.Range> trieIterator=trie.iterator();
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Trie2.Range range;
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.add(range.startCodePoint);
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* add code points with hardcoded properties, plus the ones following them */
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* (none right now, see comment below) */
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Omit code points with hardcoded specialcasing properties
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * because we do not build property UnicodeSets for them right now.
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // data access primitives ---------------------------------------------- ***
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int getExceptionsOffset(int props) {
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return props>>EXC_SHIFT;
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean propsHasException(int props) {
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (props&EXCEPTION)!=0;
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/*
 * flagsOffset[b] is the number of bits set in the 8-bit integer value b
 * (256 entries, values 0..8).
 * slotOffset() looks up the flag bits below a slot index in this table
 * to learn how many slots precede that slot.
 */
private static final byte flagsOffset[/*256*/]=new byte[256];
static {
    for(int bits=0; bits<flagsOffset.length; ++bits) {
        flagsOffset[bits]=(byte)Integer.bitCount(bits);
    }
}
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean hasSlot(int flags, int index) {
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (flags&(1<<index))!=0;
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final byte slotOffset(int flags, int index) {
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return flagsOffset[flags&((1<<index)-1)];
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Get the value of an optional-value slot where hasSlot(excWord, index).
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param excWord (in) initial exceptions word
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param index (in) desired slot index
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param excOffset (in) offset into exceptions[] after excWord=exceptions[excOffset++];
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return bits 31..0: slot value
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final long getSlotValueAndOffset(int excWord, int index, int excOffset) {
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long value;
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((excWord&EXC_DOUBLE_SLOTS)==0) {
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            excOffset+=slotOffset(excWord, index);
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            value=exceptions[excOffset];
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            excOffset+=2*slotOffset(excWord, index);
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            value=exceptions[excOffset++];
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            value=(value<<16)|exceptions[excOffset];
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return value |((long)excOffset<<32);
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* same as getSlotValueAndOffset() but does not return the slot offset */
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final int getSlotValue(int excWord, int index, int excOffset) {
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int value;
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((excWord&EXC_DOUBLE_SLOTS)==0) {
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            excOffset+=slotOffset(excWord, index);
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            value=exceptions[excOffset];
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            excOffset+=2*slotOffset(excWord, index);
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            value=exceptions[excOffset++];
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            value=(value<<16)|exceptions[excOffset];
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return value;
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // simple case mappings ------------------------------------------------ ***
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int tolower(int c) {
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int props=trie.get(c);
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!propsHasException(props)) {
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(getTypeFromProps(props)>=UPPER) {
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c+=getDelta(props);
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excOffset=getExceptionsOffset(props);
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excWord=exceptions[excOffset++];
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasSlot(excWord, EXC_LOWER)) {
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c=getSlotValue(excWord, EXC_LOWER, excOffset);
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return c;
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int toupper(int c) {
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int props=trie.get(c);
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!propsHasException(props)) {
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(getTypeFromProps(props)==LOWER) {
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c+=getDelta(props);
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excOffset=getExceptionsOffset(props);
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excWord=exceptions[excOffset++];
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasSlot(excWord, EXC_UPPER)) {
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c=getSlotValue(excWord, EXC_UPPER, excOffset);
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return c;
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int totitle(int c) {
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int props=trie.get(c);
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!propsHasException(props)) {
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(getTypeFromProps(props)==LOWER) {
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c+=getDelta(props);
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excOffset=getExceptionsOffset(props);
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excWord=exceptions[excOffset++];
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int index;
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasSlot(excWord, EXC_TITLE)) {
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index=EXC_TITLE;
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(hasSlot(excWord, EXC_UPPER)) {
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index=EXC_UPPER;
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return c;
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c=getSlotValue(excWord, index, excOffset);
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return c;
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Adds all simple case mappings and the full case folding for c to sa,
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and also adds special case closure mappings.
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * c itself is not added.
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For example, the mappings
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - for s include long s
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - for sharp s include ss
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - for k include the Kelvin sign
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final void addCaseClosure(int c, UnicodeSet set) {
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Hardcode the case closure of i and its relatives and ignore the
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * data file data for these characters.
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * The Turkic dotless i and dotted I with their case mapping conditions
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * and case folding option make the related characters behave specially.
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * This code matches their closure behavior to their case folding behavior.
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        switch(c) {
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case 0x49:
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* regular i and I are in one equivalence class */
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.add(0x69);
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case 0x69:
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.add(0x49);
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case 0x130:
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.add(iDot);
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case 0x131:
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* dotless i is in a class by itself */
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        default:
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* otherwise use the data file data */
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int props=trie.get(c);
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!propsHasException(props)) {
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(getTypeFromProps(props)!=NONE) {
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* add the one simple case mapping, no matter what type it is */
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int delta=getDelta(props);
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(delta!=0) {
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    set.add(c+delta);
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * c has exceptions, so there may be multiple simple and/or
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * full case mappings. Add them all.
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excOffset0, excOffset=getExceptionsOffset(props);
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int closureOffset;
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excWord=exceptions[excOffset++];
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int index, closureLength, fullLength, length;
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            excOffset0=excOffset;
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* add all simple case mappings */
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(index=EXC_LOWER; index<=EXC_TITLE; ++index) {
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(hasSlot(excWord, index)) {
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    excOffset=excOffset0;
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c=getSlotValue(excWord, index, excOffset);
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    set.add(c);
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* get the closure string pointer & length */
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasSlot(excWord, EXC_CLOSURE)) {
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset=excOffset0;
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset);
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                closureLength=(int)value&CLOSURE_MAX_LENGTH; /* higher bits are reserved */
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                closureOffset=(int)(value>>32)+1; /* behind this slot, unless there are full case mappings */
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                closureLength=0;
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                closureOffset=0;
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* add the full case folding */
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset=excOffset0;
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fullLength=(int)value;
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* start of full case mapping strings */
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset=(int)(value>>32)+1;
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fullLength&=0xffff; /* bits 16 and higher are reserved */
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* skip the lowercase result string */
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset+=fullLength&FULL_LOWER;
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fullLength>>=4;
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* add the full case folding string */
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                length=fullLength&0xf;
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(length!=0) {
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    set.add(new String(exceptions, excOffset, length));
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    excOffset+=length;
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* skip the uppercase and titlecase strings */
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fullLength>>=4;
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset+=fullLength&0xf;
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fullLength>>=4;
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset+=fullLength;
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                closureOffset=excOffset; /* behind full case mappings */
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* add each code point in the closure string */
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(index=0; index<closureLength; index+=UTF16.getCharCount(c)) {
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c=UTF16.charAt(exceptions, closureOffset, exceptions.length, index);
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                set.add(c);
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * compare s, which has a length, with t=unfold[unfoldOffset..], which has a maximum length or is NUL-terminated
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * must be s.length()>0 and max>0 and s.length()<=max
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final int strcmpMax(String s, int unfoldOffset, int max) {
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i1, length, c1, c2;
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        length=s.length();
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        max-=length; /* we require length<=max, so no need to decrement max in the loop */
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        i1=0;
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        do {
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c1=s.charAt(i1++);
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c2=unfold[unfoldOffset++];
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c2==0) {
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return 1; /* reached the end of t but not of s */
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c1-=c2;
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c1!=0) {
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return c1; /* return difference result */
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } while(--length>0);
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* ends with length==0 */
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(max==0 || unfold[unfoldOffset]==0) {
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0; /* equal to length of both strings */
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return -max; /* return lengh difference */
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Maps the string to single code points and adds the associated case closure
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * mappings.
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string is mapped to code points if it is their full case folding string.
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * In other words, this performs a reverse full case folding and then
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * adds the case closure items of the resulting code points.
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If the string is found and its closure applied, then
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the string itself is added as well as part of its code points' closure.
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return true if the string was found
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final boolean addStringCaseClosure(String s, UnicodeSet set) {
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i, length, start, limit, result, unfoldOffset, unfoldRows, unfoldRowWidth, unfoldStringWidth;
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(unfold==null || s==null) {
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false; /* no reverse case folding data, or no string */
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        length=s.length();
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(length<=1) {
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* the string is too short to find any match */
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * more precise would be:
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * if(!u_strHasMoreChar32Than(s, length, 1))
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * but this does not make much practical difference because
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * a single supplementary code point would just not be found
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        unfoldRows=unfold[UNFOLD_ROWS];
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        unfoldRowWidth=unfold[UNFOLD_ROW_WIDTH];
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        unfoldStringWidth=unfold[UNFOLD_STRING_WIDTH];
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //unfoldCPWidth=unfoldRowWidth-unfoldStringWidth;
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(length>unfoldStringWidth) {
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* the string is too long to find any match */
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* do a binary search for the string */
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        start=0;
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        limit=unfoldRows;
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(start<limit) {
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i=(start+limit)/2;
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            unfoldOffset=((i+1)*unfoldRowWidth); // +1 to skip the header values above
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result=strcmpMax(s, unfoldOffset, unfoldStringWidth);
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(result==0) {
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* found the string: add each code point, and its case closure */
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int c;
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for(i=unfoldStringWidth; i<unfoldRowWidth && unfold[unfoldOffset+i]!=0; i+=UTF16.getCharCount(c)) {
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c=UTF16.charAt(unfold, unfoldOffset, unfold.length, i);
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    set.add(c);
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    addCaseClosure(c, set);
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(result<0) {
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit=i;
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else /* result>0 */ {
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start=i+1;
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return false; /* string not found */
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** @return NONE, LOWER, UPPER, TITLE */
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int getType(int c) {
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getTypeFromProps(trie.get(c));
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int getTypeOrIgnorable(int c) {
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getTypeAndIgnorableFromProps(trie.get(c));
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** @return NO_DOT, SOFT_DOTTED, ABOVE, OTHER_ACCENT */
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int getDotType(int c) {
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int props=trie.get(c);
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!propsHasException(props)) {
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return props&DOT_MASK;
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (exceptions[getExceptionsOffset(props)]>>EXC_DOT_SHIFT)&DOT_MASK;
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final boolean isSoftDotted(int c) {
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getDotType(c)==SOFT_DOTTED;
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final boolean isCaseSensitive(int c) {
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (trie.get(c)&SENSITIVE)!=0;
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // string casing ------------------------------------------------------- ***
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * These internal functions form the core of string case mappings.
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * They map single code points to result code points or strings and take
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * all necessary conditions (context, locale ID, options) into account.
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * They do not iterate over the source or write to the destination
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * so that the same functions are useful for non-standard string storage,
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For the same reason, the "surrounding text" context is passed in as a
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * ContextIterator which does not make any assumptions about
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the underlying storage.
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This section contains helper functions that check for conditions
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in the input text surrounding the current code point
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * according to SpecialCasing.txt.
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Each helper function gets the index
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - after the current code point if it looks at following text
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - before the current code point if it looks at preceding text
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Final_Sigma
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   C is preceded by a sequence consisting of
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     a cased letter and a case-ignorable sequence,
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   and C is not followed by a sequence consisting of
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     an ignorable sequence and then a cased letter.
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * More_Above
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   C is followed by one or more characters of combining class 230 (ABOVE)
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   in the combining character sequence.
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * After_Soft_Dotted
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   The last preceding character with combining class of zero before C
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   was Soft_Dotted,
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   and there is no intervening combining character class 230 (ABOVE).
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Before_Dot
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   C is followed by combining dot above (U+0307).
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   Any sequence of characters with a combining class that is neither 0 nor 230
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   may intervene between the current character and the combining dot above.
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The erratum from 2002-10-31 adds the condition
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * After_I
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   The last preceding base character was an uppercase I, and there is no
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   intervening combining character class 230 (ABOVE).
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   (See Jitterbug 2344 and the comments on After_I below.)
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Helper definitions in Unicode 3.2 UAX 21:
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * D1. A character C is defined to be cased
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     if it meets any of the following criteria:
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - The general category of C is Titlecase Letter (Lt)
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - In [CoreProps], C has one of the properties Uppercase, or Lowercase
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - Given D = NFD(C), then it is not the case that:
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
     *     (This third criterion does not add any characters to the list
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *      for Unicode 3.2. Ignored.)
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * D2. A character C is defined to be case-ignorable
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     if it meets either of the following criteria:
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - The general category of C is
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Letter Modifier (Lm), or Symbol Modifier (Sk)
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - C is one of the following characters
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     U+0027 APOSTROPHE
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     U+00AD SOFT HYPHEN (SHY)
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     U+2019 RIGHT SINGLE QUOTATION MARK
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            (the preferred character for apostrophe)
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * D3. A case-ignorable sequence is a sequence of
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     zero or more case-ignorable characters.
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Iterator for string case mappings, which need to look at the
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * context (surrounding text) of a given character for conditional mappings.
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iterator only needs to go backward or forward away from the
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * character in question. It does not use any indexes on this interface.
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * It does not support random access or an arbitrary change of
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iteration direction.
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The code point being case-mapped itself is never returned by
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * this iterator.
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public interface ContextIterator {
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Reset the iterator for forward or backward iteration.
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param dir >0: Begin iterating forward from the first code point
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * after the one that is being case-mapped.
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *            <0: Begin iterating backward from the first code point
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * before the one that is being case-mapped.
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void reset(int dir);
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Iterate and return the next code point, moving in the direction
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * determined by the reset() call.
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @return Next code point, or <0 when the iteration is done.
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int next();
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * Maximum length of a string result of a string case mapping (0x1f == 31),
     * and the threshold that separates string-length results from
     * code point results in the int return value described below.
     *
     * For string case mappings, a single character (a code point) is mapped
     * either to itself (in which case in-place mapping functions do nothing),
     * or to another single code point, or to a string.
     * Aside from the string contents, these are indicated with a single int
     * value as follows:
     *
     * Mapping to self: Negative values (~self instead of -self to support U+0000)
     *
     * Mapping to another code point: Positive values >MAX_STRING_LENGTH
     *
     * Mapping to a string: The string length (0..MAX_STRING_LENGTH) is
     * returned. Note that the string result may indeed have zero length.
     */
    public static final int MAX_STRING_LENGTH=0x1f;
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /*
     * Case-mapping locale categories as computed by getCaseLocale().
     * LOC_UNKNOWN is the value of a locale cache slot that has not been filled in yet.
     */
    private static final int LOC_UNKNOWN=0;
    /* result for every language that getCaseLocale() does not single out */
    private static final int LOC_ROOT=1;
    /* result for the languages "tr", "tur", "az" and "aze" */
    private static final int LOC_TURKISH=2;
    /* result for the languages "lt" and "lit" */
    private static final int LOC_LITHUANIAN=3;
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Checks and caches the type of locale ID as it is relevant for case mapping.
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If the locCache is not null, then it must be initialized with locCache[0]=0 .
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int getCaseLocale(ULocale locale, int[] locCache) {
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result;
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(locCache!=null && (result=locCache[0])!=LOC_UNKNOWN) {
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result;
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result=LOC_ROOT;
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String language=locale.getLanguage();
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(language.equals("tr") || language.equals("tur") || language.equals("az") || language.equals("aze")) {
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result=LOC_TURKISH;
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(language.equals("lt") || language.equals("lit")) {
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result=LOC_LITHUANIAN;
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(locCache!=null) {
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            locCache[0]=result;
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* Is followed by {case-ignorable}* cased  ? (dir determines looking forward/backward) */
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final boolean isFollowedByCasedLetter(ContextIterator iter, int dir) {
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c;
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(iter==null) {
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(iter.reset(dir); (c=iter.next())>=0;) {
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int type=getTypeOrIgnorable(c);
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((type&4)!=0) {
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* case-ignorable, continue with the loop */
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(type!=NONE) {
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true; /* followed by cased letter */
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false; /* uncased and not case-ignorable */
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return false; /* not followed by cased letter */
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final boolean isPrecededBySoftDotted(ContextIterator iter) {
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c;
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int dotType;
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(iter==null) {
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(iter.reset(-1); (c=iter.next())>=0;) {
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dotType=getDotType(c);
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(dotType==SOFT_DOTTED) {
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true; /* preceded by TYPE_i */
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(dotType!=OTHER_ACCENT) {
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return false; /* not preceded by TYPE_i */
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/*
 * See Jitterbug 2344:
 * The condition After_I for Turkic-lowercasing of U+0307 combining dot above
 * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
 * we made those releases compatible with Unicode 3.2 which had not fixed
 * a related bug in SpecialCasing.txt.
 *
 * From the Jitterbug 2344 text:
 * ... this bug is listed as a Unicode erratum
 * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
 * <quote>
 * There are two errors in SpecialCasing.txt.
 * 1. Missing semicolons on two lines. ... [irrelevant for ICU]
 * 2. An incorrect context definition. Correct as follows:
 * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
 * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
 * ---
 * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
 * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
 * where the context After_I is defined as:
 * The last preceding base character was an uppercase I, and there is no
 * intervening combining character class 230 (ABOVE).
 * </quote>
 *
 * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
 *
 * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
 * # This matches the behavior of the canonically equivalent I-dot_above
 *
 * See also the description in this place in older versions of uchar.c (revision 1.100).
 *
 * Markus W. Scherer 2003-feb-15
 */
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* Is preceded by base character 'I' with no intervening cc=230 ? */
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final boolean isPrecededBy_I(ContextIterator iter) {
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c;
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int dotType;
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(iter==null) {
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(iter.reset(-1); (c=iter.next())>=0;) {
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c==0x49) {
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true; /* preceded by I */
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dotType=getDotType(c);
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(dotType!=OTHER_ACCENT) {
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false; /* preceded by different base character (not I), or intervening cc==230 */
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return false; /* not preceded by I */
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* Is followed by one or more cc==230 ? */
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final boolean isFollowedByMoreAbove(ContextIterator iter) {
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c;
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int dotType;
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(iter==null) {
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(iter.reset(1); (c=iter.next())>=0;) {
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dotType=getDotType(c);
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(dotType==ABOVE) {
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true; /* at least one cc==230 following */
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(dotType!=OTHER_ACCENT) {
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false; /* next base character, no more cc==230 following */
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return false; /* no more cc==230 following */
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* Is followed by a dot above (without cc==230 in between) ? */
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final boolean isFollowedByDotAbove(ContextIterator iter) {
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c;
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int dotType;
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(iter==null) {
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(iter.reset(1); (c=iter.next())>=0; ) {
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c==0x307) {
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dotType=getDotType(c);
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(dotType!=OTHER_ACCENT) {
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false; /* next base character or cc==230 in between */
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return false; /* no dot above following */
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final String
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iDot=       "i\u0307",
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        jDot=       "j\u0307",
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iOgonekDot= "\u012f\u0307",
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iDotGrave=  "i\u0307\u0300",
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iDotAcute=  "i\u0307\u0301",
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iDotTilde=  "i\u0307\u0303";
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Get the full lowercase mapping for c.
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param c Character to be mapped.
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param iter Character iterator, used for context-sensitive mappings.
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             See ContextIterator for details.
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             If iter==null then a context-independent result is returned.
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param out If the mapping result is a string, then it is appended to out.
8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param locale Locale ID for locale-dependent mappings.
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param locCache Initialize locCache[0] to 0; may be used to cache the result of parsing
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                 the locale ID for subsequent calls.
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                 Can be null.
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return Output code point or string length, see MAX_STRING_LENGTH.
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see ContextIterator
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #MAX_STRING_LENGTH
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int toFullLower(int c, ContextIterator iter,
8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 StringBuilder out,
8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 ULocale locale, int[] locCache) {
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result, props;
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result=c;
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        props=trie.get(c);
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!propsHasException(props)) {
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(getTypeFromProps(props)>=UPPER) {
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result=c+getDelta(props);
8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excOffset=getExceptionsOffset(props), excOffset2;
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excWord=exceptions[excOffset++];
8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int full;
8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            excOffset2=excOffset;
8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* use hardcoded conditions and mappings */
8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int loc=getCaseLocale(locale, locCache);
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /*
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 * Test for conditional mappings first
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 *   (otherwise the unconditional default mappings are always taken),
8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 * then test for characters that have unconditional mappings in SpecialCasing.txt,
8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 * then get the UnicodeData.txt mappings.
8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 */
8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if( loc==LOC_LITHUANIAN &&
8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* base characters, find accents above */
8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        (((c==0x49 || c==0x4a || c==0x12e) &&
8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            isFollowedByMoreAbove(iter)) ||
8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* precomposed with accent above, no need to find one */
8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        (c==0xcc || c==0xcd || c==0x128))
8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /*
8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # Lithuanian
8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # Lithuanian retains the dot in a lowercase i when followed by accents.
8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # Introduce an explicit dot above when lowercasing capital I's and J's
8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # whenever there are more accents above.
8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     */
8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    switch(c) {
8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    case 0x49:  /* LATIN CAPITAL LETTER I */
8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        out.append(iDot);
8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 2;
8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    case 0x4a:  /* LATIN CAPITAL LETTER J */
8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        out.append(jDot);
8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 2;
8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        out.append(iOgonekDot);
8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 2;
8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        out.append(iDotGrave);
8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 3;
8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        out.append(iDotAcute);
8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 3;
8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        out.append(iDotTilde);
8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 3;
8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    default:
8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 0; /* will not occur */
8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* # Turkish and Azeri */
8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(loc==LOC_TURKISH && c==0x130) {
8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /*
8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # The following rules handle those cases.
8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     */
8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 0x69;
8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(loc==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /*
8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # This matches the behavior of the canonically equivalent I-dot_above
8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     */
9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 0; /* remove the dot (continue without output) */
9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(loc==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /*
9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     */
9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 0x131;
9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(c==0x130) {
9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /*
9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # Preserve canonical equivalence for I with dot. Turkic is handled below.
9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     */
9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    out.append(iDot);
9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 2;
9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(  c==0x3a3 &&
9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            !isFollowedByCasedLetter(iter, 1) &&
9227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            isFollowedByCasedLetter(iter, -1) /* -1=preceded */
9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /*
9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # Special case for final form of sigma
9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     */
9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 0x3c2; /* greek small final sigma */
9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* no known conditional special case mapping, use a normal mapping */
9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                full=(int)value&FULL_LOWER;
9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(full!=0) {
9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* start of full case mapping strings */
9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    excOffset=(int)(value>>32)+1;
9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* set the output pointer to the lowercase mapping */
9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    out.append(exceptions, excOffset, full);
9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* return the string length */
9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return full;
9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasSlot(excWord, EXC_LOWER)) {
9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result=getSlotValue(excWord, EXC_LOWER, excOffset2);
9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (result==c) ? ~result : result;
9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* internal */
9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final int toUpperOrTitle(int c, ContextIterator iter,
9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                     StringBuilder out,
9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                     ULocale locale, int[] locCache,
9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                     boolean upperNotTitle) {
9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result;
9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int props;
9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result=c;
9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        props=trie.get(c);
9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!propsHasException(props)) {
9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(getTypeFromProps(props)==LOWER) {
9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result=c+getDelta(props);
9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excOffset=getExceptionsOffset(props), excOffset2;
9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excWord=exceptions[excOffset++];
9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int full, index;
9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            excOffset2=excOffset;
9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* use hardcoded conditions and mappings */
9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int loc=getCaseLocale(locale, locCache);
9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(loc==LOC_TURKISH && c==0x69) {
9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /*
9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # Turkish and Azeri
9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # The following rules handle those cases.
9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # When uppercasing, i turns into a dotted capital I
9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    */
9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 0x130;
9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(loc==LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter)) {
9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /*
9977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # Lithuanian
9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # Lithuanian retains the dot in a lowercase i when followed by accents.
10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        # Remove DOT ABOVE after "i" with upper or titlecase
10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
10047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     */
10057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 0; /* remove the dot (continue without output) */
10067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
10077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* no known conditional special case mapping, use a normal mapping */
10087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
10097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
10107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
10117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                full=(int)value&0xffff;
10127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* start of full case mapping strings */
10147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset=(int)(value>>32)+1;
10157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* skip the lowercase and case-folding result strings */
10177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset+=full&FULL_LOWER;
10187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                full>>=4;
10197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset+=full&0xf;
10207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                full>>=4;
10217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(upperNotTitle) {
10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    full&=0xf;
10247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* skip the uppercase result string */
10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    excOffset+=full&0xf;
10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    full=(full>>4)&0xf;
10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
10297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(full!=0) {
10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* set the output pointer to the result string */
10327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    out.append(exceptions, excOffset, full);
10337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* return the string length */
10357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return full;
10367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
10377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(!upperNotTitle && hasSlot(excWord, EXC_TITLE)) {
10407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index=EXC_TITLE;
10417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(hasSlot(excWord, EXC_UPPER)) {
10427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* here, titlecase is same as uppercase */
10437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index=EXC_UPPER;
10447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
10457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return ~c;
10467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result=getSlotValue(excWord, index, excOffset2);
10487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (result==c) ? ~result : result;
10517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int toFullUpper(int c, ContextIterator iter,
10547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 StringBuilder out,
10557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 ULocale locale, int[] locCache) {
10567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return toUpperOrTitle(c, iter, out, locale, locCache, true);
10577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int toFullTitle(int c, ContextIterator iter,
10607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 StringBuilder out,
10617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 ULocale locale, int[] locCache) {
10627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return toUpperOrTitle(c, iter, out, locale, locCache, false);
10637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* case folding ------------------------------------------------------------- */
10667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
10687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Case folding is similar to lowercasing.
10697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The result may be a simple mapping, i.e., a single code point, or
10707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * a full mapping, i.e., a string.
10717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
10727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * then only the lowercase mapping is stored.
10737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Some special cases are hardcoded because their conditions cannot be
10757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * parsed and processed from CaseFolding.txt.
10767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unicode 3.2 CaseFolding.txt specifies for its status field:
10787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    # C: common case folding, common mappings shared by both simple and full mappings.
10807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
10817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    # S: simple case folding, mappings to single characters where different from F.
10827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    # T: special case for uppercase I and dotted uppercase I
10837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    #    - For non-Turkic languages, this mapping is normally not used.
10847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    #    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
10857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    #
10867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    # Usage:
10877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    #  A. To do a simple case folding, use the mappings with status C + S.
10887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    #  B. To do a full case folding, use the mappings with status C + F.
10897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    #
10907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    #    The mappings with status T can be used or omitted depending on the desired case-folding
10917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    #    behavior. (The default option is to exclude them.)
10927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unicode 3.2 has 'T' mappings as follows:
10947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    0049; T; 0131; # LATIN CAPITAL LETTER I
10967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
10977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * while the default mappings for these code points are:
10997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    0049; C; 0069; # LATIN CAPITAL LETTER I
11017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
11027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * U+0130 has no simple case folding (simple-case-folds to itself).
11047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
11057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Bit mask for getting just the options from a string compare options word
 * that are relevant for case folding (of a single string or code point).
 * Only the low 8 bits (0xff) are kept. fold() and toFullFolding() compare the
 * masked value with UCharacter.FOLD_CASE_DEFAULT to choose between the default
 * and the Turkic hardcoded mappings.
 * @internal
 */
private static final int FOLD_CASE_OPTIONS_MASK = 0xff;
11127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* return the simple case folding mapping for c */
11147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int fold(int c, int options) {
11157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int props=trie.get(c);
11167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!propsHasException(props)) {
11177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(getTypeFromProps(props)>=UPPER) {
11187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c+=getDelta(props);
11197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
11217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excOffset=getExceptionsOffset(props);
11227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excWord=exceptions[excOffset++];
11237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int index;
11247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
11257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* special case folding mappings, hardcoded */
11267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
11277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* default mappings */
11287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(c==0x49) {
11297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
11307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 0x69;
11317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(c==0x130) {
11327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* no simple case folding for U+0130 */
11337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return c;
11347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
11367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* Turkic mappings */
11377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(c==0x49) {
11387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
11397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 0x131;
11407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(c==0x130) {
11417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
11427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 0x69;
11437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
11457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasSlot(excWord, EXC_FOLD)) {
11477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index=EXC_FOLD;
11487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(hasSlot(excWord, EXC_LOWER)) {
11497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index=EXC_LOWER;
11507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
11517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return c;
11527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c=getSlotValue(excWord, index, excOffset);
11547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return c;
11567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
11597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Issue for canonical caseless match (UAX #21):
11607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
11617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * canonical equivalence, unlike default-option casefolding.
11627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For example, I-grave and I + grave fold to strings that are not canonically
11637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * equivalent.
11647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For more details, see the comment in unorm_compare() in unorm.cpp
11657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and the intermediate prototype changes for Jitterbug 2021.
11667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
11677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This did not get fixed because it appears that it is not possible to fix
11697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * it for uppercase and lowercase characters (I-grave vs. i-grave)
11707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * together in a way that they still fold to common result strings.
11717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
11727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int toFullFolding(int c, StringBuilder out, int options) {
11747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result;
11757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int props;
11767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result=c;
11787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        props=trie.get(c);
11797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!propsHasException(props)) {
11807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(getTypeFromProps(props)>=UPPER) {
11817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result=c+getDelta(props);
11827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
11847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excOffset=getExceptionsOffset(props), excOffset2;
11857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int excWord=exceptions[excOffset++];
11867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int full, index;
11877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            excOffset2=excOffset;
11897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
11917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* use hardcoded conditions and mappings */
11927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
11937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* default mappings */
11947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(c==0x49) {
11957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
11967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 0x69;
11977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(c==0x130) {
11987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
11997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        out.append(iDot);
12007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 2;
12017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
12027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
12037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* Turkic mappings */
12047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(c==0x49) {
12057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
12067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 0x131;
12077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(c==0x130) {
12087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
12097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return 0x69;
12107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
12117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
12127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
12137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
12147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                full=(int)value&0xffff;
12157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* start of full case mapping strings */
12177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset=(int)(value>>32)+1;
12187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* skip the lowercase result string */
12207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                excOffset+=full&FULL_LOWER;
12217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                full=(full>>4)&0xf;
12227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(full!=0) {
12247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* set the output pointer to the result string */
12257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    out.append(exceptions, excOffset, full);
12267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* return the string length */
12287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return full;
12297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
12307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasSlot(excWord, EXC_FOLD)) {
12337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index=EXC_FOLD;
12347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(hasSlot(excWord, EXC_LOWER)) {
12357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index=EXC_LOWER;
12367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
12377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return ~c;
12387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result=getSlotValue(excWord, index, excOffset2);
12407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (result==c) ? ~result : result;
12437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
12447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* case mapping properties API ---------------------------------------------- */
12467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/* Locale cache that hasBinaryProperty() passes together with ULocale.ROOT. */
private static final int[] rootLocCache = { LOC_ROOT };
/*
 * We need a StringBuilder for multi-code point output from the
 * full case mapping functions. However, we do not actually use that output,
 * we just check whether the input character was mapped to anything else.
 * We use a shared StringBuilder to avoid allocating a new one in each call.
 * We remove its contents each time so that it does not grow large over time.
 *
 * NOTE(review): java.lang.StringBuilder is not synchronized and this instance
 * is public, static and shared by every caller. Concurrent use looks unsafe in
 * principle; confirm whether that matters here given that the appended
 * content is never read back.
 *
 * @internal
 */
public static final StringBuilder dummyStringBuilder = new StringBuilder();
12587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final boolean hasBinaryProperty(int c, int which) {
12607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        switch(which) {
12617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case UProperty.LOWERCASE:
12627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return LOWER==getType(c);
12637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case UProperty.UPPERCASE:
12647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return UPPER==getType(c);
12657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case UProperty.SOFT_DOTTED:
12667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return isSoftDotted(c);
12677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case UProperty.CASE_SENSITIVE:
12687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return isCaseSensitive(c);
12697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case UProperty.CASED:
12707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return NONE!=getType(c);
12717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case UProperty.CASE_IGNORABLE:
12727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (getTypeOrIgnorable(c)>>2)!=0;
12737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
12747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Note: The following Changes_When_Xyz are defined as testing whether
12757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * the NFD form of the input changes when Xyz-case-mapped.
12767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * However, this simpler implementation of these properties,
12777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * ignoring NFD, passes the tests.
12787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * The implementation needs to be changed if the tests start failing.
12797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * When that happens, optimizations should be used to work with the
12807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * per-single-code point ucase_toFullXyz() functions unless
12817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * the NFD form has more than one code point,
12827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * and the property starts set needs to be the union of the
12837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * start sets for normalization and case mappings.
12847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
12857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case UProperty.CHANGES_WHEN_LOWERCASED:
12867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dummyStringBuilder.setLength(0);
12877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
12887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case UProperty.CHANGES_WHEN_UPPERCASED:
12897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dummyStringBuilder.setLength(0);
12907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
12917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case UProperty.CHANGES_WHEN_TITLECASED:
12927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dummyStringBuilder.setLength(0);
12937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
12947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
12957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case UProperty.CHANGES_WHEN_CASEMAPPED:
12967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dummyStringBuilder.setLength(0);
12977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return
12987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
12997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
13007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
13017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        default:
13027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
13037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
13047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // data members -------------------------------------------------------- ***
13077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int indexes[];
13087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private char exceptions[];
13097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private char unfold[];
13107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Trie2_16 trie;
13127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // data format constants ----------------------------------------------- ***
/* The three constants below combine into the data file name "ucase.icu". */
private static final String DATA_NAME="ucase";
private static final String DATA_TYPE="icu";
private static final String DATA_FILE_NAME=DATA_NAME+"."+DATA_TYPE;

/* format "cAsE": the ASCII codes 0x63 'c', 0x41 'A', 0x53 'S', 0x45 'E' packed into one int */
private static final int FMT=0x63415345;

/* indexes into indexes[] */
//private static final int IX_INDEX_TOP=0;
//private static final int IX_LENGTH=1;
private static final int IX_TRIE_SIZE=2;
private static final int IX_EXC_LENGTH=3;
private static final int IX_UNFOLD_LENGTH=4;

//private static final int IX_MAX_FULL_LENGTH=15;
/* NOTE(review): presumably the number of entries in indexes[] - confirm against the loading code */
private static final int IX_TOP=16;
13307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // definitions for 16-bit case properties word ------------------------- ***
13327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* 2-bit constants for types of cased characters */
13347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int TYPE_MASK=3;
13357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int NONE=0;
13367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int LOWER=1;
13377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int UPPER=2;
13387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int TITLE=3;
13397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int getTypeFromProps(int props) {
13417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return props&TYPE_MASK;
13427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int getTypeAndIgnorableFromProps(int props) {
13457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return props&7;
13467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int IGNORABLE=   4;
13497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int SENSITIVE=     8;
13507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXCEPTION=     0x10;
13517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int DOT_MASK=      0x60;
13537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int NO_DOT=        0;      /* normal characters with cc=0 */
13547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int SOFT_DOTTED=   0x20;   /* soft-dotted characters with cc=0 */
13557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int ABOVE=         0x40;   /* "above" accents with cc=230 */
13567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int OTHER_ACCENT=  0x60;   /* other accent character (0<cc!=230) */
13577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* no exception: bits 15..7 are a 9-bit signed case mapping delta */
13597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int DELTA_SHIFT=   7;
13607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int DELTA_MASK=    0xff80;
13617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int MAX_DELTA=     0xff;
13627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int MIN_DELTA=     (-MAX_DELTA-1);
13637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int getDelta(int props) {
13657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (short)props>>DELTA_SHIFT;
13667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* exception: bits 15..5 are an unsigned 11-bit index into the exceptions array */
13697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_SHIFT=     5;
13707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int EXC_MASK=      0xffe0;
13717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int MAX_EXCEPTIONS=((EXC_MASK>>EXC_SHIFT)+1);
13727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* definitions for 16-bit main exceptions word ------------------------------ */
13747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* first 8 bits indicate values in optional slots */
13767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_LOWER=0;
13777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_FOLD=1;
13787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_UPPER=2;
13797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_TITLE=3;
13807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int EXC_4=4;           /* reserved */
13817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int EXC_5=5;           /* reserved */
13827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_CLOSURE=6;
13837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_FULL_MAPPINGS=7;
13847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int EXC_ALL_SLOTS=8;   /* one past the last slot */
13857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* each slot is 2 uint16_t instead of 1 */
13877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_DOUBLE_SLOTS=          0x100;
13887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* reserved: exception bits 11..9 */
13907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* EXC_DOT_MASK=DOT_MASK<<EXC_DOT_SHIFT */
13927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_DOT_SHIFT=7;
13937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* normally stored in the main word, but pushed out for larger exception indexes */
13957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int EXC_DOT_MASK=              0x3000;
13967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int EXC_NO_DOT=                0;
13977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int EXC_SOFT_DOTTED=           0x1000;
13987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int EXC_ABOVE=                 0x2000; /* "above" accents with cc=230 */
13997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int EXC_OTHER_ACCENT=          0x3000; /* other character (0<cc!=230) */
14007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* complex/conditional mappings */
14027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_CONDITIONAL_SPECIAL=   0x4000;
14037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EXC_CONDITIONAL_FOLD=      0x8000;
14047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* definitions for lengths word for full case mappings */
14067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int FULL_LOWER=    0xf;
14077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int FULL_FOLDING=  0xf0;
14087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int FULL_UPPER=    0xf00;
14097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int FULL_TITLE=    0xf000;
14107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* maximum lengths */
14127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //private static final int FULL_MAPPINGS_MAX_LENGTH=4*0xf;
14137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CLOSURE_MAX_LENGTH=0xf;
14147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* constants for reverse case folding ("unfold") data */
14167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int UNFOLD_ROWS=0;
14177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int UNFOLD_ROW_WIDTH=1;
14187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int UNFOLD_STRING_WIDTH=2;
14197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
14217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * public singleton instance
14227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
14237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final UCaseProps INSTANCE;
14247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // This static initializer block must be placed after
14267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // other static member initialization
14277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static {
14287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
14297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            INSTANCE = new UCaseProps();
14307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (IOException e) {
14317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new ICUUncheckedIOException(e);
14327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
1435