17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2000-2014, International Business Machines Corporation and
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.nio.CharBuffer;
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Norm2AllModes;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Normalizer2Impl;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.UCaseProps;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ICUCloneNotSupportedException;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Unicode Normalization
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <h2>Unicode normalization API</h2>
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>normalize</code> transforms Unicode text into an equivalent composed or
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * decomposed form, allowing for easier sorting and searching of text.
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>normalize</code> supports the standard normalization forms described in
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Unicode Standard Annex #15 &mdash; Unicode Normalization Forms</a>.
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Characters with accents or other adornments can be encoded in
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * several different ways in Unicode.  For example, take the character A-acute.
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * In Unicode, this can be encoded as a single character (the
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * "composed" form):
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      00C1    LATIN CAPITAL LETTER A WITH ACUTE
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * or as two separate characters (the "decomposed" form):
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      0041    LATIN CAPITAL LETTER A
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      0301    COMBINING ACUTE ACCENT
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * To a user of your program, however, both of these sequences should be
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * treated as the same "user-level" character "A with acute accent".  When you
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * are searching or comparing text, you must ensure that these two sequences are
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * treated equivalently.  In addition, you must handle characters with more than
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * one accent.  Sometimes the order of a character's combining accents is
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * significant, while in other cases accent sequences in different orders are
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * really equivalent.
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Similarly, the string "ffi" can be encoded as three separate letters:
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      0066    LATIN SMALL LETTER F
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      0066    LATIN SMALL LETTER F
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      0069    LATIN SMALL LETTER I
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * or as the single character
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      FB03    LATIN SMALL LIGATURE FFI
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The ffi ligature is not a distinct semantic character, and strictly speaking
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * it shouldn't be in Unicode at all, but it was included for compatibility
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * with existing character sets that already provided it.  The Unicode standard
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * identifies such characters by giving them "compatibility" decompositions
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * into the corresponding semantic characters.  When sorting and searching, you
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * will often want to use these mappings.
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>normalize</code> helps solve these problems by transforming text into
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the canonical composed and decomposed forms as shown in the first example
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * above. In addition, you can have it perform compatibility decompositions so
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * that you can treat compatibility characters the same as their equivalents.
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Finally, <code>normalize</code> rearranges accents into the proper canonical
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * order, so that you do not have to worry about accent rearrangement on your
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * own.
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Form FCD, "Fast C or D", is also designed for collation.
 * It makes it possible to work on strings that are not necessarily normalized
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * with an algorithm (like in collation) that works under "canonical closure",
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * i.e., it treats precomposed characters and their decomposed equivalents the
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * same.
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * It is not a normalization form because it does not provide for uniqueness of
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * representation. Multiple strings may be canonically equivalent (their NFDs
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * are identical) and may all conform to FCD without being identical themselves.
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The form is defined such that the "raw decomposition", the recursive
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * canonical decomposition of each character, results in a string that is
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * canonically ordered. This means that precomposed characters are allowed for
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * as long as their decompositions do not need canonical reordering.
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Its advantage for a process like collation is that all NFD and most NFC texts
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * - and many unnormalized texts - already conform to FCD and do not need to be
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * normalized (NFD) for such a process. The FCD quick check will return YES for
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * most strings in practice.
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * normalize(FCD) may be implemented with NFD.
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * For more details on FCD see Unicode Technical Note #5 (Canonical Equivalence in Applications):
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * http://www.unicode.org/notes/tn5/#FCD
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * ICU collation performs either NFD or FCD normalization automatically if
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * normalization is turned on for the collator object. Beyond collation and
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * string search, normalized strings may be useful for string equivalence
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * comparisons, transliteration/transcription, unique representations, etc.
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
 * The W3C generally recommends exchanging text in NFC.
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Note also that most legacy character encodings use only precomposed forms and
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * often do not encode any combining marks by themselves. For conversion to such
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * character encodings the Unicode text needs to be normalized to NFC.
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * For more usage examples, see the Unicode Standard Annex.
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Note: The Normalizer class also provides API for iterative normalization.
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * While the setIndex() and getIndex() refer to indices in the
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * underlying Unicode input text, the next() and previous() methods
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * iterate through characters in the normalized output.
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This means that there is not necessarily a one-to-one correspondence
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * between characters returned by next() and previous() and the indices
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * passed to and returned from setIndex() and getIndex().
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * It is for this reason that Normalizer does not implement the CharacterIterator interface.
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.8
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class Normalizer implements Cloneable {
    // The input text and our position in it
    private UCharacterIterator  text;
    // Normalization settings: a Normalizer2 instance, the Mode constant, and the
    // options bit set (see UNICODE_3_2).
    // NOTE(review): presumably norm2 is what mode.getNormalizer2(options) returns;
    // confirm against the constructors/setters, which are outside this section.
    private Normalizer2         norm2;
    private Mode                mode;
    private int                 options;

    // The normalization buffer is the result of normalization
    // of the source in the half-open range [currentIndex..nextIndex[ ,
    // i.e. currentIndex is included and nextIndex is excluded.
    private int                 currentIndex;
    private int                 nextIndex;

    // A buffer for holding intermediate results
    private StringBuilder       buffer;
    // NOTE(review): presumably the read position inside buffer -- confirm with the
    // iteration methods, which are outside this section.
    private int                 bufferPos;
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Helper classes to defer loading of normalization data.
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class ModeImpl {
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private ModeImpl(Normalizer2 n2) {
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            normalizer2 = n2;
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private final Normalizer2 normalizer2;
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
    // Lazy holder for the NFD ModeImpl: INSTANCE is created when this nested class
    // is initialized, so Normalizer2.getNFDInstance() is not called before then.
    private static final class NFDModeImpl {
        private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFDInstance());
    }
    // Lazy holder for the NFKD ModeImpl: INSTANCE is created when this nested class
    // is initialized, so Normalizer2.getNFKDInstance() is not called before then.
    private static final class NFKDModeImpl {
        private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFKDInstance());
    }
    // Lazy holder for the NFC ModeImpl: INSTANCE is created when this nested class
    // is initialized, so Normalizer2.getNFCInstance() is not called before then.
    private static final class NFCModeImpl {
        private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFCInstance());
    }
    // Lazy holder for the NFKC ModeImpl: INSTANCE is created when this nested class
    // is initialized, so Normalizer2.getNFKCInstance() is not called before then.
    private static final class NFKCModeImpl {
        private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFKCInstance());
    }
    // Lazy holder for the FCD ModeImpl: INSTANCE is created when this nested class
    // is initialized. Unlike the other holders, the Normalizer2 comes from
    // Norm2AllModes.getFCDNormalizer2() rather than from a Normalizer2 factory method.
    private static final class FCDModeImpl {
        private static final ModeImpl INSTANCE = new ModeImpl(Norm2AllModes.getFCDNormalizer2());
    }
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    // Lazy holder for the frozen UnicodeSet built from the pattern "[:age=3.2:]".
    // It is the filter set passed to FilteredNormalizer2 by the *32ModeImpl holders.
    private static final class Unicode32 {
        private static final UnicodeSet INSTANCE = new UnicodeSet("[:age=3.2:]").freeze();
    }
    // Lazy holder for the Unicode 3.2 variant of NFD: the NFD Normalizer2 wrapped
    // in a FilteredNormalizer2 that uses Unicode32.INSTANCE as its filter set.
    private static final class NFD32ModeImpl {
        private static final ModeImpl INSTANCE =
            new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFDInstance(),
                                                 Unicode32.INSTANCE));
    }
    // Lazy holder for the Unicode 3.2 variant of NFKD: the NFKD Normalizer2 wrapped
    // in a FilteredNormalizer2 that uses Unicode32.INSTANCE as its filter set.
    private static final class NFKD32ModeImpl {
        private static final ModeImpl INSTANCE =
            new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFKDInstance(),
                                                 Unicode32.INSTANCE));
    }
    // Lazy holder for the Unicode 3.2 variant of NFC: the NFC Normalizer2 wrapped
    // in a FilteredNormalizer2 that uses Unicode32.INSTANCE as its filter set.
    private static final class NFC32ModeImpl {
        private static final ModeImpl INSTANCE =
            new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFCInstance(),
                                                 Unicode32.INSTANCE));
    }
    // Lazy holder for the Unicode 3.2 variant of NFKC: the NFKC Normalizer2 wrapped
    // in a FilteredNormalizer2 that uses Unicode32.INSTANCE as its filter set.
    private static final class NFKC32ModeImpl {
        private static final ModeImpl INSTANCE =
            new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFKCInstance(),
                                                 Unicode32.INSTANCE));
    }
    // Lazy holder for the Unicode 3.2 variant of FCD: the Normalizer2 from
    // Norm2AllModes.getFCDNormalizer2() wrapped in a FilteredNormalizer2 that uses
    // Unicode32.INSTANCE as its filter set.
    private static final class FCD32ModeImpl {
        private static final ModeImpl INSTANCE =
            new ModeImpl(new FilteredNormalizer2(Norm2AllModes.getFCDNormalizer2(),
                                                 Unicode32.INSTANCE));
    }
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * Options bit set value to select Unicode 3.2 normalization
     * (except NormalizationCorrections).
     * At most one Unicode version can be selected at a time.
     * <p>
     * The private Mode implementations in this class test this bit in their
     * <code>options</code> argument to choose between the regular Normalizer2
     * and one filtered to the set <code>[:age=3.2:]</code>.
     * @stable ICU 2.6
     */
    public static final int UNICODE_3_2=0x20;

    /**
     * Constant indicating that the end of the iteration has been reached.
     * This is guaranteed to have the same value as {@link UCharacterIterator#DONE}.
     * @stable ICU 2.8
     */
    public static final int DONE = UCharacterIterator.DONE;
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * Constants for normalization modes.
     * <p>
     * The Mode class is not intended for public subclassing.
     * Only the Mode constants provided by the Normalizer class should be used,
     * and any fields or methods should not be called or overridden by users.
     * @stable ICU 2.8
     */
    public static abstract class Mode {
        /**
         * Sole constructor
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected Mode() {
        }

        /**
         * Returns the {@link Normalizer2} instance that implements this mode
         * for the given options.
         * @param options options bit set; the implementations in this class
         *                test the {@link Normalizer#UNICODE_3_2} bit
         * @return the Normalizer2 to use for this mode and these options
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected abstract Normalizer2 getNormalizer2(int options);
    }
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class NONEMode extends Mode {
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected Normalizer2 getNormalizer2(int options) { return Norm2AllModes.NOOP_NORMALIZER2; }
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class NFDMode extends Mode {
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected Normalizer2 getNormalizer2(int options) {
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (options&UNICODE_3_2) != 0 ?
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    NFD32ModeImpl.INSTANCE.normalizer2 : NFDModeImpl.INSTANCE.normalizer2;
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class NFKDMode extends Mode {
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected Normalizer2 getNormalizer2(int options) {
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (options&UNICODE_3_2) != 0 ?
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    NFKD32ModeImpl.INSTANCE.normalizer2 : NFKDModeImpl.INSTANCE.normalizer2;
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class NFCMode extends Mode {
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected Normalizer2 getNormalizer2(int options) {
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (options&UNICODE_3_2) != 0 ?
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    NFC32ModeImpl.INSTANCE.normalizer2 : NFCModeImpl.INSTANCE.normalizer2;
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class NFKCMode extends Mode {
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected Normalizer2 getNormalizer2(int options) {
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (options&UNICODE_3_2) != 0 ?
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    NFKC32ModeImpl.INSTANCE.normalizer2 : NFKCModeImpl.INSTANCE.normalizer2;
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class FCDMode extends Mode {
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected Normalizer2 getNormalizer2(int options) {
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (options&UNICODE_3_2) != 0 ?
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    FCD32ModeImpl.INSTANCE.normalizer2 : FCDModeImpl.INSTANCE.normalizer2;
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * No decomposition/composition.
     * @stable ICU 2.8
     */
    public static final Mode NONE = new NONEMode();

    /**
     * Canonical decomposition (NFD).
     * @stable ICU 2.8
     */
    public static final Mode NFD = new NFDMode();

    /**
     * Compatibility decomposition (NFKD).
     * @stable ICU 2.8
     */
    public static final Mode NFKD = new NFKDMode();

    /**
     * Canonical decomposition followed by canonical composition (NFC).
     * @stable ICU 2.8
     */
    public static final Mode NFC = new NFCMode();

    /**
     * Default normalization; this is the same object as {@link #NFC}.
     * @stable ICU 2.8
     */
    public static final Mode DEFAULT = NFC;

    /**
     * Compatibility decomposition followed by canonical composition (NFKC).
     * @stable ICU 2.8
     */
    public static final Mode NFKC =new NFKCMode();

    /**
     * "Fast C or D" form (FCD).
     * @stable ICU 2.8
     */
    public static final Mode FCD = new FCDMode();
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * Null operation for use with the {@link com.ibm.icu.text.Normalizer constructors}
     * and the static {@link #normalize normalize} method.  This value tells
     * the <code>Normalizer</code> to do nothing but return unprocessed characters
     * from the underlying String or CharacterIterator.  If you have code which
     * requires raw text at some times and normalized text at others, you can
     * use <code>NO_OP</code> for the cases where you want raw text, rather
     * than having a separate code path that bypasses <code>Normalizer</code>
     * altogether.
     * <p>
     * This constant is the same object as {@link #NONE}.
     * @see #setMode
     * @deprecated ICU 2.8. Use Normalizer.NONE
     * @see #NONE
     */
    @Deprecated
    public static final Mode NO_OP = NONE;
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Canonical decomposition followed by canonical composition.  Used with the
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link com.ibm.icu.text.Normalizer constructors} and the static
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #normalize normalize} method to determine the operation to be
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * performed.
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * off, this operation produces output that is in
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Form</a>
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <b>C</b>.
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setMode
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated ICU 2.8. Use Normalier.NFC
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #NFC
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final Mode COMPOSE = NFC;
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compatibility decomposition followed by canonical composition.
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Used with the {@link com.ibm.icu.text.Normalizer constructors} and the static
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #normalize normalize} method to determine the operation to be
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * performed.
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * off, this operation produces output that is in
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Form</a>
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <b>KC</b>.
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setMode
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated ICU 2.8. Use Normalizer.NFKC
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #NFKC
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final Mode COMPOSE_COMPAT = NFKC;
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Canonical decomposition.  This value is passed to the
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link com.ibm.icu.text.Normalizer constructors} and the static
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #normalize normalize}
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * method to determine the operation to be performed.
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * off, this operation produces output that is in
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Form</a>
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <b>D</b>.
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setMode
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated ICU 2.8. Use Normalizer.NFD
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #NFD
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final Mode DECOMP = NFD;
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compatibility decomposition.  This value is passed to the
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link com.ibm.icu.text.Normalizer constructors} and the static
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #normalize normalize}
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * method to determine the operation to be performed.
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * off, this operation produces output that is in
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Form</a>
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <b>KD</b>.
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setMode
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated ICU 2.8. Use Normalizer.NFKD
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #NFKD
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final Mode DECOMP_COMPAT = NFKD;
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Option to disable Hangul/Jamo composition and decomposition.
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This option applies to Korean text,
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * which can be represented either in the Jamo alphabet or in Hangul
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * characters, which are really just two or three Jamo combined
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * into one visual glyph.  Since Jamo takes up more storage space than
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Hangul, applications that process only Hangul text may wish to turn
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * this option on when decomposing text.
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The Unicode standard treates Hangul to Jamo conversion as a
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * canonical decomposition, so this option must be turned <b>off</b> if you
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * wish to transform strings into one of the standard
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unicode Normalization Forms</a>.
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setOption
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated ICU 2.8. This option is no longer supported.
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int IGNORE_HANGUL = 0x0001;
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Result values for quickCheck().
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For details see Unicode Technical Report 15.
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final class QuickCheckResult{
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //private int resultValue;
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private QuickCheckResult(int value) {
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //resultValue=value;
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Indicates that string is not in the normalized format
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final QuickCheckResult NO = new QuickCheckResult(0);
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Indicates that string is in the normalized format
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final QuickCheckResult YES = new QuickCheckResult(1);
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Indicates it cannot be determined if string is in the normalized
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * format without further thorough checks.
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final QuickCheckResult MAYBE = new QuickCheckResult(2);
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Option bit for compare:
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Case sensitively compare the strings
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int FOLD_CASE_DEFAULT =  UCharacter.FOLD_CASE_DEFAULT;
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Option bit for compare:
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Both input strings are assumed to fulfill FCD conditions.
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int INPUT_IS_FCD    =      0x20000;
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Option bit for compare:
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Perform case-insensitive comparison.
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int COMPARE_IGNORE_CASE  =     0x10000;
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Option bit for compare:
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compare strings in code point order instead of code unit order.
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int COMPARE_CODE_POINT_ORDER = 0x8000;
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Option value for case folding:
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and dotless i appropriately for Turkic languages (tr, az).
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I;
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Lowest-order bit number of compare() options bits corresponding to
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * normalization options bits.
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The options parameter for compare() uses most bits for
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * itself and for various comparison and folding flags.
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The most significant bits, however, are shifted down and passed on
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * to the normalization implementation.
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (That is, from compare(..., options, ...),
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * options>>COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * internal normalization functions.)
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #compare
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int COMPARE_NORM_OPTIONS_SHIFT  = 20;
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //-------------------------------------------------------------------------
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Iterator constructors
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //-------------------------------------------------------------------------
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Creates a new <tt>Normalizer</tt> object for iterating over the
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * normalized form of a given string.
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The <tt>options</tt> parameter specifies which optional
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <tt>Normalizer</tt> features are to be enabled for this object.
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param str  The string to be normalized.  The normalization
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *              will start at the beginning of the string.
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode The normalization mode.
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param opt Any optional features to be enabled.
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            Currently the only available option is {@link #UNICODE_3_2}.
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            If you want the default behavior corresponding to one of the
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            standard Unicode Normalization Forms, use 0 for this argument.
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Normalizer(String str, Mode mode, int opt) {
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.text = UCharacterIterator.getInstance(str);
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.mode = mode;
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.options=opt;
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        norm2 = mode.getNormalizer2(opt);
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        buffer = new StringBuilder();
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Creates a new <tt>Normalizer</tt> object for iterating over the
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * normalized form of the given text.
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param iter  The input text to be normalized.  The normalization
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *              will start at the beginning of the string.
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode  The normalization mode.
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param opt Any optional features to be enabled.
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            Currently the only available option is {@link #UNICODE_3_2}.
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            If you want the default behavior corresponding to one of the
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            standard Unicode Normalization Forms, use 0 for this argument.
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Normalizer(CharacterIterator iter, Mode mode, int opt) {
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.text = UCharacterIterator.getInstance((CharacterIterator)iter.clone());
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.mode = mode;
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.options = opt;
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        norm2 = mode.getNormalizer2(opt);
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        buffer = new StringBuilder();
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Creates a new <tt>Normalizer</tt> object for iterating over the
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * normalized form of the given text.
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param iter  The input text to be normalized.  The normalization
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *              will start at the beginning of the string.
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode  The normalization mode.
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Normalizer(UCharacterIterator iter, Mode mode, int options) {
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            this.text     = (UCharacterIterator)iter.clone();
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            this.mode     = mode;
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            this.options  = options;
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            norm2 = mode.getNormalizer2(options);
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buffer = new StringBuilder();
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (CloneNotSupportedException e) {
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new ICUCloneNotSupportedException(e);
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Clones this <tt>Normalizer</tt> object.  All properties of this
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * object are duplicated in the new object, including the cloning of any
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link CharacterIterator} that was passed in to the constructor
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * or to {@link #setText(CharacterIterator) setText}.
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * However, the text storage underlying
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the <tt>CharacterIterator</tt> is not duplicated unless the
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iterator's <tt>clone</tt> method does so.
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Object clone() {
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Normalizer copy = (Normalizer) super.clone();
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            copy.text = (UCharacterIterator) text.clone();
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            copy.mode = mode;
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            copy.options = options;
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            copy.norm2 = norm2;
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            copy.buffer = new StringBuilder(buffer);
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            copy.bufferPos = bufferPos;
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            copy.currentIndex = currentIndex;
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            copy.nextIndex = nextIndex;
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return copy;
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        catch (CloneNotSupportedException e) {
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new ICUCloneNotSupportedException(e);
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //--------------------------------------------------------------------------
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Static Utility methods
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //--------------------------------------------------------------------------
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final Normalizer2 getComposeNormalizer2(boolean compat, int options) {
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (compat ? NFKC : NFC).getNormalizer2(options);
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final Normalizer2 getDecomposeNormalizer2(boolean compat, int options) {
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (compat ? NFKD : NFD).getNormalizer2(options);
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compose a string.
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be composed to according to the specified mode.
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param str        The string to compose.
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param compat     If true the string will be composed according to
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                    NFKC rules and if false will be composed according to
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                    NFC rules.
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return String    The composed string
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String compose(String str, boolean compat) {
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return compose(str,compat,0);
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compose a string.
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be composed to according to the specified mode.
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param str        The string to compose.
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param compat     If true the string will be composed according to
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                    NFKC rules and if false will be composed according to
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                    NFC rules.
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options    The only recognized option is UNICODE_3_2
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return String    The composed string
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String compose(String str, boolean compat, int options) {
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getComposeNormalizer2(compat, options).normalize(str);
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compose a string.
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be composed to according to the specified mode.
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source The char array to compose.
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target A char buffer to receive the normalized text.
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param compat If true the char array will be composed according to
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                NFKC rules and if false will be composed according to
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                NFC rules.
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return int   The total buffer size needed;if greater than length of
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                result, the output was truncated.
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IndexOutOfBoundsException if target.length is less than the
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             required length
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int compose(char[] source,char[] target, boolean compat, int options) {
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return compose(source, 0, source.length, target, 0, target.length, compat, options);
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compose a string.
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be composed to according to the specified mode.
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param src       The char array to compose.
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param srcStart  Start index of the source
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param srcLimit  Limit index of the source
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param dest      The char buffer to fill in
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destStart Start index of the destination buffer
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destLimit End index of the destination buffer
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param compat If true the char array will be composed according to
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                NFKC rules and if false will be composed according to
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                NFC rules.
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return int   The total buffer size needed;if greater than length of
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                result, the output was truncated.
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IndexOutOfBoundsException if target.length is less than the
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             required length
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int compose(char[] src,int srcStart, int srcLimit,
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                              char[] dest,int destStart, int destLimit,
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                              boolean compat, int options) {
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart);
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsAppendable app = new CharsAppendable(dest, destStart, destLimit);
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        getComposeNormalizer2(compat, options).normalize(srcBuffer, app);
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return app.length();
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decompose a string.
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be decomposed to according to the specified mode.
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param str       The string to decompose.
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param compat    If true the string will be decomposed according to NFKD
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   rules and if false will be decomposed according to NFD
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   rules.
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return String   The decomposed string
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String decompose(String str, boolean compat) {
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return decompose(str,compat,0);
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decompose a string.
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be decomposed to according to the specified mode.
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param str     The string to decompose.
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param compat  If true the string will be decomposed according to NFKD
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                 rules and if false will be decomposed according to NFD
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                 rules.
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return String The decomposed string
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String decompose(String str, boolean compat, int options) {
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getDecomposeNormalizer2(compat, options).normalize(str);
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decompose a string.
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be decomposed to according to the specified mode.
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source The char array to decompose.
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target A char buffer to receive the normalized text.
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param compat If true the char array will be decomposed according to NFKD
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                rules and if false will be decomposed according to
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                NFD rules.
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return int   The total buffer size needed;if greater than length of
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                result,the output was truncated.
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IndexOutOfBoundsException if the target capacity is less than
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             the required length
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int decompose(char[] source,char[] target, boolean compat, int options) {
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return decompose(source, 0, source.length, target, 0, target.length, compat, options);
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decompose a string.
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be decomposed to according to the specified mode.
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param src       The char array to compose.
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param srcStart  Start index of the source
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param srcLimit  Limit index of the source
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param dest      The char buffer to fill in
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destStart Start index of the destination buffer
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destLimit End index of the destination buffer
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param compat If true the char array will be decomposed according to NFKD
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                rules and if false will be decomposed according to
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                NFD rules.
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return int   The total buffer size needed;if greater than length of
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                result,the output was truncated.
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IndexOutOfBoundsException if the target capacity is less than
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             the required length
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int decompose(char[] src,int srcStart, int srcLimit,
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                char[] dest,int destStart, int destLimit,
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                boolean compat, int options) {
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart);
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsAppendable app = new CharsAppendable(dest, destStart, destLimit);
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        getDecomposeNormalizer2(compat, options).normalize(srcBuffer, app);
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return app.length();
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Normalizes a <tt>String</tt> using the given normalization operation.
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The <tt>options</tt> parameter specifies which optional
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <tt>Normalizer</tt> features are to be enabled for this operation.
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Currently the only available option is {@link #UNICODE_3_2}.
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If you want the default behavior corresponding to one of the standard
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unicode Normalization Forms, use 0 for this argument.
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param str       the input string to be normalized.
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode      the normalization mode
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options   the optional features to be enabled.
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return String   the normalized string
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String normalize(String str, Mode mode, int options) {
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return mode.getNormalizer2(options).normalize(str);
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Normalize a string.
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be normalized according to the specified normalization
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * mode and options.
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param src        The string to normalize.
8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode       The normalization mode; one of Normalizer.NONE,
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                    Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                    Normalizer.NFKD, Normalizer.DEFAULT
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the normalized string
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String normalize(String src,Mode mode) {
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return normalize(src, mode, 0);
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Normalize a string.
8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be normalized according to the specified normalization
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * mode and options.
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source The char array to normalize.
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target A char buffer to receive the normalized text.
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode   The normalization mode; one of Normalizer.NONE,
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                Normalizer.NFKD, Normalizer.DEFAULT
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return int   The total buffer size needed;if greater than length of
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                result, the output was truncated.
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception    IndexOutOfBoundsException if the target capacity is less
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                than the required length
8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int normalize(char[] source,char[] target, Mode  mode, int options) {
8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return normalize(source,0,source.length,target,0,target.length,mode, options);
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Normalize a string.
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The string will be normalized according to the specified normalization
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * mode and options.
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param src       The char array to compose.
8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param srcStart  Start index of the source
8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param srcLimit  Limit index of the source
8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param dest      The char buffer to fill in
8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destStart Start index of the destination buffer
8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destLimit End index of the destination buffer
8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode      The normalization mode; one of Normalizer.NONE,
8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   Normalizer.NFKD, Normalizer.DEFAULT
8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return int      The total buffer size needed;if greater than length of
8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   result, the output was truncated.
8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception       IndexOutOfBoundsException if the target capacity is
8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   less than the required length
8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int normalize(char[] src,int srcStart, int srcLimit,
8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                char[] dest,int destStart, int destLimit,
8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                Mode  mode, int options) {
8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart);
8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsAppendable app = new CharsAppendable(dest, destStart, destLimit);
8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mode.getNormalizer2(options).normalize(srcBuffer, app);
8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return app.length();
8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Normalize a codepoint according to the given mode
8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param char32    The input string to be normalized.
8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode      The normalization mode
8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options   Options for use with exclusion set and tailored Normalization
8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                                   The only option that is currently recognized is UNICODE_3_2
8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return String   The normalized string
8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #UNICODE_3_2
8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String normalize(int char32, Mode mode, int options) {
8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(mode == NFD && options == 0) {
8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String decomposition = Normalizer2.getNFCInstance().getDecomposition(char32);
8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(decomposition == null) {
8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                decomposition = UTF16.valueOf(char32);
8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return decomposition;
8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return normalize(UTF16.valueOf(char32), mode, options);
8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Convenience method to normalize a codepoint according to the given mode
8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param char32    The input string to be normalized.
8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode      The normalization mode
8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return String   The normalized string
8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String normalize(int char32, Mode mode) {
8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return normalize(char32, mode, 0);
8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Convenience method.
8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source   string for determining if it is in a normalized format
8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode     normalization format (Normalizer.NFC,Normalizer.NFD,
8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  Normalizer.NFKC,Normalizer.NFKD)
8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return         Return code to specify if the text is normalized or not
9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                     (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static QuickCheckResult quickCheck(String source, Mode mode) {
9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return quickCheck(source, mode, 0);
9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Performing quick check on a string, to quickly determine if the string is
9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in a particular normalization format.
9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Three types of result can be returned Normalizer.YES, Normalizer.NO or
9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Normalizer.MAYBE. Result Normalizer.YES indicates that the argument
9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * string is in the desired normalized format, Normalizer.NO determines that
9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * argument string is not in the desired normalized format. A
9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Normalizer.MAYBE result indicates that a more thorough check is required,
9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the user may have to put the string in its normalized form and compare
9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the results.
9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source   string for determining if it is in a normalized format
9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode     normalization format (Normalizer.NFC,Normalizer.NFD,
9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  Normalizer.NFKC,Normalizer.NFKD)
9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options   Options for use with exclusion set and tailored Normalization
9227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                                   The only option that is currently recognized is UNICODE_3_2
9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return         Return code to specify if the text is normalized or not
9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                     (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static QuickCheckResult quickCheck(String source, Mode mode, int options) {
9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return mode.getNormalizer2(options).quickCheck(source);
9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Convenience method.
9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source Array of characters for determining if it is in a
9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                normalized format
9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode   normalization format (Normalizer.NFC,Normalizer.NFD,
9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                Normalizer.NFKC,Normalizer.NFKD)
9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options   Options for use with exclusion set and tailored Normalization
9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                                   The only option that is currently recognized is UNICODE_3_2
9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return       Return code to specify if the text is normalized or not
9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static QuickCheckResult quickCheck(char[] source, Mode mode, int options) {
9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return quickCheck(source, 0, source.length, mode, options);
9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Performing quick check on a string, to quickly determine if the string is
9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in a particular normalization format.
9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Three types of result can be returned Normalizer.YES, Normalizer.NO or
9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Normalizer.MAYBE. Result Normalizer.YES indicates that the argument
9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * string is in the desired normalized format, Normalizer.NO determines that
9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * argument string is not in the desired normalized format. A
9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Normalizer.MAYBE result indicates that a more thorough check is required,
9567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the user may have to put the string in its normalized form and compare
9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the results.
9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source    string for determining if it is in a normalized format
9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param start     the start index of the source
9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param limit     the limit index of the source it is equal to the length
9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode      normalization format (Normalizer.NFC,Normalizer.NFD,
9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   Normalizer.NFKC,Normalizer.NFKD)
9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options   Options for use with exclusion set and tailored Normalization
9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                                   The only option that is currently recognized is UNICODE_3_2
9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return          Return code to specify if the text is normalized or not
9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   (Normalizer.YES, Normalizer.NO or
9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   Normalizer.MAYBE)
9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static QuickCheckResult quickCheck(char[] source,int start,
9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                              int limit, Mode mode,int options) {
9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharBuffer srcBuffer = CharBuffer.wrap(source, start, limit - start);
9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return mode.getNormalizer2(options).quickCheck(srcBuffer);
9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Test if a string is in a given normalization form.
9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is semantically equivalent to source.equals(normalize(source, mode)).
9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unlike quickCheck(), this function returns a definitive result,
9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * never a "maybe".
9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For NFD, NFKD, and FCD, both functions work exactly the same.
9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For NFC and NFKC where quickCheck may return "maybe", this function will
9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * perform further tests to arrive at a true/false result.
9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param src       The input array of characters to be checked to see if
9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   it is normalized
9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param start     The strart index in the source
9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param limit     The limit index in the source
9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode      the normalization mode
9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options   Options for use with exclusion set and tailored Normalization
9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                                   The only option that is currently recognized is UNICODE_3_2
9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return Boolean value indicating whether the source string is in the
9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         "mode" normalization form
9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
9977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static boolean isNormalized(char[] src,int start,
9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                       int limit, Mode mode,
10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                       int options) {
10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharBuffer srcBuffer = CharBuffer.wrap(src, start, limit - start);
10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return mode.getNormalizer2(options).isNormalized(srcBuffer);
10037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
10067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Test if a string is in a given normalization form.
10077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is semantically equivalent to source.equals(normalize(source, mode)).
10087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unlike quickCheck(), this function returns a definitive result,
10107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * never a "maybe".
10117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For NFD, NFKD, and FCD, both functions work exactly the same.
10127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For NFC and NFKC where quickCheck may return "maybe", this function will
10137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * perform further tests to arrive at a true/false result.
10147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param str       the input string to be checked to see if it is
10157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   normalized
10167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode      the normalization mode
10177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options   Options for use with exclusion set and tailored Normalization
10187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  The only option that is currently recognized is UNICODE_3_2
10197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #isNormalized
10207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
10217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static boolean isNormalized(String str, Mode mode, int options) {
10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return mode.getNormalizer2(options).isNormalized(str);
10247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Convenience Method
10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param char32    the input code point to be checked to see if it is
10297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                   normalized
10307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode      the normalization mode
10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options   Options for use with exclusion set and tailored Normalization
10327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  The only option that is currently recognized is UNICODE_3_2
10337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #isNormalized
10357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
10367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static boolean isNormalized(int char32, Mode mode,int options) {
10387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return isNormalized(UTF16.valueOf(char32), mode, options);
10397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
10427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compare two strings for canonical equivalence.
10437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Further options include case-insensitive comparison and
10447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * code point order (as opposed to code unit order).
10457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Canonical equivalence between two strings is defined as their normalized
10477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * forms (NFD or NFC) being identical.
10487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This function compares strings incrementally instead of normalizing
10497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (and optionally case-folding) both strings entirely,
10507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * improving performance significantly.
10517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bulk normalization is only necessary if the strings do not fulfill the
10537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * FCD conditions. Only in this case, and only if the strings are relatively
10547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * long, is memory allocated temporarily.
10557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For FCD strings and short non-FCD strings there is no memory allocation.
10567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Semantically, this is equivalent to
10587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2)))
10597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * where code point order and foldCase are all optional.
10607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s1        First source character array.
10627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s1Start   start index of source
10637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s1Limit   limit of the source
10647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s2        Second source character array.
10667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s2Start   start index of the source
10677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s2Limit   limit of the source
10687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options A bit set of options:
10707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - FOLD_CASE_DEFAULT or 0 is used for default options:
10717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Case-sensitive comparison in code unit order, and the input strings
10727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     are quick-checked for FCD.
10737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - INPUT_IS_FCD
10757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Set if the caller knows that both s1 and s2 fulfill the FCD
10767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     conditions.If not set, the function will quickCheck for FCD
10777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     and normalize if necessary.
10787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - COMPARE_CODE_POINT_ORDER
10807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Set to choose code point order instead of code unit order
10817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - COMPARE_IGNORE_CASE
10837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Set to compare strings case-insensitively using case folding,
10847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     instead of case-sensitively.
10857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     If set, then the following case folding options are used.
10867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return <0 or 0 or >0 as usual for string comparisons
10897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #normalize
10917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #FCD
10927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
10937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int compare(char[] s1, int s1Start, int s1Limit,
10957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                              char[] s2, int s2Start, int s2Limit,
10967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                              int options) {
10977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if( s1==null || s1Start<0 || s1Limit<0 ||
10987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            s2==null || s2Start<0 || s2Limit<0 ||
10997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            s1Limit<s1Start || s2Limit<s2Start
11007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ) {
11017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException();
11027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return internalCompare(CharBuffer.wrap(s1, s1Start, s1Limit-s1Start),
11047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                               CharBuffer.wrap(s2, s2Start, s2Limit-s2Start),
11057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                               options);
11067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
11097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compare two strings for canonical equivalence.
11107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Further options include case-insensitive comparison and
11117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * code point order (as opposed to code unit order).
11127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Canonical equivalence between two strings is defined as their normalized
11147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * forms (NFD or NFC) being identical.
11157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This function compares strings incrementally instead of normalizing
11167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (and optionally case-folding) both strings entirely,
11177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * improving performance significantly.
11187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bulk normalization is only necessary if the strings do not fulfill the
11207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * FCD conditions. Only in this case, and only if the strings are relatively
11217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * long, is memory allocated temporarily.
11227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For FCD strings and short non-FCD strings there is no memory allocation.
11237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Semantically, this is equivalent to
11257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2)))
11267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * where code point order and foldCase are all optional.
11277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s1 First source string.
11297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s2 Second source string.
11307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options A bit set of options:
11327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - FOLD_CASE_DEFAULT or 0 is used for default options:
11337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Case-sensitive comparison in code unit order, and the input strings
11347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     are quick-checked for FCD.
11357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - INPUT_IS_FCD
11377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Set if the caller knows that both s1 and s2 fulfill the FCD
11387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     conditions. If not set, the function will quickCheck for FCD
11397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     and normalize if necessary.
11407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - COMPARE_CODE_POINT_ORDER
11427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Set to choose code point order instead of code unit order
11437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - COMPARE_IGNORE_CASE
11457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Set to compare strings case-insensitively using case folding,
11467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     instead of case-sensitively.
11477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     If set, then the following case folding options are used.
11487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return <0 or 0 or >0 as usual for string comparisons
11507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #normalize
11527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #FCD
11537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
11547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
11557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int compare(String s1, String s2, int options) {
11567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return internalCompare(s1, s2, options);
11577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
11607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compare two strings for canonical equivalence.
11617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Further options include case-insensitive comparison and
11627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * code point order (as opposed to code unit order).
11637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Convenience method.
11647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s1 First source string.
11667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s2 Second source string.
11677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options A bit set of options:
11697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - FOLD_CASE_DEFAULT or 0 is used for default options:
11707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Case-sensitive comparison in code unit order, and the input strings
11717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     are quick-checked for FCD.
11727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - INPUT_IS_FCD
11747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Set if the caller knows that both s1 and s2 fulfill the FCD
11757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     conditions. If not set, the function will quickCheck for FCD
11767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     and normalize if necessary.
11777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - COMPARE_CODE_POINT_ORDER
11797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Set to choose code point order instead of code unit order
11807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   - COMPARE_IGNORE_CASE
11827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     Set to compare strings case-insensitively using case folding,
11837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     instead of case-sensitively.
11847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     If set, then the following case folding options are used.
11857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return <0 or 0 or >0 as usual for string comparisons
11877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
11887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #normalize
11897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #FCD
11907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
11917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
11927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int compare(char[] s1, char[] s2, int options) {
11937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return internalCompare(CharBuffer.wrap(s1), CharBuffer.wrap(s2), options);
11947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
11977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Convenience method that can have faster implementation
11987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * by not allocating buffers.
11997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param char32a    the first code point to be checked against the
12007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param char32b    the second code point
12017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options    A bit set of options
12027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
12037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
12047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int compare(int char32a, int char32b, int options) {
12057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return internalCompare(UTF16.valueOf(char32a), UTF16.valueOf(char32b), options|INPUT_IS_FCD);
12067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
12077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
12097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Convenience method that can have faster implementation
12107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * by not allocating buffers.
12117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param char32a   the first code point to be checked against
12127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param str2      the second string
12137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options   A bit set of options
12147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
12157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
12167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int compare(int char32a, String str2, int options) {
12177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return internalCompare(UTF16.valueOf(char32a), str2, options);
12187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
12197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* Concatenation of normalized strings --------------------------------- */
12217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
12227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Concatenate normalized strings, making sure that the result is normalized
12237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * as well.
12247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
12257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If both the left and the right strings are in
12267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the normalization form according to "mode",
12277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * then the result will be
12287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
12297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <code>
12307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     dest=normalize(left+right, mode)
12317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </code>
12327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
12337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * With the input strings already being normalized,
12347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * this function will use next() and previous()
12357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * to find the adjacent end pieces of the input strings.
12367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Only the concatenation of these end pieces will be normalized and
12377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * then concatenated with the remaining parts of the input strings.
12387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
12397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * It is allowed to have dest==left to avoid copying the entire left string.
12407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
12417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param left Left source array, may be same as dest.
12427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param leftStart start in the left array.
12437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param leftLimit limit in the left array (==length)
12447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param right Right source array.
12457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param rightStart start in the right array.
12467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param rightLimit limit in the right array (==length)
12477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param dest The output buffer; can be null if destStart==destLimit==0
12487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *              for pure preflighting.
12497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destStart start in the destination array
12507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param destLimit limit in the destination array (==length)
12517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode The normalization mode.
12527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
12537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return Length of output (number of chars) when successful or
12547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *          IndexOutOfBoundsException
12557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IndexOutOfBoundsException whose message has the string
12567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             representation of destination capacity required.
12577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #normalize
12587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #next
12597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #previous
12607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IndexOutOfBoundsException if target capacity is less than the
12617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *             required length
12627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
12637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
12647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int concatenate(char[] left,  int leftStart,  int leftLimit,
12657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                  char[] right, int rightStart, int rightLimit,
12667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                  char[] dest,  int destStart,  int destLimit,
12677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                  Normalizer.Mode mode, int options) {
12687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(dest == null) {
12697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException();
12707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* check for overlapping right and destination */
12737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (right == dest && rightStart < destLimit && destStart < rightLimit) {
12747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("overlapping right and dst ranges");
12757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* allow left==dest */
12787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder destBuilder=new StringBuilder(leftLimit-leftStart+rightLimit-rightStart+16);
12797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        destBuilder.append(left, leftStart, leftLimit-leftStart);
12807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharBuffer rightBuffer=CharBuffer.wrap(right, rightStart, rightLimit-rightStart);
12817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mode.getNormalizer2(options).append(destBuilder, rightBuffer);
12827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int destLength=destBuilder.length();
12837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(destLength<=(destLimit-destStart)) {
12847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            destBuilder.getChars(0, destLength, dest, destStart);
12857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return destLength;
12867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
12877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IndexOutOfBoundsException(Integer.toString(destLength));
12887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
12907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
12927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Concatenate normalized strings, making sure that the result is normalized
12937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * as well.
12947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
12957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If both the left and the right strings are in
12967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the normalization form according to "mode",
12977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * then the result will be
12987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
12997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <code>
13007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     dest=normalize(left+right, mode)
13017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </code>
13027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For details see concatenate
13047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param left Left source string.
13067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param right Right source string.
13077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode The normalization mode.
13087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
13097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return result
13107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #concatenate
13127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #normalize
13137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #next
13147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #previous
13157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #concatenate
13167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
13177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
13187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String concatenate(char[] left, char[] right,Mode mode, int options) {
13197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder dest=new StringBuilder(left.length+right.length+16).append(left);
13207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return mode.getNormalizer2(options).append(dest, CharBuffer.wrap(right)).toString();
13217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
13247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Concatenate normalized strings, making sure that the result is normalized
13257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * as well.
13267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If both the left and the right strings are in
13287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the normalization form according to "mode",
13297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * then the result will be
13307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <code>
13327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     dest=normalize(left+right, mode)
13337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </code>
13347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * With the input strings already being normalized,
13367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * this function will use next() and previous()
13377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * to find the adjacent end pieces of the input strings.
13387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Only the concatenation of these end pieces will be normalized and
13397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * then concatenated with the remaining parts of the input strings.
13407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param left Left source string.
13427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param right Right source string.
13437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param mode The normalization mode.
13447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param options The normalization options, ORed together (0 for no options).
13457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return result
13467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #concatenate
13487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #normalize
13497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #next
13507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #previous
13517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #concatenate
13527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
13537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
13547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String concatenate(String left, String right, Mode mode, int options) {
13557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder dest=new StringBuilder(left.length()+right.length()+16).append(left);
13567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return mode.getNormalizer2(options).append(dest, right).toString();
13577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
13607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets the FC_NFKC closure value.
13617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param c The code point whose closure value is to be retrieved
13627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param dest The char array to receive the closure value
13637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the length of the closure value; 0 if there is none
13647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 3.8
13657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
13667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int getFC_NFKC_Closure(int c,char[] dest) {
13677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String closure=getFC_NFKC_Closure(c);
13687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int length=closure.length();
13697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(length!=0 && dest!=null && length<=dest.length) {
13707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            closure.getChars(0, length, dest, 0);
13717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
13727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return length;
13737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
13757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets the FC_NFKC closure value.
13767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param c The code point whose closure value is to be retrieved
13777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return String representation of the closure value; "" if there is none
13787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 3.8
13797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
13807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String getFC_NFKC_Closure(int c) {
13817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Compute the FC_NFKC_Closure on the fly:
13827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // We have the API for complete coverage of Unicode properties, although
13837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // this value by itself is not useful via API.
13847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // (What could be useful is a custom normalization table that combines
13857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // case folding and NFKC.)
13867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // For the derivation, see Unicode's DerivedNormalizationProps.txt.
13877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Normalizer2 nfkc=NFKCModeImpl.INSTANCE.normalizer2;
13887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCaseProps csp=UCaseProps.INSTANCE;
13897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // first: b = NFKC(Fold(a))
13907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder folded=new StringBuilder();
13917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int folded1Length=csp.toFullFolding(c, folded, 0);
13927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(folded1Length<0) {
13937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Normalizer2Impl nfkcImpl=((Norm2AllModes.Normalizer2WithImpl)nfkc).impl;
13947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(nfkcImpl.getCompQuickCheck(nfkcImpl.getNorm16(c))!=0) {
13957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return "";  // c does not change at all under CaseFolding+NFKC
13967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            folded.appendCodePoint(c);
13987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
13997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(folded1Length>UCaseProps.MAX_STRING_LENGTH) {
14007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                folded.appendCodePoint(folded1Length);
14017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String kc1=nfkc.normalize(folded);
14047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // second: c = NFKC(Fold(b))
14057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String kc2=nfkc.normalize(UCharacter.foldCase(kc1, 0));
14067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if (c != b) add the mapping from a to c
14077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(kc1.equals(kc2)) {
14087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "";
14097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
14107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return kc2;
14117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //-------------------------------------------------------------------------
14157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Iteration API
14167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //-------------------------------------------------------------------------
14177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
14197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the current character in the normalized text.
14207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The codepoint as an int
14217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
14227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
14237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int current() {
14247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(bufferPos<buffer.length() || nextNormalize()) {
14257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return buffer.codePointAt(bufferPos);
14267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
14277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return DONE;
14287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
14327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the next character in the normalized text and advance
14337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the iteration position by one.  If the end
14347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * of the text has already been reached, {@link #DONE} is returned.
14357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The codepoint as an int
14367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
14377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
14387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int next() {
14397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(bufferPos<buffer.length() ||  nextNormalize()) {
14407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c=buffer.codePointAt(bufferPos);
14417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            bufferPos+=Character.charCount(c);
14427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return c;
14437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
14447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return DONE;
14457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
14507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the previous character in the normalized text and decrement
14517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the iteration position by one.  If the beginning
14527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * of the text has already been reached, {@link #DONE} is returned.
14537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The codepoint as an int
14547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
14557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
14567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int previous() {
14577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(bufferPos>0 || previousNormalize()) {
14587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c=buffer.codePointBefore(bufferPos);
14597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            bufferPos-=Character.charCount(c);
14607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return c;
14617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
14627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return DONE;
14637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
14677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Reset the index to the beginning of the text.
14687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is equivalent to setIndexOnly(startIndex)).
14697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
14707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
14717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void reset() {
14727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text.setToStart();
14737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        currentIndex=nextIndex=0;
14747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        clearBuffer();
14757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
14787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the iteration position in the input text that is being normalized,
14797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * without any immediate normalization.
14807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * After setIndexOnly(), getIndex() will return the same index that is
14817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * specified here.
14827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
14837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param index the desired index in the input text.
14847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
14857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
14867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setIndexOnly(int index) {
14877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text.setIndex(index);  // validates index
14887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        currentIndex=nextIndex=index;
14897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        clearBuffer();
14907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
14937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the iteration position in the input text that is being normalized
14947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and return the first normalized character at that position.
14957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
14967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <b>Note:</b> This method sets the position in the <em>input</em> text,
14977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * while {@link #next} and {@link #previous} iterate through characters
14987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in the normalized <em>output</em>.  This means that there is not
14997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * necessarily a one-to-one correspondence between characters returned
15007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
15017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * returned from <tt>setIndex</tt> and {@link #getIndex}.
15027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
15037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param index the desired index in the input text.
15047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return   the first normalized character that is the result of iterating
15067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            forward starting at the given index.
15077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IllegalArgumentException if the given index is less than
15097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *          {@link #getBeginIndex} or greater than {@link #getEndIndex}.
15107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated ICU 3.2
15117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @obsolete ICU 3.2
15127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
15137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
15147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     ///CLOVER:OFF
15157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     public int setIndex(int index) {
15167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         setIndexOnly(index);
15177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         return current();
15187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     }
15197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     ///CLOVER:ON
15207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
15217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Retrieve the index of the start of the input text. This is the begin
15227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
15237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
15247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated ICU 2.2. Use startIndex() instead.
15257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The codepoint as an int
15267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #startIndex
15277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
15287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
15297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getBeginIndex() {
15307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 0;
15317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
15347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Retrieve the index of the end of the input text.  This is the end index
15357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
15367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * over which this <tt>Normalizer</tt> is iterating
15377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated ICU 2.2. Use endIndex() instead.
15387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The codepoint as an int
15397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #endIndex
15407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
15417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
15427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getEndIndex() {
15437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return endIndex();
15447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
15467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the first character in the normalized text.  This resets
15477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the <tt>Normalizer's</tt> position to the beginning of the text.
15487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The codepoint as an int
15497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
15507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
15517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int first() {
15527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset();
15537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return next();
15547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
15577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the last character in the normalized text.  This resets
15587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the <tt>Normalizer's</tt> position to be just before the
15597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the input text corresponding to that normalized character.
15607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The codepoint as an int
15617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
15627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
15637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int last() {
15647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text.setToLimit();
15657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        currentIndex=nextIndex=text.getIndex();
15667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        clearBuffer();
15677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return previous();
15687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
15717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Retrieve the current iteration position in the input text that is
15727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * being normalized.  This method is useful in applications such as
15737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * searching, where you need to be able to determine the position in
15747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the input text that corresponds to a given normalized output character.
15757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
15767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <b>Note:</b> This method sets the position in the <em>input</em>, while
15777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #next} and {@link #previous} iterate through characters in the
15787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <em>output</em>.  This means that there is not necessarily a one-to-one
15797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * correspondence between characters returned by <tt>next</tt> and
15807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <tt>previous</tt> and the indices passed to and returned from
15817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <tt>setIndex</tt> and {@link #getIndex}.
15827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The current iteration position
15837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
15847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
15857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getIndex() {
15867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(bufferPos<buffer.length()) {
15877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return currentIndex;
15887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
15897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return nextIndex;
15907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
15947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Retrieve the index of the start of the input text. This is the begin
15957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
15967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
15977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The current iteration position
15987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
15997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int startIndex() {
16017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 0;
16027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Retrieve the index of the end of the input text.  This is the end index
16067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
16077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * over which this <tt>Normalizer</tt> is iterating
16087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The current iteration position
16097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
16107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int endIndex() {
16127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return text.getLength();
16137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //-------------------------------------------------------------------------
16167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Iterator attributes
16177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //-------------------------------------------------------------------------
16187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the normalization mode for this object.
16207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
16217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <b>Note:</b>If the normalization mode is changed while iterating
16227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * over a string, calls to {@link #next} and {@link #previous} may
16237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * return previously buffers characters in the old normalization mode
16247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * until the iteration is able to re-sync at the next base character.
16257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * It is safest to call {@link #setText setText()}, {@link #first},
16267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #last}, etc. after calling <tt>setMode</tt>.
16277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
16287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param newMode the new mode for this <tt>Normalizer</tt>.
16297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The supported modes are:
16307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <ul>
16317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  <li>{@link #NFC}    - Unicode canonical decompositiion
16327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                        followed by canonical composition.
16337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  <li>{@link #NFKC}   - Unicode compatibility decompositiion
16347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                        follwed by canonical composition.
16357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  <li>{@link #NFD}    - Unicode canonical decomposition
16367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  <li>{@link #NFKD}   - Unicode compatibility decomposition.
16377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  <li>{@link #NONE}   - Do nothing but return characters
16387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                        from the underlying input text.
16397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </ul>
16407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
16417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getMode
16427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
16437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setMode(Mode newMode) {
16457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mode = newMode;
16467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        norm2 = mode.getNormalizer2(options);
16477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the basic operation performed by this <tt>Normalizer</tt>
16507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
16517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setMode
16527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
16537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Mode getMode() {
16557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return mode;
16567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set options that affect this <tt>Normalizer</tt>'s operation.
16597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Options do not change the basic composition or decomposition operation
16607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * that is being performed , but they control whether
16617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * certain optional portions of the operation are done.
16627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Currently the only available option is:
16637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
16647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <ul>
16657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   <li>{@link #UNICODE_3_2} - Use Normalization conforming to Unicode version 3.2.
16667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </ul>
16677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
16687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param   option  the option whose value is to be set.
16697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param   value   the new setting for the option.  Use <tt>true</tt> to
16707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  turn the option on and <tt>false</tt> to turn it off.
16717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
16727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getOption
16737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
16747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setOption(int option,boolean value) {
16767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (value) {
16777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            options |= option;
16787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
16797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            options &= (~option);
16807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        norm2 = mode.getNormalizer2(options);
16827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Determine whether an option is turned on or off.
16867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
16877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setOption
16887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
16897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getOption(int option) {
16917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((options & option)!=0) {
16927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 1 ;
16937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
16947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;
16957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets the underlying text storage
17007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param fillIn the char buffer to fill the UTF-16 units.
17017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         The length of the buffer should be equal to the length of the
17027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         underlying text storage
17037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IndexOutOfBoundsException If the index passed for the array is invalid.
17047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see   #getLength
17057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
17067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getText(char[] fillIn) {
17087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return text.getText(fillIn);
17097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets the length of underlying text storage
17137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the length
17147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
17157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getLength() {
17177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return text.getLength();
17187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the text under iteration as a string
17227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return a copy of the text under iteration.
17237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
17247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String getText() {
17267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return text.getText();
17277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the input text over which this <tt>Normalizer</tt> will iterate.
17317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iteration position is set to the beginning of the input text.
17327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param newText   The new string to be normalized.
17337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
17347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(StringBuffer newText) {
17367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
17377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (newIter == null) {
17387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalStateException("Could not create a new UCharacterIterator");
17397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text = newIter;
17417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset();
17427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the input text over which this <tt>Normalizer</tt> will iterate.
17467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iteration position is set to the beginning of the input text.
17477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param newText   The new string to be normalized.
17487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
17497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(char[] newText) {
17517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
17527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (newIter == null) {
17537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalStateException("Could not create a new UCharacterIterator");
17547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text = newIter;
17567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset();
17577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the input text over which this <tt>Normalizer</tt> will iterate.
17617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iteration position is set to the beginning of the input text.
17627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param newText   The new string to be normalized.
17637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
17647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(String newText) {
17667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
17677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (newIter == null) {
17687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalStateException("Could not create a new UCharacterIterator");
17697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text = newIter;
17717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset();
17727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the input text over which this <tt>Normalizer</tt> will iterate.
17767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iteration position is set to the beginning of the input text.
17777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param newText   The new string to be normalized.
17787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
17797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(CharacterIterator newText) {
17817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
17827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (newIter == null) {
17837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalStateException("Could not create a new UCharacterIterator");
17847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text = newIter;
17867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset();
17877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the input text over which this <tt>Normalizer</tt> will iterate.
17917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iteration position is set to the beginning of the string.
17927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param newText   The new string to be normalized.
17937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
17947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(UCharacterIterator newText) {
17967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try{
17977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            UCharacterIterator newIter = (UCharacterIterator)newText.clone();
17987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (newIter == null) {
17997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new IllegalStateException("Could not create a new UCharacterIterator");
18007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text = newIter;
18027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            reset();
18037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }catch(CloneNotSupportedException e) {
18047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new ICUCloneNotSupportedException("Could not clone the UCharacterIterator", e);
18057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
18077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void clearBuffer() {
18097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        buffer.setLength(0);
18107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        bufferPos=0;
18117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
18127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean nextNormalize() {
18147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        clearBuffer();
18157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        currentIndex=nextIndex;
18167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text.setIndex(nextIndex);
18177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Skip at least one character so we make progress.
18187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c=text.nextCodePoint();
18197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(c<0) {
18207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
18217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder segment=new StringBuilder().appendCodePoint(c);
18237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while((c=text.nextCodePoint())>=0) {
18247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(norm2.hasBoundaryBefore(c)) {
18257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                text.moveCodePointIndex(-1);
18267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
18277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            segment.appendCodePoint(c);
18297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nextIndex=text.getIndex();
18317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        norm2.normalize(segment, buffer);
18327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return buffer.length()!=0;
18337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
18347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean previousNormalize() {
18367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        clearBuffer();
18377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nextIndex=currentIndex;
18387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text.setIndex(currentIndex);
18397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder segment=new StringBuilder();
18407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c;
18417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while((c=text.previousCodePoint())>=0) {
18427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<=0xffff) {
18437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                segment.insert(0, (char)c);
18447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
18457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                segment.insert(0, Character.toChars(c));
18467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(norm2.hasBoundaryBefore(c)) {
18487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
18497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        currentIndex=text.getIndex();
18527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        norm2.normalize(segment, buffer);
18537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        bufferPos=buffer.length();
18547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return buffer.length()!=0;
18557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
18567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* compare canonically equivalent ------------------------------------------- */
18587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: Broaden the public compare(String, String, options) API like this. Ticket #7407
18607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int internalCompare(CharSequence s1, CharSequence s2, int options) {
18617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int normOptions=options>>>COMPARE_NORM_OPTIONS_SHIFT;
18627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        options|= COMPARE_EQUIV;
18637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
18657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * UAX #21 Case Mappings, as fixed for Unicode version 4
18667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * (see Jitterbug 2021), defines a canonical caseless match as
18677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
18687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * A string X is a canonical caseless match
18697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * for a string Y if and only if
18707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
18717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
18727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * For better performance, we check for FCD (or let the caller tell us that
18737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * both strings are in FCD) for the inner normalization.
18747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that
18757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * case-folding preserves the FCD-ness of a string.
18767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * The outer normalization is then only performed by NormalizerImpl.cmpEquivFold()
18777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * when there is a difference.
18787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
18797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Exception: When using the Turkic case-folding option, we do perform
18807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * full NFD first. This is because in the Turkic case precomposed characters
18817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * with 0049 capital I or 0069 small i fold differently whether they
18827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * are first decomposed or not, so an FCD check - a check only for
18837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * canonical order - is not sufficient.
18847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
18857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((options&INPUT_IS_FCD)==0 || (options&FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) {
18867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Normalizer2 n2;
18877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((options&FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) {
18887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                n2=NFD.getNormalizer2(normOptions);
18897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
18907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                n2=FCD.getNormalizer2(normOptions);
18917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // check if s1 and/or s2 fulfill the FCD conditions
18947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int spanQCYes1=n2.spanQuickCheckYes(s1);
18957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int spanQCYes2=n2.spanQuickCheckYes(s2);
18967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
18977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
18987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * ICU 2.4 had a further optimization:
18997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * If both strings were not in FCD, then they were both NFD'ed,
19007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * and the COMPARE_EQUIV option was turned off.
19017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * It is not entirely clear that this is valid with the current
19027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * definition of the canonical caseless match.
19037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * Therefore, ICU 2.6 removes that optimization.
19047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
19057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(spanQCYes1<s1.length()) {
19077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuilder fcd1=new StringBuilder(s1.length()+16).append(s1, 0, spanQCYes1);
19087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                s1=n2.normalizeSecondAndAppend(fcd1, s1.subSequence(spanQCYes1, s1.length()));
19097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(spanQCYes2<s2.length()) {
19117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuilder fcd2=new StringBuilder(s2.length()+16).append(s2, 0, spanQCYes2);
19127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                s2=n2.normalizeSecondAndAppend(fcd2, s2.subSequence(spanQCYes2, s2.length()));
19137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
19147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
19157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return cmpEquivFold(s1, s2, options);
19177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
19187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
19207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compare two strings for canonical equivalence.
19217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Further options include case-insensitive comparison and
19227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * code point order (as opposed to code unit order).
19237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * In this function, canonical equivalence is optional as well.
19257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If canonical equivalence is tested, then both strings must fulfill
19267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the FCD check.
19277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Semantically, this is equivalent to
19297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
19307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * where code point order, NFD and foldCase are all optional.
19317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * String comparisons almost always yield results before processing both strings
19337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * completely.
19347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * They are generally more efficient working incrementally instead of
19357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * performing the sub-processing (strlen, normalization, case-folding)
19367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * on the entire strings first.
19377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * It is also unnecessary to normalize identical characters.
19397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This function works in principle as follows:
19417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * loop {
19437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   get one code unit c1 from s1 (-1 if end of source)
19447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   get one code unit c2 from s2 (-1 if end of source)
19457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   if(either string finished) {
19477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     return result;
19487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   }
19497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   if(c1==c2) {
19507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *     continue;
19517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   }
19527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   // c1!=c2
19547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   try to decompose/case-fold c1/c2, and continue if one does;
19557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   // still c1!=c2 and neither decomposes/case-folds, return result
19577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   return c1-c2;
19587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * }
19597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * When a character decomposes, then the pointer for that source changes to
19617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the decomposition, pushing the previous pointer onto a stack.
19627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * When the end of the decomposition is reached, then the code unit reader
19637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * pops the previous source from the stack.
19647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (Same for case-folding.)
19657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is complicated further by operating on variable-width UTF-16.
19677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The top part of the loop works on code units, while lookups for decomposition
19687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and case-folding need code points.
19697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Code points are assembled after the equality/end-of-source part.
19707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The source pointer is only advanced beyond all code units when the code point
19717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * actually decomposes/case-folds.
19727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If we were on a trail surrogate unit when assembling a code point,
19747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and the code point decomposes/case-folds, then the decomposition/folding
19757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * result must be compared with the part of the other string that corresponds to
19767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * this string's lead surrogate.
19777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Since we only assemble a code point when hitting a trail unit when the
19787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * preceding lead units were identical, we back up the other string by one unit
19797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in such a case.
19807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The optional code point order comparison at the end works with
19827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the same fix-up as the other code point order comparison functions.
19837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * See ustring.c and the comment near the end of this function.
19847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Assumption: A decomposition or case-folding result string never contains
19867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * a single surrogate. This is a safe assumption in the Unicode Standard.
19877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Therefore, we do not need to check for surrogate pairs across
19887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * decomposition/case-folding boundaries.
19897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Further assumptions (see the verifications in tstnorm.cpp):
19917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The API function checks for FCD first, while the core function
19927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * first case-folds and then decomposes. This requires that case-folding does not
19937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * un-FCD any strings.
19947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The API function may also NFD the input and turn off decomposition.
19967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This requires that case-folding does not un-NFD strings either.
19977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
19987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * TODO If either of the above two assumptions is violated,
19997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * then this entire code must be re-thought.
20007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If this happens, then a simple solution is to case-fold both strings up front
20017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and to turn off UNORM_INPUT_IS_FCD.
20027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * We already do this when not both strings are in FCD because makeFCD
20037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * would be a partial NFD before the case folding, which does not work.
20047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Note that all of this is only a problem when case-folding _and_
20057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * canonical equivalence come together.
20067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (Comments in unorm_compare() are more up to date than this TODO.)
20077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
20087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
     * Stack element for previous-level source/decomposition pointers.
     *
     * cmpEquivFold() saves here the sequence it was reading (cs) and the index at
     * which to resume (s) before it switches to reading a replacement text such as
     * a case-folding result. When the replacement text is exhausted, both values
     * are read back and the limit is recomputed from cs.length().
     * The pop loop skips elements whose cs is null.
     */
20107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class CmpEquivLevel {
20117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharSequence cs;    /* sequence that was current when this level was pushed */
20127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int s;              /* index into cs at which reading resumes after the pop */
20137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    };
20147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final CmpEquivLevel[] createCmpEquivLevelStack() {
        /*
         * Returns a new two-slot stack with both elements already allocated, so
         * the caller can assign stack[i].cs and stack[i].s directly.
         * cmpEquivFold() calls this lazily, the first time it has to push a level
         * for a string (its stack reference is still null until then).
         */
20157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return new CmpEquivLevel[] {
20167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new CmpEquivLevel(), new CmpEquivLevel()
20177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
20187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
20197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
20217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Internal option for unorm_cmpEquivFold() for decomposing.
20227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If not set, just do strcasecmp().
     * <p>
     * (The names above are those of the C implementation.) In this class the bit
     * is tested by <code>cmpEquivFold()</code>: when it is set in
     * <code>options</code>, the NFC implementation data is obtained and unequal
     * code points are looked up for a decomposition; when it is clear, the
     * normalizer reference stays null and the decomposition lookup, which is
     * guarded by the same test, is skipped. Case folding is controlled
     * separately by <code>COMPARE_IGNORE_CASE</code>.
20237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
20247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int COMPARE_EQUIV=0x80000;
20257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* internal function; package visibility for use by UTF16.StringComparator */
20277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*package*/ static int cmpEquivFold(CharSequence cs1, CharSequence cs2, int options) {
20287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Normalizer2Impl nfcImpl;
20297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCaseProps csp;
20307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* current-level start/limit - s1/s2 as current */
20327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int s1, s2, limit1, limit2;
20337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* decomposition and case folding variables */
20357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int length;
20367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* stacks of previous-level start/current/limit */
20387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CmpEquivLevel[] stack1=null, stack2=null;
20397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* buffers for algorithmic decompositions */
20417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String decomp1, decomp2;
20427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* case folding buffers, only use current-level start/limit */
20447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder fold1, fold2;
20457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* track which is the current level per string */
20477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int level1, level2;
20487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* current code units, and code points for lookups */
20507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c1, c2, cp1, cp2;
20517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* no argument error checking because this itself is not an API */
20537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
20557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
20567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * otherwise this function must behave exactly as uprv_strCompare()
20577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * not checking for that here makes testing this function easier
20587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
20597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* normalization/properties data loaded? */
20617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((options&COMPARE_EQUIV)!=0) {
20627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            nfcImpl=Norm2AllModes.getNFCInstance().impl;
20637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
20647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            nfcImpl=null;
20657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((options&COMPARE_IGNORE_CASE)!=0) {
20677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            csp=UCaseProps.INSTANCE;
20687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fold1=new StringBuilder();
20697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fold2=new StringBuilder();
20707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
20717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            csp=null;
20727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fold1=fold2=null;
20737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* initialize */
20767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        s1=0;
20777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        limit1=cs1.length();
20787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        s2=0;
20797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        limit2=cs2.length();
20807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        level1=level2=0;
20827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        c1=c2=-1;
20837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* comparison loop */
20857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
20867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
20877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * here a code unit value of -1 means "get another code unit"
20887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * below it will mean "this source is finished"
20897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
20907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c1<0) {
20927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* get next code unit from string 1, post-increment */
20937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for(;;) {
20947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(s1==limit1) {
20957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(level1==0) {
20967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c1=-1;
20977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            break;
20987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
20997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
21007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c1=cs1.charAt(s1++);
21017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
21027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
21037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* reached end of level buffer, pop one level */
21057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    do {
21067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --level1;
21077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        cs1=stack1[level1].cs;
21087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } while(cs1==null);
21097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    s1=stack1[level1].s;
21107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    limit1=cs1.length();
21117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
21127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c2<0) {
21157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* get next code unit from string 2, post-increment */
21167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for(;;) {
21177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(s2==limit2) {
21187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(level2==0) {
21197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c2=-1;
21207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            break;
21217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
21227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
21237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c2=cs2.charAt(s2++);
21247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
21257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
21267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* reached end of level buffer, pop one level */
21287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    do {
21297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --level2;
21307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        cs2=stack2[level2].cs;
21317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } while(cs2==null);
21327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    s2=stack2[level2].s;
21337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    limit2=cs2.length();
21347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
21357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
21387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * compare c1 and c2
21397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * either variable c1, c2 is -1 only if the corresponding string is finished
21407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
21417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c1==c2) {
21427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(c1<0) {
21437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 0;   /* c1==c2==-1 indicating end of strings */
21447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
21457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c1=c2=-1;       /* make us fetch new code units */
21467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
21477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(c1<0) {
21487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return -1;      /* string 1 ends before string 2 */
21497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(c2<0) {
21507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return 1;       /* string 2 ends before string 1 */
21517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* c1!=c2 && c1>=0 && c2>=0 */
21537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* get complete code points for c1, c2 for lookups if either is a surrogate */
21557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cp1=c1;
21567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(UTF16.isSurrogate((char)c1)) {
21577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                char c;
21587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
21607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(s1!=limit1 && Character.isLowSurrogate(c=cs1.charAt(s1))) {
21617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* advance ++s1; only below if cp1 decomposes/case-folds */
21627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        cp1=Character.toCodePoint((char)c1, c);
21637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
21647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else /* isTrail(c1) */ {
21657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(0<=(s1-2) && Character.isHighSurrogate(c=cs1.charAt(s1-2))) {
21667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        cp1=Character.toCodePoint(c, (char)c1);
21677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
21687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
21697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cp2=c2;
21727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(UTF16.isSurrogate((char)c2)) {
21737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                char c;
21747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
21767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(s2!=limit2 && Character.isLowSurrogate(c=cs2.charAt(s2))) {
21777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* advance ++s2; only below if cp2 decomposes/case-folds */
21787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        cp2=Character.toCodePoint((char)c2, c);
21797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
21807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else /* isTrail(c2) */ {
21817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(0<=(s2-2) && Character.isHighSurrogate(c=cs2.charAt(s2-2))) {
21827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        cp2=Character.toCodePoint(c, (char)c2);
21837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
21847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
21857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
21887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * go down one level for each string
21897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * continue with the main loop as soon as there is a real change
21907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
21917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if( level1==0 && (options&COMPARE_IGNORE_CASE)!=0 &&
21937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (length=csp.toFullFolding(cp1, fold1, options))>=0
21947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ) {
21957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* cp1 case-folds to the code point "length" or to p[length] */
21967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(UTF16.isSurrogate((char)c1)) {
21977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
21987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* advance beyond source surrogate pair if it case-folds */
21997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++s1;
22007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else /* isTrail(c1) */ {
22017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /*
22027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * we got a supplementary code point when hitting its trail surrogate,
22037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * therefore the lead surrogate must have been the same as in the other string;
22047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * compare this decomposition with the lead surrogate in the other string
22057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * remember that this simulates bulk text replacement:
22067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * the decomposition would replace the entire code point
22077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         */
22087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --s2;
22097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c2=cs2.charAt(s2-1);
22107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
22117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
22127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* push current level pointers */
22147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(stack1==null) {
22157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    stack1=createCmpEquivLevelStack();
22167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
22177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                stack1[0].cs=cs1;
22187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                stack1[0].s=s1;
22197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++level1;
22207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* copy the folding result to fold1[] */
22227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* Java: the buffer was probably not empty, remove the old contents */
22237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(length<=UCaseProps.MAX_STRING_LENGTH) {
22247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    fold1.delete(0, fold1.length()-length);
22257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
22267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    fold1.setLength(0);
22277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    fold1.appendCodePoint(length);
22287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
22297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* set next level pointers to case folding */
22317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cs1=fold1;
22327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                s1=0;
22337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit1=fold1.length();
22347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* get ready to read from decomposition, continue with loop */
22367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c1=-1;
22377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
22387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
22397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if( level2==0 && (options&COMPARE_IGNORE_CASE)!=0 &&
22417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (length=csp.toFullFolding(cp2, fold2, options))>=0
22427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ) {
22437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* cp2 case-folds to the code point "length" or to p[length] */
22447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(UTF16.isSurrogate((char)c2)) {
22457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
22467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* advance beyond source surrogate pair if it case-folds */
22477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++s2;
22487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else /* isTrail(c2) */ {
22497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /*
22507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * we got a supplementary code point when hitting its trail surrogate,
22517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * therefore the lead surrogate must have been the same as in the other string;
22527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * compare this decomposition with the lead surrogate in the other string
22537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * remember that this simulates bulk text replacement:
22547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * the decomposition would replace the entire code point
22557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         */
22567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --s1;
22577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c1=cs1.charAt(s1-1);
22587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
22597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
22607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* push current level pointers */
22627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(stack2==null) {
22637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    stack2=createCmpEquivLevelStack();
22647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
22657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                stack2[0].cs=cs2;
22667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                stack2[0].s=s2;
22677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++level2;
22687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* copy the folding result to fold2[] */
22707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* Java: the buffer was probably not empty, remove the old contents */
22717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(length<=UCaseProps.MAX_STRING_LENGTH) {
22727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    fold2.delete(0, fold2.length()-length);
22737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
22747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    fold2.setLength(0);
22757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    fold2.appendCodePoint(length);
22767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
22777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* set next level pointers to case folding */
22797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cs2=fold2;
22807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                s2=0;
22817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit2=fold2.length();
22827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* get ready to read from decomposition, continue with loop */
22847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c2=-1;
22857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
22867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
22877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if( level1<2 && (options&COMPARE_EQUIV)!=0 &&
22897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (decomp1=nfcImpl.getDecomposition(cp1))!=null
22907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ) {
22917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* cp1 decomposes into p[length] */
22927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(UTF16.isSurrogate((char)c1)) {
22937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
22947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* advance beyond source surrogate pair if it decomposes */
22957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++s1;
22967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else /* isTrail(c1) */ {
22977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /*
22987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * we got a supplementary code point when hitting its trail surrogate,
22997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * therefore the lead surrogate must have been the same as in the other string;
23007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * compare this decomposition with the lead surrogate in the other string
23017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * remember that this simulates bulk text replacement:
23027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * the decomposition would replace the entire code point
23037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         */
23047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --s2;
23057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c2=cs2.charAt(s2-1);
23067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
23077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
23087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* push current level pointers */
23107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(stack1==null) {
23117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    stack1=createCmpEquivLevelStack();
23127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
23137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                stack1[level1].cs=cs1;
23147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                stack1[level1].s=s1;
23157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++level1;
23167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* set empty intermediate level if skipped */
23187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(level1<2) {
23197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    stack1[level1++].cs=null;
23207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
23217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* set next level pointers to decomposition */
23237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cs1=decomp1;
23247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                s1=0;
23257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit1=decomp1.length();
23267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* get ready to read from decomposition, continue with loop */
23287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c1=-1;
23297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
23307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
23317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if( level2<2 && (options&COMPARE_EQUIV)!=0 &&
23337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (decomp2=nfcImpl.getDecomposition(cp2))!=null
23347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ) {
23357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* cp2 decomposes into p[length] */
23367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(UTF16.isSurrogate((char)c2)) {
23377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
23387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /* advance beyond source surrogate pair if it decomposes */
23397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++s2;
23407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else /* isTrail(c2) */ {
23417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        /*
23427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * we got a supplementary code point when hitting its trail surrogate,
23437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * therefore the lead surrogate must have been the same as in the other string;
23447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * compare this decomposition with the lead surrogate in the other string
23457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * remember that this simulates bulk text replacement:
23467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         * the decomposition would replace the entire code point
23477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         */
23487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --s1;
23497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c1=cs1.charAt(s1-1);
23507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
23517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
23527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* push current level pointers */
23547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(stack2==null) {
23557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    stack2=createCmpEquivLevelStack();
23567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
23577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                stack2[level2].cs=cs2;
23587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                stack2[level2].s=s2;
23597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++level2;
23607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* set empty intermediate level if skipped */
23627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(level2<2) {
23637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    stack2[level2++].cs=null;
23647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
23657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* set next level pointers to decomposition */
23677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cs2=decomp2;
23687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                s2=0;
23697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit2=decomp2.length();
23707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* get ready to read from decomposition, continue with loop */
23727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c2=-1;
23737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
23747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
23757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
23777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * no decomposition/case folding, max level for both sides:
23787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * return difference result
23797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *
23807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * code point order comparison must not just return cp1-cp2
23817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * because when single surrogates are present then the surrogate pairs
23827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * that formed cp1 and cp2 may be from different string indexes
23837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *
23847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
23857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * c1=d800 cp1=10001 c2=dc00 cp2=10000
23867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
23877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *
23887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * therefore, use same fix-up as in ustring.c/uprv_strCompare()
23897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
23907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * so we have slightly different pointer/start/limit comparisons here
23917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
23927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
23937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c1>=0xd800 && c2>=0xd800 && (options&COMPARE_CODE_POINT_ORDER)!=0) {
23947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
23957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(
23967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (c1<=0xdbff && s1!=limit1 && Character.isLowSurrogate(cs1.charAt(s1))) ||
23977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (Character.isLowSurrogate((char)c1) && 0!=(s1-1) && Character.isHighSurrogate(cs1.charAt(s1-2)))
23987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
23997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* part of a surrogate pair, leave >=d800 */
24007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
24017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* BMP code point - may be surrogate code point - make <d800 */
24027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c1-=0x2800;
24037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
24047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
24057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(
24067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (c2<=0xdbff && s2!=limit2 && Character.isLowSurrogate(cs2.charAt(s2))) ||
24077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (Character.isLowSurrogate((char)c2) && 0!=(s2-1) && Character.isHighSurrogate(cs2.charAt(s2-2)))
24087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
24097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* part of a surrogate pair, leave >=d800 */
24107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
24117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* BMP code point - may be surrogate code point - make <d800 */
24127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c2-=0x2800;
24137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
24147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
24157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
24167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return c1-c2;
24177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
24187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
24197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
24207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
24217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * An Appendable that writes into a char array with a capacity that may be
24227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * less than array.length.
24237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (By contrast, CharBuffer will write beyond destLimit all the way up to array.length.)
24247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
24257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * An overflow is only reported at the end, for the old Normalizer API functions that write
24267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * to char arrays.
24277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
24287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class CharsAppendable implements Appendable {
24297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsAppendable(char[] dest, int destStart, int destLimit) {
24307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            chars=dest;
24317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            start=offset=destStart;
24327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limit=destLimit;
24337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
24347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int length() {
24357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int len=offset-start;
24367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(offset<=limit) {
24377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return len;
24387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
24397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new IndexOutOfBoundsException(Integer.toString(len));
24407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
24417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
24427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public Appendable append(char c) {
24437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(offset<limit) {
24447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                chars[offset]=c;
24457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
24467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++offset;
24477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return this;
24487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
24497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public Appendable append(CharSequence s) {
24507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return append(s, 0, s.length());
24517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
24527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public Appendable append(CharSequence s, int sStart, int sLimit) {
24537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int len=sLimit-sStart;
24547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(len<=(limit-offset)) {
24557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while(sStart<sLimit) {  // TODO: Is there a better way to copy the characters?
24567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    chars[offset++]=s.charAt(sStart++);
24577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
24587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
24597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offset+=len;
24607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
24617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return this;
24627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
24637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
24647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private final char[] chars;
24657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private final int start, limit;
24667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int offset;
24677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
24687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
2469