17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
3f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert * Copyright (C) 1996-2015, International Business Machines Corporation and    *
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.                                                *
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.lang.ref.SoftReference;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.StringCharacterIterator;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Locale;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.MissingResourceException;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.ICUDebug;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ICUCloneNotSupportedException;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@icuenhanced java.text.BreakIterator}.{@icu _usage_}
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>A class that locates boundaries in text.  This class defines a protocol for
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * objects that break up a piece of natural-language text according to a set
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of criteria.  Instances or subclasses of BreakIterator can be provided, for
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * example, to break a piece of text into words, sentences, or logical characters
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * according to the conventions of some language or group of languages.
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * We provide five built-in types of BreakIterator:
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <ul><li>getTitleInstance() returns a BreakIterator that locates boundaries
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * between title breaks.
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>getSentenceInstance() returns a BreakIterator that locates boundaries
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * between sentences.  This is useful for triple-click selection, for example.
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>getWordInstance() returns a BreakIterator that locates boundaries between
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * words.  This is useful for double-click selection or "find whole words" searches.
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This type of BreakIterator makes sure there is a boundary position at the
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * beginning and end of each legal word.  (Numbers count as words, too.)  Whitespace
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and punctuation are kept separate from real words.
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>getLineInstance() returns a BreakIterator that locates positions where it is
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * legal for a text editor to wrap lines.  This is similar to word breaking, but
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * not the same: punctuation and whitespace are generally kept with words (you don't
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * want a line to start with whitespace, for example), and some special characters
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * can force a position to be considered a line-break position or prevent a position
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * from being a line-break position.
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>getCharacterInstance() returns a BreakIterator that locates boundaries between
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * logical characters.  Because of the structure of the Unicode encoding, a logical
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * character may be stored internally as more than one Unicode code point.  (A with an
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * umlaut may be stored as an a followed by a separate combining umlaut character,
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * for example, but the user still thinks of it as one character.)  This iterator allows
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * various processes (especially text editors) to treat as characters the units of text
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * that a user would think of as characters, rather than the units of text that the
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * computer sees as "characters".</ul>
 * The text boundary positions are found according to the rules
 * described in Unicode Standard Annex #29, Text Boundaries, and
 * Unicode Standard Annex #14, Line Breaking Properties.  These
 * are available at http://www.unicode.org/reports/tr29/ and
 * http://www.unicode.org/reports/tr14/, respectively.
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * BreakIterator's interface follows an "iterator" model (hence the name), meaning it
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * has a concept of a "current position" and methods like first(), last(), next(),
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and previous() that update the current position.  All BreakIterators uphold the
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * following invariants:
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <ul><li>The beginning and end of the text are always treated as boundary positions.
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>The current position of the iterator is always a boundary position (random-
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * access methods move the iterator to the nearest boundary position before or
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * after the specified position, not _to_ the specified position).
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>DONE is used as a flag to indicate when iteration has stopped.  DONE is only
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * returned when the current position is the end of the text and the user calls next(),
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * or when the current position is the beginning of the text and the user calls
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * previous().
 * <li>Break positions are numbered by the positions of the characters that follow
 * them.  Thus, under normal circumstances, the position before the first character
 * is 0, the position after the first character is 1, and the position after the
 * last character is the length of the string.
 * <li>The client can change the position of an iterator, or the text it analyzes,
 * at will, but cannot change the behavior.  If different behavior is needed, the
 * client must instantiate a new iterator.</ul>
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * BreakIterator accesses the text it analyzes through a CharacterIterator, which makes
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * it possible to use BreakIterator to analyze text in any text-storage vehicle that
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * provides a CharacterIterator interface.
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>Note:</b>  Some types of BreakIterator can take a long time to create, and
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * instances of BreakIterator are not currently cached by the system.  For
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * optimal performance, keep instances of BreakIterator around as long as makes
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * sense.  For example, when word-wrapping a document, don't create and destroy a
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * new BreakIterator for each line.  Create one break iterator for the whole document
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (or whatever stretch of text you're wrapping) and use it to do the whole job of
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * wrapping the text.
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert  * <P>
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <strong>Examples</strong>:<P>
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Creating and using text boundaries
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <blockquote>
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * public static void main(String args[]) {
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      if (args.length == 1) {
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          String stringToExamine = args[0];
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          //print each word in order
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          BreakIterator boundary = BreakIterator.getWordInstance();
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          boundary.setText(stringToExamine);
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          printEachForward(boundary, stringToExamine);
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          //print each sentence in reverse order
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          boundary = BreakIterator.getSentenceInstance(Locale.US);
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          boundary.setText(stringToExamine);
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          printEachBackward(boundary, stringToExamine);
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          printFirst(boundary, stringToExamine);
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          printLast(boundary, stringToExamine);
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *      }
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * }
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </blockquote>
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Print each element in order
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <blockquote>
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * public static void printEachForward(BreakIterator boundary, String source) {
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     int start = boundary.first();
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     for (int end = boundary.next();
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          end != BreakIterator.DONE;
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          start = end, end = boundary.next()) {
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          System.out.println(source.substring(start,end));
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     }
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * }
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </blockquote>
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Print each element in reverse order
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <blockquote>
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * public static void printEachBackward(BreakIterator boundary, String source) {
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     int end = boundary.last();
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     for (int start = boundary.previous();
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          start != BreakIterator.DONE;
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *          end = start, start = boundary.previous()) {
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *         System.out.println(source.substring(start,end));
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     }
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * }
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </blockquote>
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Print first element
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <blockquote>
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * public static void printFirst(BreakIterator boundary, String source) {
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     int start = boundary.first();
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     int end = boundary.next();
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     System.out.println(source.substring(start,end));
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * }
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </blockquote>
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Print last element
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <blockquote>
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * public static void printLast(BreakIterator boundary, String source) {
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     int end = boundary.last();
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     int start = boundary.previous();
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     System.out.println(source.substring(start,end));
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * }
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </blockquote>
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Print the element at a specified position
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <blockquote>
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * public static void printAt(BreakIterator boundary, int pos, String source) {
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     int end = boundary.following(pos);
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     int start = boundary.previous();
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     System.out.println(source.substring(start,end));
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * }
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </blockquote>
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Find the next word
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <blockquote>
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre>
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * public static int nextWordStartAfter(int pos, String text) {
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     BreakIterator wb = BreakIterator.getWordInstance();
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     wb.setText(text);
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     int last = wb.following(pos);
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     int current = wb.next();
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     while (current != BreakIterator.DONE) {
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *         for (int p = last; p < current; p++) {
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *             if (Character.isLetter(text.charAt(p)))
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *                 return last;
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *         }
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *         last = current;
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *         current = wb.next();
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     }
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *     return BreakIterator.DONE;
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * }
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre>
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (The iterator returned by BreakIterator.getWordInstance() is unique in that
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the break positions it returns don't represent both the start and end of the
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * thing being iterated over.  That is, a sentence-break iterator returns breaks
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * that each represent the end of one sentence and the beginning of the next.
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * With the word-break iterator, the characters between two boundaries might be a
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * word, or they might be the punctuation or whitespace between two words.  The
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * above code uses a simple heuristic to determine which boundary is the beginning
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of a word: If the characters between this boundary and the next boundary
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * include at least one letter (this can be an alphabetical letter, a CJK ideograph,
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a Hangul syllable, a Kana character, etc.), then the text between this boundary
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and the next is a word; otherwise, it's the material between words.)
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </blockquote>
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see CharacterIterator
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic abstract class BreakIterator implements Cloneable
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert{
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean DEBUG = ICUDebug.enabled("breakiterator");
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Default constructor.  There is no state that is carried by this abstract
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * base class.
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected BreakIterator()
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Clone method.  Creates another BreakIterator with the same behavior and
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * current state as this one.
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The clone.
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Object clone()
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return super.clone();
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        catch (CloneNotSupportedException e) {
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ///CLOVER:OFF
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new ICUCloneNotSupportedException(e);
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ///CLOVER:ON
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * DONE is returned by previous() and next() after all valid
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * boundaries have been returned.
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int DONE = -1;
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the iterator to the first boundary position.  This is always the beginning
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * index of the text this iterator iterates over.  For example, if
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the iterator iterates over a whole string, this function will
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * always return 0.
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The character offset of the beginning of the stretch of text
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * being broken.
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public abstract int first();
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the iterator to the last boundary position.  This is always the "past-the-end"
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * index of the text this iterator iterates over.  For example, if the
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iterator iterates over a whole string (call it "text"), this function
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * will always return text.length().
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The character offset of the end of the stretch of text
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * being broken.
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public abstract int last();
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Move the iterator by the specified number of steps in the text.
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A positive number moves the iterator forward; a negative number
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * moves the iterator backwards. If this causes the iterator
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * to move off either end of the text, this function returns DONE;
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * otherwise, this function returns the position of the appropriate
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * boundary.  Calling this function is equivalent to calling next() or
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * previous() n times.
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param n The number of boundaries to advance over (if positive, moves
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * forward; if negative, moves backwards).
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The position of the boundary n boundaries from the current
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iteration position, or DONE if moving n boundaries causes the iterator
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * to advance off either end of the text.
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public abstract int next(int n);
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Advances the iterator forward one boundary.  The current iteration
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * position is updated to point to the next boundary position after the
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * current position, and this is also the value that is returned.  If
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the current position is equal to the value returned by last(), or to
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * DONE, this function returns DONE and sets the current position to
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * DONE.
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The position of the first boundary position following the
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iteration position.
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public abstract int next();
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Move the iterator backward one boundary.  The current iteration
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * position is updated to point to the last boundary position before
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the current position, and this is also the value that is returned.  If
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the current position is equal to the value returned by first(), or to
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * DONE, this function returns DONE and sets the current position to
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * DONE.
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The position of the last boundary position preceding the
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iteration position.
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public abstract int previous();
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * Sets the iterator's current iteration position to the first
     * boundary position following the specified position.  Whether the
     * specified position is itself a boundary position makes no
     * difference: the iteration position always moves to the first
     * boundary after the specified position.  If the specified
     * position is the past-the-end position, {@code DONE} is returned.
     * @param offset The character position to start searching from.
     * @return The first boundary position following {@code offset}
     * (whether or not {@code offset} itself is a boundary position),
     * or {@code DONE} if {@code offset} is the past-the-end offset.
     * @stable ICU 2.0
     */
    public abstract int following(int offset);
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the iterator's current iteration position to be the last
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * boundary position preceding the specified position.  (Whether the
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * specified position is itself a boundary position or not doesn't
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * matter-- this function always moves the iteration position to the
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * last boundary before the specified position.)  If the specified
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * position is the starting position, returns DONE.
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset The character position to start searching from.
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The position of the last boundary position preceding
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * "offset" (whether of not "offset" itself is a boundary position),
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * or DONE if "offset" is the starting offset of the iterator.
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int preceding(int offset) {
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // NOTE:  This implementation is here solely because we can't add new
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // abstract methods to an existing class.  There is almost ALWAYS a
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // better, faster way to do this.
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int pos = following(offset);
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (pos >= offset && pos != DONE)
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            pos = previous();
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return pos;
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return true if the specified position is a boundary position.  If the
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * function returns true, the current iteration position is set to the
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * specified position; if the function returns false, the current
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iteration position is set as though following() had been called.
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset the offset to check.
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return True if "offset" is a boundary position.
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isBoundary(int offset) {
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Again, this is the default implementation, which is provided solely because
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // we couldn't add a new abstract method to an existing class.  The real
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // implementations will usually need to do a little more work.
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset == 0) {
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        else
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return following(offset - 1) == offset;
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * Returns the iterator's current position.
     * @return The iterator's current position.
     * @stable ICU 2.0
     */
    public abstract int current();
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * Tag value for "words" that do not fit into any of the other categories.
     * Includes spaces and most punctuation.
     * @stable ICU 53
     */
    public static final int WORD_NONE           = 0;

    /**
     * Upper bound for tags for uncategorized words.
     * Has the same value as {@code WORD_NUMBER}, the lower limit of the
     * number range.
     * @stable ICU 53
     */
    public static final int WORD_NONE_LIMIT     = 100;

    /**
     * Tag value for words that appear to be numbers, lower limit.
     * @stable ICU 53
     */
    public static final int WORD_NUMBER         = 100;

    /**
     * Tag value for words that appear to be numbers, upper limit.
     * Has the same value as {@code WORD_LETTER}, the lower limit of the
     * letter range.
     * @stable ICU 53
     */
    public static final int WORD_NUMBER_LIMIT   = 200;

    /**
     * Tag value for words that contain letters, excluding
     * hiragana, katakana or ideographic characters, lower limit.
     * @stable ICU 53
     */
    public static final int WORD_LETTER         = 200;

    /**
     * Tag value for words containing letters, upper limit.
     * Has the same value as {@code WORD_KANA}, the lower limit of the
     * kana range.
     * @stable ICU 53
     */
    public static final int WORD_LETTER_LIMIT   = 300;

    /**
     * Tag value for words containing kana characters, lower limit.
     * @stable ICU 53
     */
    public static final int WORD_KANA           = 300;

    /**
     * Tag value for words containing kana characters, upper limit.
     * Has the same value as {@code WORD_IDEO}, the lower limit of the
     * ideographic range.
     * @stable ICU 53
     */
    public static final int WORD_KANA_LIMIT     = 400;

    /**
     * Tag value for words containing ideographic characters, lower limit.
     * @stable ICU 53
     */
    public static final int WORD_IDEO           = 400;

    /**
     * Tag value for words containing ideographic characters, upper limit.
     * @stable ICU 53
     */
    public static final int WORD_IDEO_LIMIT     = 500;
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For RuleBasedBreakIterators, return the status tag from the
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * break rule that determined the most recently
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * returned break position.
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For break iterator types that do not support a rule status,
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * a default value of 0 is returned.
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The status from the break rule that determined the most recently
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         returned break position.
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 52
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int  getRuleStatus() {
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 0;
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * that determined the most recently returned break position.
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For break iterator types that do not support rule status,
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * no values are returned.
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If the size  of the output array is insufficient to hold the data,
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  the output will be truncated to the available length.  No exception
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  will be thrown.
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param fillInArray an array to be filled in with the status values.
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return          The number of rule status values from rules that determined
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  the most recent boundary returned by the break iterator.
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  In the event that the array is too small, the return value
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  is the total number of status values that were available,
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  not the reduced number that were actually returned.
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 52
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getRuleStatusVec(int[] fillInArray) {
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fillInArray != null && fillInArray.length > 0) {
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fillInArray[0] = 0;
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 1;
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * Returns a CharacterIterator over the text being analyzed.
     * For at least some subclasses of BreakIterator, this is a reference
     * to the <b>actual iterator being used</b> by the BreakIterator,
     * and therefore this method's return value should be treated as
     * {@code const} (read-only).  No guarantees are made about the current
     * position of this iterator when it is returned.  If you need to move
     * that position to examine the text, clone the returned iterator first.
     * @return A CharacterIterator over the text being analyzed.
     * @stable ICU 2.0
     */
    public abstract CharacterIterator getText();
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the iterator to analyze a new piece of text.  The new
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * piece of text is passed in as a String, and the current
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iteration position is reset to the beginning of the string.
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (The old text is dropped.)
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param newText A String containing the text to analyze with
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * this BreakIterator.
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(String newText)
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setText(new StringCharacterIterator(newText));
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * Sets the iterator to analyze a new piece of text.  The
     * BreakIterator is passed a CharacterIterator through which
     * it will access the text itself.  The current iteration
     * position is reset to the CharacterIterator's start index.
     * (The old iterator is dropped.)
     * @param newText A CharacterIterator referring to the text
     * to analyze with this BreakIterator (the iterator's current
     * position is ignored, but its other state is significant).
     * @stable ICU 2.0
     */
    public abstract void setText(CharacterIterator newText);
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
    /**
     * {@icu} Kind code passed to the instance factory by
     * {@code getCharacterInstance}.
     * @stable ICU 2.4
     */
    public static final int KIND_CHARACTER = 0;
    /**
     * {@icu} Kind code passed to the instance factory by
     * {@code getWordInstance}.
     * @stable ICU 2.4
     */
    public static final int KIND_WORD = 1;
    /**
     * {@icu} Kind code passed to the instance factory by
     * {@code getLineInstance}.
     * @stable ICU 2.4
     */
    public static final int KIND_LINE = 2;
    /**
     * {@icu} Kind code, presumably for sentence break iterators
     * (inferred from the name; its use is outside this section).
     * @stable ICU 2.4
     */
    public static final int KIND_SENTENCE = 3;
    /**
     * {@icu} Kind code, presumably for title break iterators
     * (inferred from the name; its use is outside this section).
     * @stable ICU 2.4
     */
    public static final int KIND_TITLE = 4;
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @since ICU 2.8
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int KIND_COUNT = 5;
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final SoftReference<?>[] iterCache = new SoftReference<?>[5];
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a new instance of BreakIterator that locates word boundaries.
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This function assumes that the text being analyzed is in the default
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * locale's language.
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return An instance of BreakIterator that locates word boundaries.
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getWordInstance()
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getWordInstance(ULocale.getDefault());
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a new instance of BreakIterator that locates word boundaries.
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param where A locale specifying the language of the text to be
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * analyzed.
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return An instance of BreakIterator that locates word boundaries.
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws NullPointerException if <code>where</code> is null.
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getWordInstance(Locale where)
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getBreakInstance(ULocale.forLocale(where), KIND_WORD);
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Returns a new instance of BreakIterator that locates word boundaries.
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param where A locale specifying the language of the text to be
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * analyzed.
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return An instance of BreakIterator that locates word boundaries.
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws NullPointerException if <code>where</code> is null.
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 3.2
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getWordInstance(ULocale where)
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getBreakInstance(where, KIND_WORD);
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a new instance of BreakIterator that locates legal line-
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * wrapping positions.  This function assumes the text being broken
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * is in the default locale's language.
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates legal
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * line-wrapping positions.
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getLineInstance()
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getLineInstance(ULocale.getDefault());
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a new instance of BreakIterator that locates legal line-
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * wrapping positions.
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param where A Locale specifying the language of the text being broken.
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates legal
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * line-wrapping positions.
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws NullPointerException if <code>where</code> is null.
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getLineInstance(Locale where)
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getBreakInstance(ULocale.forLocale(where), KIND_LINE);
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Returns a new instance of BreakIterator that locates legal line-
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * wrapping positions.
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param where A Locale specifying the language of the text being broken.
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates legal
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * line-wrapping positions.
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws NullPointerException if <code>where</code> is null.
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 3.2
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getLineInstance(ULocale where)
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getBreakInstance(where, KIND_LINE);
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a new instance of BreakIterator that locates logical-character
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * boundaries.  This function assumes that the text being analyzed is
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in the default locale's language.
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates logical-character
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * boundaries.
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getCharacterInstance()
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getCharacterInstance(ULocale.getDefault());
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a new instance of BreakIterator that locates logical-character
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * boundaries.
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param where A Locale specifying the language of the text being analyzed.
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates logical-character
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * boundaries.
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws NullPointerException if <code>where</code> is null.
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getCharacterInstance(Locale where)
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getBreakInstance(ULocale.forLocale(where), KIND_CHARACTER);
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Returns a new instance of BreakIterator that locates logical-character
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * boundaries.
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param where A Locale specifying the language of the text being analyzed.
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates logical-character
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * boundaries.
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws NullPointerException if <code>where</code> is null.
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 3.2
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getCharacterInstance(ULocale where)
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getBreakInstance(where, KIND_CHARACTER);
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a new instance of BreakIterator that locates sentence boundaries.
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This function assumes the text being analyzed is in the default locale's
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * language.
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates sentence boundaries.
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getSentenceInstance()
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getSentenceInstance(ULocale.getDefault());
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a new instance of BreakIterator that locates sentence boundaries.
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param where A Locale specifying the language of the text being analyzed.
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates sentence boundaries.
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws NullPointerException if <code>where</code> is null.
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getSentenceInstance(Locale where)
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getBreakInstance(ULocale.forLocale(where), KIND_SENTENCE);
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Returns a new instance of BreakIterator that locates sentence boundaries.
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param where A Locale specifying the language of the text being analyzed.
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates sentence boundaries.
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws NullPointerException if <code>where</code> is null.
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 3.2
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getSentenceInstance(ULocale where)
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getBreakInstance(where, KIND_SENTENCE);
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This function assumes the text being analyzed is in the default locale's
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * language. The iterator returned locates title boundaries as described for
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * please use a word boundary iterator. {@link #getWordInstance}
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates title boundaries.
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getTitleInstance()
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getTitleInstance(ULocale.getDefault());
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iterator returned locates title boundaries as described for
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * please use Word Boundary iterator.{@link #getWordInstance}
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param where A Locale specifying the language of the text being analyzed.
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates title boundaries.
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws NullPointerException if <code>where</code> is null.
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getTitleInstance(Locale where)
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getBreakInstance(ULocale.forLocale(where), KIND_TITLE);
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iterator returned locates title boundaries as described for
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * please use Word Boundary iterator.{@link #getWordInstance}
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param where A Locale specifying the language of the text being analyzed.
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A new instance of BreakIterator that locates title boundaries.
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws NullPointerException if <code>where</code> is null.
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 3.2
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Rouberts     */
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getTitleInstance(ULocale where)
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getBreakInstance(where, KIND_TITLE);
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Registers a new break iterator of the indicated kind, to use in the given
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * locale.  Clones of the iterator will be returned if a request for a break iterator
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * of the given kind matches or falls back to this locale.
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>Because ICU may choose to cache BreakIterator objects internally, this must
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * be called at application startup, prior to any calls to
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * BreakIterator.getInstance to avoid undefined behavior.
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param iter the BreakIterator instance to adopt.
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param locale the Locale for which this instance is to be registered
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param kind the type of iterator for which this instance is to be registered
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return a registry key that can be used to unregister this instance
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.4
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static Object registerInstance(BreakIterator iter, Locale locale, int kind) {
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return registerInstance(iter, ULocale.forLocale(locale), kind);
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Registers a new break iterator of the indicated kind, to use in the given
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * locale.  Clones of the iterator will be returned if a request for a break iterator
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * of the given kind matches or falls back to this locale.
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>Because ICU may choose to cache BreakIterator objects internally, this must
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * be called at application startup, prior to any calls to
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * BreakIterator.getInstance to avoid undefined behavior.
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param iter the BreakIterator instance to adopt.
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param locale the Locale for which this instance is to be registered
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param kind the type of iterator for which this instance is to be registered
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return a registry key that can be used to unregister this instance
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 3.2
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static Object registerInstance(BreakIterator iter, ULocale locale, int kind) {
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If the registered object matches the one in the cache, then
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // flush the cached object.
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (iterCache[kind] != null) {
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BreakIteratorCache cache = (BreakIteratorCache) iterCache[kind].get();
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (cache != null) {
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (cache.getLocale().equals(locale)) {
8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    iterCache[kind] = null;
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getShim().registerInstance(iter, locale, kind);
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Unregisters a previously-registered BreakIterator using the key returned
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * from the register call.  Key becomes invalid after this call and should not be used
8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * again.
8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param key the registry key returned by a previous call to registerInstance
8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return true if the iterator for the key was successfully unregistered
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.4
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static boolean unregister(Object key) {
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (key == null) {
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("registry key must not be null");
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO: we don't do code coverage for the following lines
8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // because in getBreakInstance we always instantiate the shim,
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // and test execution is such that we always instantiate a
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // breakiterator before we get to the break iterator tests.
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // this is for modularization, and we could remove the
8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // dependencies in getBreakInstance by rewriting part of the
8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // LocaleData code, or perhaps by accepting it into the
8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // module.
8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ///CLOVER:OFF
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (shim != null) {
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Unfortunately, we don't know what is being unregistered
8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // -- what `kind' and what locale -- so we flush all
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // caches.  This is safe but inefficient if people are
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // actively registering and unregistering.
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int kind=0; kind<KIND_COUNT; ++kind) {
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                iterCache[kind] = null;
8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return shim.unregister(key);
8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return false;
8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ///CLOVER:ON
8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // end of registration
8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a particular kind of BreakIterator for a locale.
8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Avoids writing a switch statement with getXYZInstance(where) calls.
8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static BreakIterator getBreakInstance(ULocale where, int kind) {
8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (where == null) {
8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new NullPointerException("Specified locale is null");
8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (iterCache[kind] != null) {
8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BreakIteratorCache cache = (BreakIteratorCache)iterCache[kind].get();
8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (cache != null) {
8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (cache.getLocale().equals(where)) {
8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return cache.createBreakInstance();
8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // sigh, all to avoid linking in ICULocaleData...
8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BreakIterator result = getShim().createBreakIterator(where, kind);
8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BreakIteratorCache cache = new BreakIteratorCache(where, result);
8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iterCache[kind] = new SoftReference<BreakIteratorCache>(cache);
8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (result instanceof RuleBasedBreakIterator) {
8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator)result;
8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            rbbi.setBreakType(kind);
8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a list of locales for which BreakIterators can be used.
8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return An array of Locales.  All of the locales in the array can
8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * be used when creating a BreakIterator.
8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.6
8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static synchronized Locale[] getAvailableLocales()
8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // to avoid linking ICULocaleData
8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getShim().getAvailableLocales();
8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Returns a list of locales for which BreakIterators can be used.
8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return An array of Locales.  All of the locales in the array can
8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * be used when creating a BreakIterator.
8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 3.2 (retain)
8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This API might change or be removed in a future release.
8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static synchronized ULocale[] getAvailableULocales()
9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // to avoid linking ICULocaleData
9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getShim().getAvailableULocales();
9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class BreakIteratorCache {
9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private BreakIterator iter;
9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private ULocale where;
9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BreakIteratorCache(ULocale where, BreakIterator iter) {
9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            this.where = where;
9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            this.iter = (BreakIterator) iter.clone();
9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ULocale getLocale() {
9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return where;
9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BreakIterator createBreakInstance() {
9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (BreakIterator) iter.clone();
9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static abstract class BreakIteratorServiceShim {
9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public abstract Object registerInstance(BreakIterator iter, ULocale l, int k);
9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public abstract boolean unregister(Object key);
9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public abstract Locale[] getAvailableLocales();
9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public abstract ULocale[] getAvailableULocales();
9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public abstract BreakIterator createBreakIterator(ULocale l, int k);
9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static BreakIteratorServiceShim shim;
9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static BreakIteratorServiceShim getShim() {
9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: this instantiation is safe on loose-memory-model configurations
9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // despite lack of synchronization, since the shim instance has no state--
9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // it's all in the class init.  The worst problem is we might instantiate
9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // two shim instances, but they'll share the same state so that's ok.
9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (shim == null) {
9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            try {
9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Class<?> cls = Class.forName("com.ibm.icu.text.BreakIteratorFactory");
9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                shim = (BreakIteratorServiceShim)cls.newInstance();
9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            catch (MissingResourceException e)
9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            {
9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw e;
9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            catch (Exception e) {
9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ///CLOVER:OFF
9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(DEBUG){
9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    e.printStackTrace();
9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new RuntimeException(e.getMessage());
9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ///CLOVER:ON
9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return shim;
9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // -------- BEGIN ULocale boilerplate --------
9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@icu} Returns the locale that was used to create this object, or null.
9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This may may differ from the locale requested at the time of
9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * this object's creation.  For example, if an object is created
9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * drawn from <tt>en</tt> (the <i>actual</i> locale), and
9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <tt>en_US</tt> may be the most specific locale that exists (the
9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <i>valid</i> locale).
9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>Note: The <i>actual</i> locale is returned correctly, but the <i>valid</i>
9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * locale is not, in most cases.
9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param type type of information requested, either {@link
9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the information specified by <i>type</i>, or null if
9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * this object was not constructed from locale data.
9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see com.ibm.icu.util.ULocale
9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see com.ibm.icu.util.ULocale#VALID_LOCALE
9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 2.8 (retain)
9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This API might change or be removed in a future release.
9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final ULocale getLocale(ULocale.Type type) {
9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return type == ULocale.ACTUAL_LOCALE ?
9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            this.actualLocale : this.validLocale;
9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set information about the locales that were used to create this
9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * object.  If the object was not constructed from locale data,
9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * both arguments should be set to null.  Otherwise, neither
9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * should be null.  The actual locale must be at the same level or
9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * less specific than the valid locale.  This method is intended
9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * for use by factories or other entities that create objects of
9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * this class.
9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param valid the most specific locale containing any resource
9977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * data, or null
9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param actual the locale containing data used to construct this
9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * object, or null
10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see com.ibm.icu.util.ULocale
10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see com.ibm.icu.util.ULocale#VALID_LOCALE
10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
10037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    final void setLocale(ULocale valid, ULocale actual) {
10057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Change the following to an assertion later
10067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if ((valid == null) != (actual == null)) {
10077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ///CLOVER:OFF
10087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException();
10097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ///CLOVER:ON
10107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Another check we could do is that the actual locale is at
10127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the same level or less specific than the valid locale.
10137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.validLocale = valid;
10147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.actualLocale = actual;
10157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
10187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The most specific locale containing any resource data, or null.  Assigned only by setLocale(ULocale, ULocale); returned by getLocale(ULocale.Type) for every type other than ULocale.ACTUAL_LOCALE.
10197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see com.ibm.icu.util.ULocale
10207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private ULocale validLocale;
10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
10247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The locale containing data used to construct this object, or
10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * null.  Assigned only by setLocale(ULocale, ULocale); returned by getLocale(ULocale.Type) when the requested type is ULocale.ACTUAL_LOCALE.
10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see com.ibm.icu.util.ULocale
10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private ULocale actualLocale;
10297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // -------- END ULocale boilerplate --------
10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
1032