151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski/*
22c87ad3a45cecf9e344487cad1abfdebe79f2c7cNarayan Kamath * Copyright (C) 2014 The Android Open Source Project
36e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This code is free software; you can redistribute it and/or modify it
751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * under the terms of the GNU General Public License version 2 only, as
851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * published by the Free Software Foundation.  Oracle designates this
951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * particular file as subject to the "Classpath" exception as provided
1051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * by Oracle in the LICENSE file that accompanied this code.
1151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
1251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This code is distributed in the hope that it will be useful, but WITHOUT
1351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * version 2 for more details (a copy is included in the LICENSE file that
1651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * accompanied this code).
1751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
1851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * You should have received a copy of the GNU General Public License version
1951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * 2 along with this work; if not, write to the Free Software Foundation,
2051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
2151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
2251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * or visit www.oracle.com if you need additional information or have any
2451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * questions.
2551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski */
2651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
2751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski/*
2851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
2951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
3051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
3151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * The original version of this source code and documentation
3251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * is copyrighted and owned by Taligent, Inc., a wholly-owned
3351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * subsidiary of IBM. These materials are provided under terms
3451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * of a License Agreement between Taligent and Sun. This technology
3551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * is protected by multiple US and International patents.
3651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
3751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This notice and attribution to Taligent may not be removed.
3851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Taligent is a registered trademark of Taligent, Inc.
3951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
4051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski */
4151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
4251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskipackage java.text;
4351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
4451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.util.Locale;
4551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
4651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
47e10f71e903ded0b7418bf32fc28c6b43962ecf3cVictor Chang// Android-changed: Discourage modification on CharacterIterator after setText. http://b/80456574
4851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski/**
4951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * The <code>BreakIterator</code> class implements methods for finding
5051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * the location of boundaries in text. Instances of <code>BreakIterator</code>
5151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * maintain a current position and scan over text
5251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * returning the index of characters where boundaries occur.
5351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Internally, <code>BreakIterator</code> scans text using a
5451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <code>CharacterIterator</code>, and is thus able to scan text held
5551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * by any object implementing that protocol. A <code>StringCharacterIterator</code>
5651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * is used to scan <code>String</code> objects passed to <code>setText</code>.
57e10f71e903ded0b7418bf32fc28c6b43962ecf3cVictor Chang * The <code>CharacterIterator</code> object must not be modified after having been
58e10f71e903ded0b7418bf32fc28c6b43962ecf3cVictor Chang * passed to <code>setText</code>. If the text in the <code>CharacterIterator</code> object
59e10f71e903ded0b7418bf32fc28c6b43962ecf3cVictor Chang * is changed, the caller must reset <code>BreakIterator</code> by calling
60e10f71e903ded0b7418bf32fc28c6b43962ecf3cVictor Chang * <code>setText</code>.
6151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
6251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p>
6351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * You use the factory methods provided by this class to create
6451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * instances of various types of break iterators. In particular,
6551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * use <code>getWordInstance</code>, <code>getLineInstance</code>,
6651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <code>getSentenceInstance</code>, and <code>getCharacterInstance</code>
6751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * to create <code>BreakIterator</code>s that perform
6851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * word, line, sentence, and character boundary analysis respectively.
6951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * A single <code>BreakIterator</code> can work only on one unit
7051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * (word, line, sentence, and so on). You must use a different iterator
7151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * for each unit boundary analysis you wish to perform.
7251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
7351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p><a name="line"></a>
7451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Line boundary analysis determines where a text string can be
7551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * broken when line-wrapping. The mechanism correctly handles
7651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * punctuation and hyphenated words. Actual line breaking needs
7751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * to also consider the available line width and is handled by
7851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * higher-level software.
7951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
8051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p><a name="sentence"></a>
8151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Sentence boundary analysis allows selection with correct interpretation
8251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * of periods within numbers and abbreviations, and trailing punctuation
8351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * marks such as quotation marks and parentheses.
8451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
8551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p><a name="word"></a>
8651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Word boundary analysis is used by search and replace functions, as
8751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * well as within text editing applications that allow the user to
8851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * select words with a double click. Word selection provides correct
8951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * interpretation of punctuation marks within and following
9051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * words. Characters that are not part of a word, such as symbols
9151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * or punctuation marks, have word-breaks on both sides.
9251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
9351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p><a name="character"></a>
9451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Character boundary analysis allows users to interact with characters
9551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * as they expect to, for example, when moving the cursor through a text
9651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * string. Character boundary analysis provides correct navigation
9751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * through character strings, regardless of how the character is stored.
9851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * The boundaries returned may be those of supplementary characters,
9951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * combining character sequences, or ligature clusters.
10051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * For example, an accented character might be stored as a base character
10151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * and a diacritical mark. What users consider to be a character can
10251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * differ between languages.
10351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
10451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p>
10551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * The <code>BreakIterator</code> instances returned by the factory methods
10651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * of this class are intended for use with natural languages only, not for
10751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * programming language text. It is however possible to define subclasses
10851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * that tokenize a programming language.
10951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
11051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <P>
11151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <strong>Examples</strong>:<P>
11251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Creating and using text boundaries:
11351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
11451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <pre>
11551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * public static void main(String args[]) {
11651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *      if (args.length == 1) {
11751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          String stringToExamine = args[0];
11851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          //print each word in order
11951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          BreakIterator boundary = BreakIterator.getWordInstance();
12051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          boundary.setText(stringToExamine);
12151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          printEachForward(boundary, stringToExamine);
12251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          //print each sentence in reverse order
12351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          boundary = BreakIterator.getSentenceInstance(Locale.US);
12451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          boundary.setText(stringToExamine);
12551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          printEachBackward(boundary, stringToExamine);
12651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          printFirst(boundary, stringToExamine);
12751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          printLast(boundary, stringToExamine);
12851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *      }
12951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * }
13051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </pre>
13151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
13251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
13351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Print each element in order:
13451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
13551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <pre>
13651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * public static void printEachForward(BreakIterator boundary, String source) {
13751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     int start = boundary.first();
13851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     for (int end = boundary.next();
13951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          end != BreakIterator.DONE;
14051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          start = end, end = boundary.next()) {
14151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          System.out.println(source.substring(start,end));
14251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     }
14351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * }
14451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </pre>
14551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
14651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
14751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Print each element in reverse order:
14851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
14951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <pre>
15051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * public static void printEachBackward(BreakIterator boundary, String source) {
15151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     int end = boundary.last();
15251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     for (int start = boundary.previous();
15351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          start != BreakIterator.DONE;
15451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *          end = start, start = boundary.previous()) {
15551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *         System.out.println(source.substring(start,end));
15651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     }
15751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * }
15851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </pre>
15951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
16051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
16151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Print first element:
16251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
16351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <pre>
16451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * public static void printFirst(BreakIterator boundary, String source) {
16551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     int start = boundary.first();
16651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     int end = boundary.next();
16751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     System.out.println(source.substring(start,end));
16851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * }
16951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </pre>
17051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
17151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
17251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Print last element:
17351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
17451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <pre>
17551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * public static void printLast(BreakIterator boundary, String source) {
17651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     int end = boundary.last();
17751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     int start = boundary.previous();
17851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     System.out.println(source.substring(start,end));
17951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * }
18051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </pre>
18151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
18251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
18351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Print the element at a specified position:
18451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
18551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <pre>
18651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * public static void printAt(BreakIterator boundary, int pos, String source) {
18751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     int end = boundary.following(pos);
18851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     int start = boundary.previous();
18951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     System.out.println(source.substring(start,end));
19051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * }
19151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </pre>
19251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
19351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
19451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Find the next word:
19551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
1966e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin * <pre>{@code
19751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * public static int nextWordStartAfter(int pos, String text) {
19851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     BreakIterator wb = BreakIterator.getWordInstance();
19951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     wb.setText(text);
20051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     int last = wb.following(pos);
20151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     int current = wb.next();
20251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     while (current != BreakIterator.DONE) {
20351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *         for (int p = last; p < current; p++) {
20451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *             if (Character.isLetter(text.codePointAt(p)))
20551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *                 return last;
20651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *         }
20751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *         last = current;
20851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *         current = wb.next();
20951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     }
21051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *     return BreakIterator.DONE;
21151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * }
2126e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin * }</pre>
21351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * (The iterator returned by BreakIterator.getWordInstance() is unique in that
21451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * the break positions it returns don't represent both the start and end of the
 * thing being iterated over.  By contrast, a sentence-break iterator returns breaks
21651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * that each represent the end of one sentence and the beginning of the next.
21751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * With the word-break iterator, the characters between two boundaries might be a
21851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * word, or they might be the punctuation or whitespace between two words.  The
21951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * above code uses a simple heuristic to determine which boundary is the beginning
22051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * of a word: If the characters between this boundary and the next boundary
22151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * include at least one letter (this can be an alphabetical letter, a CJK ideograph,
22251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * a Hangul syllable, a Kana character, etc.), then the text between this boundary
22351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * and the next is a word; otherwise, it's the material between words.)
22451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
22551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
22651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * @see CharacterIterator
22751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
22851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski */
22951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
2304c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffinpublic abstract class BreakIterator implements Cloneable
2314c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin{
    /**
     * Constructor. BreakIterator is stateless and has no default behavior.
     * <p>
     * The body is intentionally empty. The constructor is {@code protected},
     * so it is intended to be invoked from subclasses of this abstract class.
     */
    protected BreakIterator()
    {
    }
23851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
23951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
24051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Create a copy of this iterator
24151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return A copy of this
24251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
2436e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin    @Override
2444c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public Object clone()
2454c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
24651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        try {
24751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return super.clone();
2484c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin        }
2494c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin        catch (CloneNotSupportedException e) {
2506e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin            throw new InternalError(e);
25151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
25251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
25351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
    /**
     * Sentinel value returned by {@code previous()}, {@code next()},
     * {@code next(int)}, {@code preceding(int)} and {@code following(int)}
     * when either the first or last text boundary has been reached.
     */
    public static final int DONE = -1;
26051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
    /**
     * Returns the first boundary. The iterator's current position is set
     * to the first text boundary.
     *
     * @return the character index of the first text boundary
     */
    public abstract int first();
26751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
    /**
     * Returns the last boundary. The iterator's current position is set
     * to the last text boundary.
     *
     * @return the character index of the last text boundary
     */
    public abstract int last();
27451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
    /**
     * Returns the nth boundary from the current boundary. If either
     * the first or last text boundary has been reached, it returns
     * <code>BreakIterator.DONE</code> and the current position is set to either
     * the first or last text boundary depending on which one is reached. Otherwise,
     * the iterator's current position is set to the new boundary.
     * <p>
     * For example, if the iterator's current position is the mth text boundary
     * and three more boundaries exist from the current boundary to the last text
     * boundary, the next(2) call will return the character index of the
     * (m + 2)th text boundary, and the new text position is set to that
     * boundary. A next(4) call would return
     * <code>BreakIterator.DONE</code> and the last text boundary would become the
     * new text position.
     *
     * @param n which boundary to return.  A value of 0
     * does nothing.  Negative values move to previous boundaries
     * and positive values move to later boundaries.
     * @return The character index of the nth boundary from the current position
     * or <code>BreakIterator.DONE</code> if either first or last text boundary
     * has been reached.
     */
    public abstract int next(int n);
29551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
    /**
     * Returns the boundary following the current boundary. If the current boundary
     * is the last text boundary, it returns <code>BreakIterator.DONE</code> and
     * the iterator's current position is unchanged. Otherwise, the iterator's
     * current position is set to the boundary following the current boundary.
     * Equivalent to next(1).
     *
     * @return The character index of the next text boundary or
     * <code>BreakIterator.DONE</code> if the current boundary is the last text
     * boundary.
     * @see #next(int)
     */
    public abstract int next();
30851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
    /**
     * Returns the boundary preceding the current boundary. If the current boundary
     * is the first text boundary, it returns <code>BreakIterator.DONE</code> and
     * the iterator's current position is unchanged. Otherwise, the iterator's
     * current position is set to the boundary preceding the current boundary.
     *
     * @return The character index of the previous text boundary or
     * <code>BreakIterator.DONE</code> if the current boundary is the first text
     * boundary.
     */
    public abstract int previous();
31951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
    /**
     * Returns the first boundary following the specified character offset. If the
     * specified offset equals the last text boundary, it returns
     * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
     * Otherwise, the iterator's current position is set to the returned boundary.
     * The value returned is always greater than the offset or the value
     * <code>BreakIterator.DONE</code>.
     *
     * @param offset the character offset to begin scanning.
     * @return The first boundary after the specified offset or
     * <code>BreakIterator.DONE</code> if the last text boundary is passed in
     * as the offset.
     * @throws IllegalArgumentException if the specified offset is less than
     * the first text boundary or greater than the last text boundary.
     */
    public abstract int following(int offset);
33551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
33651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
33751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the last boundary preceding the specified character offset. If the
33851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * specified offset equals to the first text boundary, it returns
33951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
34051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Otherwise, the iterator's current position is set to the returned boundary.
34151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * The value returned is always less than the offset or the value
34251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <code>BreakIterator.DONE</code>.
3436e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin     * @param offset the character offset to begin scanning.
34451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return The last boundary before the specified offset or
34551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <code>BreakIterator.DONE</code> if the first text boundary is passed in
34651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * as the offset.
34751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @exception   IllegalArgumentException if the specified offset is less than
34851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * the first text boundary or greater than the last text boundary.
34951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @since 1.2
35051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
35151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public int preceding(int offset) {
35251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // NOTE:  This implementation is here solely because we can't add new
35351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // abstract methods to an existing class.  There is almost ALWAYS a
35451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // better, faster way to do this.
35551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int pos = following(offset);
3566e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin        while (pos >= offset && pos != DONE) {
35751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            pos = previous();
3586e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin        }
35951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return pos;
36051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
36151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
36251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
36351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns true if the specified character offset is a text boundary.
36451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param offset the character offset to check.
36551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return <code>true</code> if "offset" is a boundary position,
36651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <code>false</code> otherwise.
36751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @exception   IllegalArgumentException if the specified offset is less than
36851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * the first text boundary or greater than the last text boundary.
36951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @since 1.2
37051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
37151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public boolean isBoundary(int offset) {
37251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // NOTE: This implementation probably is wrong for most situations
37351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // because it fails to take into account the possibility that a
37451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // CharacterIterator passed to setText() may not have a begin offset
37551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // of 0.  But since the abstract BreakIterator doesn't have that
37651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // knowledge, it assumes the begin offset is 0.  If you subclass
37751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // BreakIterator, copy the SimpleTextBoundary implementation of this
37851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // function into your subclass.  [This should have been abstract at
37951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // this level, but it's too late to fix that now.]
38051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (offset == 0) {
38151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return true;
38251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
38351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int boundary = following(offset - 1);
38451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (boundary == DONE) {
38551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new IllegalArgumentException();
38651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
38751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return boundary == offset;
38851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
38951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
/**
 * Returns the character index of the text boundary that was most
 * recently returned by next(), next(int), previous(), first(), last(),
 * following(int) or preceding(int). If any of these methods returns
 * <code>BreakIterator.DONE</code> because either the first or the last text
 * boundary has been reached, it returns the first or last text boundary
 * depending on which one is reached.
 * @return The text boundary returned from the above methods, or the first
 * or last text boundary.
 * @see #next()
 * @see #next(int)
 * @see #previous()
 * @see #first()
 * @see #last()
 * @see #following(int)
 * @see #preceding(int)
 */
public abstract int current();
40851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
/**
 * Returns the text being scanned.
 * @return the text being scanned
 * @see #setText(CharacterIterator)
 */
public abstract CharacterIterator getText();
41451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
41551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
41651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Set a new text string to be scanned.  The current scan
41751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * position is reset to first().
41851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param newText new text to scan.
41951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
4204c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public void setText(String newText)
4214c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
42251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        setText(new StringCharacterIterator(newText));
42351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
42451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
/**
 * Sets a new text for scanning.  The current scan
 * position is reset to first().
 * @param newText new text to scan.
 * @see #getText()
 */
public abstract void setText(CharacterIterator newText);
43151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
43276f589b84b39b103d7b3c7715d93a34edb9f8f8fJoachim Sauer    // Android-removed: Removed code related to BreakIteratorProvider support.
43376f589b84b39b103d7b3c7715d93a34edb9f8f8fJoachim Sauer
43451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
43551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns a new <code>BreakIterator</code> instance
4366e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin     * for <a href="BreakIterator.html#word">word breaks</a>
43751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the {@linkplain Locale#getDefault() default locale}.
43851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return A break iterator for word breaks
43951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
4404c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public static BreakIterator getWordInstance()
4414c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
44251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return getWordInstance(Locale.getDefault());
44351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
44451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
44551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
44651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns a new <code>BreakIterator</code> instance
4476e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin     * for <a href="BreakIterator.html#word">word breaks</a>
44851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the given locale.
44951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param locale the desired locale
45051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return A break iterator for word breaks
4514c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin     * @exception NullPointerException if <code>locale</code> is null
45251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
4534c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public static BreakIterator getWordInstance(Locale locale)
4544c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
45592924f23a03635bb194b6481c4a950e6414ca4e4Neil Fuller        // Android-changed: Switched to ICU.
456133d3d2f58ce6299616c4b345d2ffedc2eeb8d93Shubham Ajmera        return new IcuIteratorWrapper(
457133d3d2f58ce6299616c4b345d2ffedc2eeb8d93Shubham Ajmera                android.icu.text.BreakIterator.getWordInstance(locale));
45851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
45951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
46051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
46151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns a new <code>BreakIterator</code> instance
4626e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin     * for <a href="BreakIterator.html#line">line breaks</a>
46351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the {@linkplain Locale#getDefault() default locale}.
46451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return A break iterator for line breaks
46551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
4664c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public static BreakIterator getLineInstance()
4674c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
46851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return getLineInstance(Locale.getDefault());
46951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
47051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
47151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
47251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns a new <code>BreakIterator</code> instance
4736e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin     * for <a href="BreakIterator.html#line">line breaks</a>
47451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the given locale.
47551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param locale the desired locale
47651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return A break iterator for line breaks
4774c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin     * @exception NullPointerException if <code>locale</code> is null
47851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
4794c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public static BreakIterator getLineInstance(Locale locale)
4804c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
48192924f23a03635bb194b6481c4a950e6414ca4e4Neil Fuller        // Android-changed: Switched to ICU.
482133d3d2f58ce6299616c4b345d2ffedc2eeb8d93Shubham Ajmera        return new IcuIteratorWrapper(
483133d3d2f58ce6299616c4b345d2ffedc2eeb8d93Shubham Ajmera                android.icu.text.BreakIterator.getLineInstance(locale));
48451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
48551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
48651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
48751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns a new <code>BreakIterator</code> instance
4886e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin     * for <a href="BreakIterator.html#character">character breaks</a>
48951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the {@linkplain Locale#getDefault() default locale}.
49051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return A break iterator for character breaks
49151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
4924c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public static BreakIterator getCharacterInstance()
4934c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
49451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return getCharacterInstance(Locale.getDefault());
49551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
49651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
49751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
49851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns a new <code>BreakIterator</code> instance
4996e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin     * for <a href="BreakIterator.html#character">character breaks</a>
50051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the given locale.
50151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param locale the desired locale
50251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return A break iterator for character breaks
5034c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin     * @exception NullPointerException if <code>locale</code> is null
50451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
5054c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public static BreakIterator getCharacterInstance(Locale locale)
5064c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
50792924f23a03635bb194b6481c4a950e6414ca4e4Neil Fuller        // Android-changed: Switched to ICU.
508133d3d2f58ce6299616c4b345d2ffedc2eeb8d93Shubham Ajmera        return new IcuIteratorWrapper(
509133d3d2f58ce6299616c4b345d2ffedc2eeb8d93Shubham Ajmera                android.icu.text.BreakIterator.getCharacterInstance(locale));
51051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
51151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
51251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
51351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns a new <code>BreakIterator</code> instance
5146e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin     * for <a href="BreakIterator.html#sentence">sentence breaks</a>
51551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the {@linkplain Locale#getDefault() default locale}.
51651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return A break iterator for sentence breaks
51751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
5184c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public static BreakIterator getSentenceInstance()
5194c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
52051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return getSentenceInstance(Locale.getDefault());
52151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
52251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
52351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
52451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns a new <code>BreakIterator</code> instance
5256e42190c7f7d7cf3d8b787c918de0d797c6ddbbaPaul Duffin     * for <a href="BreakIterator.html#sentence">sentence breaks</a>
52651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the given locale.
52751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param locale the desired locale
52851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return A break iterator for sentence breaks
5294c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin     * @exception NullPointerException if <code>locale</code> is null
53051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
5314c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public static BreakIterator getSentenceInstance(Locale locale)
5324c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
53392924f23a03635bb194b6481c4a950e6414ca4e4Neil Fuller        // Android-changed: Switched to ICU.
534133d3d2f58ce6299616c4b345d2ffedc2eeb8d93Shubham Ajmera        return new IcuIteratorWrapper(
535133d3d2f58ce6299616c4b345d2ffedc2eeb8d93Shubham Ajmera                android.icu.text.BreakIterator.getSentenceInstance(locale));
53651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
53751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
53876f589b84b39b103d7b3c7715d93a34edb9f8f8fJoachim Sauer    // Android-removed: Removed code related to BreakIteratorProvider support.
53976f589b84b39b103d7b3c7715d93a34edb9f8f8fJoachim Sauer
54076f589b84b39b103d7b3c7715d93a34edb9f8f8fJoachim Sauer    // Android-changed: Removed references to BreakIteratorProvider from JavaDoc.
54151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
54251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns an array of all locales for which the
54351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <code>get*Instance</code> methods of this class can return
54451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * localized instances.
54551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
54651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return An array of locales for which localized
5474c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin     *         <code>BreakIterator</code> instances are available.
54851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
5494c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    public static synchronized Locale[] getAvailableLocales()
5504c8d6d2a84cffba9a34ea03d534d77027fb1974ePaul Duffin    {
55192924f23a03635bb194b6481c4a950e6414ca4e4Neil Fuller        // Android-changed: Switched to ICU.
552133d3d2f58ce6299616c4b345d2ffedc2eeb8d93Shubham Ajmera        return android.icu.text.BreakIterator.getAvailableLocales();
55351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
55451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski}
555