1e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
2e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne/*
3e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * Copyright (C) 2011 The Android Open Source Project
4e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne *
5e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * Licensed under the Apache License, Version 2.0 (the "License");
6e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * you may not use this file except in compliance with the License.
7e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * You may obtain a copy of the License at
8e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne *
9e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne *      http://www.apache.org/licenses/LICENSE-2.0
10e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne *
11e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * Unless required by applicable law or agreed to in writing, software
12e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * distributed under the License is distributed on an "AS IS" BASIS,
13e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * See the License for the specific language governing permissions and
15e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * limitations under the License.
16e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne */
17e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
18e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunnepackage android.text.method;
19e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
20e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunneimport android.text.Selection;
21653d3a27878d5358b4a91518a756f6b9b3407b07Gilles Debunneimport android.text.SpannableStringBuilder;
22e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
23108c9604e3a418f9b0895599e3468d58869d4af8Roozbeh Pournaderimport android.icu.text.BreakIterator;
24e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunneimport java.util.Locale;
25e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
26e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne/**
27e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * Walks through cursor positions at word boundaries. Internally uses
28e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * {@link BreakIterator#getWordInstance()}, and caches {@link CharSequence}
29e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * for performance reasons.
30e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne *
31e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * Also provides methods to determine word boundaries.
32e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne * {@hide}
33e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne */
34e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunnepublic class WordIterator implements Selection.PositionIterator {
35287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne    // Size of the window for the word iterator, should be greater than the longest word's length
36287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne    private static final int WINDOW_WIDTH = 50;
37287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne
38287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne    private String mString;
39287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne    private int mOffsetShift;
40e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
41e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    private BreakIterator mIterator;
42e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
43e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    /**
44e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * Constructs a WordIterator using the default locale.
45e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     */
46e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    public WordIterator() {
47e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        this(Locale.getDefault());
48e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    }
49e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
50e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    /**
51e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * Constructs a new WordIterator for the specified locale.
52e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * @param locale The locale to be used when analysing the text.
53e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     */
54e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    public WordIterator(Locale locale) {
55e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        mIterator = BreakIterator.getWordInstance(locale);
56e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    }
57e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
58287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne    public void setCharSequence(CharSequence charSequence, int start, int end) {
59287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        mOffsetShift = Math.max(0, start - WINDOW_WIDTH);
60287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);
61e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
62653d3a27878d5358b4a91518a756f6b9b3407b07Gilles Debunne        if (charSequence instanceof SpannableStringBuilder) {
63653d3a27878d5358b4a91518a756f6b9b3407b07Gilles Debunne            mString = ((SpannableStringBuilder) charSequence).substring(mOffsetShift, windowEnd);
64653d3a27878d5358b4a91518a756f6b9b3407b07Gilles Debunne        } else {
65653d3a27878d5358b4a91518a756f6b9b3407b07Gilles Debunne            mString = charSequence.subSequence(mOffsetShift, windowEnd).toString();
66653d3a27878d5358b4a91518a756f6b9b3407b07Gilles Debunne        }
67287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        mIterator.setText(mString);
68e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    }
69e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
70e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    /** {@inheritDoc} */
71e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    public int preceding(int offset) {
72287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        int shiftedOffset = offset - mOffsetShift;
73e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        do {
74287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            shiftedOffset = mIterator.preceding(shiftedOffset);
75287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            if (shiftedOffset == BreakIterator.DONE) {
76287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                return BreakIterator.DONE;
77287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            }
78287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            if (isOnLetterOrDigit(shiftedOffset)) {
79287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                return shiftedOffset + mOffsetShift;
80e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne            }
81e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        } while (true);
82e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    }
83e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
84e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    /** {@inheritDoc} */
85e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    public int following(int offset) {
86287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        int shiftedOffset = offset - mOffsetShift;
87e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        do {
88287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            shiftedOffset = mIterator.following(shiftedOffset);
89287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            if (shiftedOffset == BreakIterator.DONE) {
90287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                return BreakIterator.DONE;
91287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            }
92287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            if (isAfterLetterOrDigit(shiftedOffset)) {
93287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                return shiftedOffset + mOffsetShift;
94e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne            }
95e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        } while (true);
96e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    }
97e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
986c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor    /** {@inheritDoc} */
996c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor    public boolean isBoundary(int offset) {
1006c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        int shiftedOffset = offset - mOffsetShift;
1016c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        checkOffsetIsValid(shiftedOffset);
1026c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        return mIterator.isBoundary(shiftedOffset);
1036c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor    }
1046c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor
1056c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor    /**
1066c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     * Returns the position of next boundary after the given offset. Returns
1076c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     * {@code DONE} if there is no boundary after the given offset.
1086c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     *
1096c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     * @param offset the given start position to search from.
1106c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     * @return the position of the last boundary preceding the given offset.
1116c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     */
1126c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor    public int nextBoundary(int offset) {
1136c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        int shiftedOffset = offset - mOffsetShift;
1146c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        shiftedOffset = mIterator.following(shiftedOffset);
1156c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        if (shiftedOffset == BreakIterator.DONE) {
1166c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor            return BreakIterator.DONE;
1176c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        }
1186c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        return shiftedOffset + mOffsetShift;
1196c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor    }
1206c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor
1216c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor    /**
1226c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     * Returns the position of boundary preceding the given offset or
1236c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     * {@code DONE} if the given offset specifies the starting position.
1246c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     *
1256c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     * @param offset the given start position to search from.
1266c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     * @return the position of the last boundary preceding the given offset.
1276c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor     */
1286c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor    public int prevBoundary(int offset) {
1296c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        int shiftedOffset = offset - mOffsetShift;
1306c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        shiftedOffset = mIterator.preceding(shiftedOffset);
1316c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        if (shiftedOffset == BreakIterator.DONE) {
1326c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor            return BreakIterator.DONE;
1336c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        }
1346c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor        return shiftedOffset + mOffsetShift;
1356c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor    }
1366c7b4ad690fe5c22c01ad79a232e567e835f676dMady Mellor
137e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    /** If <code>offset</code> is within a word, returns the index of the first character of that
138e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * word, otherwise returns BreakIterator.DONE.
139e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     *
140e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * The offsets that are considered to be part of a word are the indexes of its characters,
141e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * <i>as well as</i> the index of its last character plus one.
142e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
143e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     *
144e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * Valid range for offset is [0..textLength] (note the inclusive upper bound).
145e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * The returned value is within [0..offset] or BreakIterator.DONE.
146e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     *
147e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * @throws IllegalArgumentException is offset is not valid.
148e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     */
149e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    public int getBeginning(int offset) {
150e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor        // TODO: Check if usage of this can be updated to getBeginning(offset, true) if
151e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor        // so this method can be removed.
152e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor        return getBeginning(offset, false);
153e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    }
154e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor
155e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    /**
156e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * If <code>offset</code> is within a word, returns the index of the last character of that
157e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * word plus one, otherwise returns BreakIterator.DONE.
158e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
159e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * The offsets that are considered to be part of a word are the indexes of its characters,
160e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * <i>as well as</i> the index of its last character plus one.
161e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
162e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
163e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * Valid range for offset is [0..textLength] (note the inclusive upper bound).
164e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * The returned value is within [offset..textLength] or BreakIterator.DONE.
165e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
166e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * @throws IllegalArgumentException is offset is not valid.
167e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     */
168e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    public int getEnd(int offset) {
169e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor        // TODO: Check if usage of this can be updated to getEnd(offset, true), if
170e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor        // so this method can be removed.
171e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor        return getEnd(offset, false);
172e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    }
173e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor
174e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    /**
175e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * If the <code>offset</code> is within a word or on a word boundary that can only be
176e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * considered the start of a word (e.g. _word where "_" is any character that would not
177e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * be considered part of the word) then this returns the index of the first character of
178e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * that word.
179e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
180e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * If the offset is on a word boundary that can be considered the start and end of a
181e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
182e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * between AA and BB, this would return the start of the previous word, AA.
183e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
184e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * Returns BreakIterator.DONE if there is no previous boundary.
185e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
186e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * @throws IllegalArgumentException is offset is not valid.
187e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     */
188e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    public int getPrevWordBeginningOnTwoWordsBoundary(int offset) {
189e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor        return getBeginning(offset, true);
190e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    }
191e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor
192e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    /**
193e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * If the <code>offset</code> is within a word or on a word boundary that can only be
194e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * considered the end of a word (e.g. word_ where "_" is any character that would not
195e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * be considered part of the word) then this returns the index of the last character
196e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * plus one of that word.
197e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
198e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * If the offset is on a word boundary that can be considered the start and end of a
199e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
200e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * between AA and BB, this would return the end of the next word, BB.
201e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
202e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * Returns BreakIterator.DONE if there is no next boundary.
203e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
204e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * @throws IllegalArgumentException is offset is not valid.
205e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     */
206e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    public int getNextWordEndOnTwoWordBoundary(int offset) {
207e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor        return getEnd(offset, true);
208e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    }
209e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor
210e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    /**
211e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * If the <code>offset</code> is within a word or on a word boundary that can only be
212e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * considered the start of a word (e.g. _word where "_" is any character that would not
213e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * be considered part of the word) then this returns the index of the first character of
214e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * that word.
215e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
216e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * If the offset is on a word boundary that can be considered the start and end of a
217e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
218e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * between AA and BB, and getPrevWordBeginningOnTwoWordsBoundary is true then this would
219e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * return the start of the previous word, AA. Otherwise it would return the current offset,
220e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * the start of BB.
221e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
222e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * Returns BreakIterator.DONE if there is no previous boundary.
223e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     *
224e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * @throws IllegalArgumentException is offset is not valid.
225e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     */
226e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    private int getBeginning(int offset, boolean getPrevWordBeginningOnTwoWordsBoundary) {
227287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        final int shiftedOffset = offset - mOffsetShift;
228287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        checkOffsetIsValid(shiftedOffset);
229e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
230287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        if (isOnLetterOrDigit(shiftedOffset)) {
231e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor            if (mIterator.isBoundary(shiftedOffset)
232e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor                    && (!isAfterLetterOrDigit(shiftedOffset)
233e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor                            || !getPrevWordBeginningOnTwoWordsBoundary)) {
234287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                return shiftedOffset + mOffsetShift;
235e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne            } else {
236287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                return mIterator.preceding(shiftedOffset) + mOffsetShift;
237e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne            }
238e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        } else {
239287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            if (isAfterLetterOrDigit(shiftedOffset)) {
240287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                return mIterator.preceding(shiftedOffset) + mOffsetShift;
241e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne            }
242e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        }
243e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        return BreakIterator.DONE;
244e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    }
245e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
246e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    /**
247e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * If the <code>offset</code> is within a word or on a word boundary that can only be
248e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * considered the end of a word (e.g. word_ where "_" is any character that would not be
249e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * considered part of the word) then this returns the index of the last character plus one
250e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * of that word.
251e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     *
252e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * If the offset is on a word boundary that can be considered the start and end of a
253e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
254e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * between AA and BB, and getNextWordEndOnTwoWordBoundary is true then this would return
255e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * the end of the next word, BB. Otherwise it would return the current offset, the end
256e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * of AA.
257e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     *
258e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor     * Returns BreakIterator.DONE if there is no next boundary.
259e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     *
260e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     * @throws IllegalArgumentException is offset is not valid.
261e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne     */
262e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor    private int getEnd(int offset, boolean getNextWordEndOnTwoWordBoundary) {
263287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        final int shiftedOffset = offset - mOffsetShift;
264287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        checkOffsetIsValid(shiftedOffset);
265e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
266287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        if (isAfterLetterOrDigit(shiftedOffset)) {
267e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor            if (mIterator.isBoundary(shiftedOffset)
268e264ac392a886788ebfd1069e1d366e2b1edef72Mady Mellor                    && (!isOnLetterOrDigit(shiftedOffset) || !getNextWordEndOnTwoWordBoundary)) {
269287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                return shiftedOffset + mOffsetShift;
270e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne            } else {
271287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                return mIterator.following(shiftedOffset) + mOffsetShift;
272e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne            }
273e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        } else {
274287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            if (isOnLetterOrDigit(shiftedOffset)) {
275287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                return mIterator.following(shiftedOffset) + mOffsetShift;
276e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne            }
277e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        }
278e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        return BreakIterator.DONE;
279e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    }
280e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
28158c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    /**
28258c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * If <code>offset</code> is within a group of punctuation as defined
28358c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * by {@link #isPunctuation(int)}, returns the index of the first character
28458c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * of that group, otherwise returns BreakIterator.DONE.
28558c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     *
28658c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * @param offset the offset to search from.
28758c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     */
28858c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    public int getPunctuationBeginning(int offset) {
28958c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        while (offset != BreakIterator.DONE && !isPunctuationStartBoundary(offset)) {
29058c9087137989da8411ffd212072f630d3fac4f3Mady Mellor            offset = prevBoundary(offset);
29158c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        }
29258c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        // No need to shift offset, prevBoundary handles that.
29358c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        return offset;
29458c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    }
29558c9087137989da8411ffd212072f630d3fac4f3Mady Mellor
29658c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    /**
29758c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * If <code>offset</code> is within a group of punctuation as defined
29858c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * by {@link #isPunctuation(int)}, returns the index of the last character
29958c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * of that group plus one, otherwise returns BreakIterator.DONE.
30058c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     *
30158c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * @param offset the offset to search from.
30258c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     */
30358c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    public int getPunctuationEnd(int offset) {
30458c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        while (offset != BreakIterator.DONE && !isPunctuationEndBoundary(offset)) {
30558c9087137989da8411ffd212072f630d3fac4f3Mady Mellor            offset = nextBoundary(offset);
30658c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        }
30758c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        // No need to shift offset, nextBoundary handles that.
30858c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        return offset;
30958c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    }
31058c9087137989da8411ffd212072f630d3fac4f3Mady Mellor
31158c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    /**
31258c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * Indicates if the provided offset is after a punctuation character
31358c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * as defined by {@link #isPunctuation(int)}.
31458c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     *
31558c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * @param offset the offset to check from.
31658c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * @return Whether the offset is after a punctuation character.
31758c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     */
31858c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    public boolean isAfterPunctuation(int offset) {
31958c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        final int shiftedOffset = offset - mOffsetShift;
32058c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
32158c9087137989da8411ffd212072f630d3fac4f3Mady Mellor            final int codePoint = mString.codePointBefore(shiftedOffset);
32258c9087137989da8411ffd212072f630d3fac4f3Mady Mellor            return isPunctuation(codePoint);
32358c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        }
32458c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        return false;
32558c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    }
32658c9087137989da8411ffd212072f630d3fac4f3Mady Mellor
32758c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    /**
32858c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * Indicates if the provided offset is at a punctuation character
32958c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * as defined by {@link #isPunctuation(int)}.
33058c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     *
33158c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * @param offset the offset to check from.
33258c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     * @return Whether the offset is at a punctuation character.
33358c9087137989da8411ffd212072f630d3fac4f3Mady Mellor     */
33458c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    public boolean isOnPunctuation(int offset) {
33558c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        final int shiftedOffset = offset - mOffsetShift;
33658c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
33758c9087137989da8411ffd212072f630d3fac4f3Mady Mellor            final int codePoint = mString.codePointAt(shiftedOffset);
33858c9087137989da8411ffd212072f630d3fac4f3Mady Mellor            return isPunctuation(codePoint);
33958c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        }
34058c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        return false;
34158c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    }
34258c9087137989da8411ffd212072f630d3fac4f3Mady Mellor
34358c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    private boolean isPunctuationStartBoundary(int offset) {
34458c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        return isOnPunctuation(offset) && !isAfterPunctuation(offset);
34558c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    }
34658c9087137989da8411ffd212072f630d3fac4f3Mady Mellor
34758c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    private boolean isPunctuationEndBoundary(int offset) {
34858c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        return !isOnPunctuation(offset) && isAfterPunctuation(offset);
34958c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    }
35058c9087137989da8411ffd212072f630d3fac4f3Mady Mellor
35158c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    private boolean isPunctuation(int cp) {
35258c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        int type = Character.getType(cp);
35358c9087137989da8411ffd212072f630d3fac4f3Mady Mellor        return (type == Character.CONNECTOR_PUNCTUATION ||
35458c9087137989da8411ffd212072f630d3fac4f3Mady Mellor                type == Character.DASH_PUNCTUATION ||
35558c9087137989da8411ffd212072f630d3fac4f3Mady Mellor                type == Character.END_PUNCTUATION ||
35658c9087137989da8411ffd212072f630d3fac4f3Mady Mellor                type == Character.FINAL_QUOTE_PUNCTUATION ||
35758c9087137989da8411ffd212072f630d3fac4f3Mady Mellor                type == Character.INITIAL_QUOTE_PUNCTUATION ||
35858c9087137989da8411ffd212072f630d3fac4f3Mady Mellor                type == Character.OTHER_PUNCTUATION ||
35958c9087137989da8411ffd212072f630d3fac4f3Mady Mellor                type == Character.START_PUNCTUATION);
36058c9087137989da8411ffd212072f630d3fac4f3Mady Mellor    }
36158c9087137989da8411ffd212072f630d3fac4f3Mady Mellor
362287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne    private boolean isAfterLetterOrDigit(int shiftedOffset) {
363287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
364287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            final int codePoint = mString.codePointBefore(shiftedOffset);
365287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            if (Character.isLetterOrDigit(codePoint)) return true;
366e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        }
367e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        return false;
368e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    }
369e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
370287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne    private boolean isOnLetterOrDigit(int shiftedOffset) {
371287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
372287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            final int codePoint = mString.codePointAt(shiftedOffset);
373287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            if (Character.isLetterOrDigit(codePoint)) return true;
374e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        }
375e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        return false;
376e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    }
377e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne
378287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne    private void checkOffsetIsValid(int shiftedOffset) {
379287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne        if (shiftedOffset < 0 || shiftedOffset > mString.length()) {
380287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne            throw new IllegalArgumentException("Invalid offset: " + (shiftedOffset + mOffsetShift) +
381287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                    ". Valid range is [" + mOffsetShift + ", " + (mString.length() + mOffsetShift) +
382287d6c6e12a38864d019fa7b9184206bc8a31ea1Gilles Debunne                    "]");
383e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne        }
384e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne    }
385e193fd14346e6e808c6c266d2bb13c0c0cc6890eGilles Debunne}
386