WordIterator.java revision 6435a56a8c02de98befcc8cd743b2b638cffb327
1
2/*
3 * Copyright (C) 2011 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package android.text.method;
19
20import android.text.CharSequenceIterator;
21import android.text.Editable;
22import android.text.Selection;
23import android.text.Spanned;
24import android.text.TextWatcher;
25
26import java.text.BreakIterator;
27import java.text.CharacterIterator;
28import java.util.Locale;
29
30/**
31 * Walks through cursor positions at word boundaries. Internally uses
32 * {@link BreakIterator#getWordInstance()}, and caches {@link CharSequence}
33 * for performance reasons.
34 *
35 * Also provides methods to determine word boundaries.
36 * {@hide}
37 */
38public class WordIterator implements Selection.PositionIterator {
39    private CharSequence mCurrent;
40    private boolean mCurrentDirty = false;
41
42    private BreakIterator mIterator;
43
44    /**
45     * Constructs a WordIterator using the default locale.
46     */
47    public WordIterator() {
48        this(Locale.getDefault());
49    }
50
51    /**
52     * Constructs a new WordIterator for the specified locale.
53     * @param locale The locale to be used when analysing the text.
54     */
55    public WordIterator(Locale locale) {
56        mIterator = BreakIterator.getWordInstance(locale);
57    }
58
59    private final TextWatcher mWatcher = new TextWatcher() {
60        /** {@inheritDoc} */
61        public void beforeTextChanged(CharSequence s, int start, int count, int after) {
62            // ignored
63        }
64
65        /** {@inheritDoc} */
66        public void onTextChanged(CharSequence s, int start, int before, int count) {
67            mCurrentDirty = true;
68        }
69
70        /** {@inheritDoc} */
71        public void afterTextChanged(Editable s) {
72            // ignored
73        }
74    };
75
76    public void forceUpdate() {
77        mCurrentDirty = true;
78    }
79
80    public void setCharSequence(CharSequence incoming) {
81        // When incoming is different object, move listeners to new sequence
82        // and mark as dirty so we reload contents.
83        if (mCurrent != incoming) {
84            if (mCurrent instanceof Editable) {
85                ((Editable) mCurrent).removeSpan(mWatcher);
86            }
87
88            if (incoming instanceof Editable) {
89                ((Editable) incoming).setSpan(
90                        mWatcher, 0, incoming.length(), Spanned.SPAN_INCLUSIVE_INCLUSIVE);
91            }
92
93            mCurrent = incoming;
94            mCurrentDirty = true;
95        }
96
97        if (mCurrentDirty) {
98            final CharacterIterator charIterator = new CharSequenceIterator(mCurrent);
99            mIterator.setText(charIterator);
100
101            mCurrentDirty = false;
102        }
103    }
104
105    /** {@inheritDoc} */
106    public int preceding(int offset) {
107        do {
108            offset = mIterator.preceding(offset);
109            if (offset == BreakIterator.DONE || isOnLetterOrDigit(offset)) {
110                break;
111            }
112        } while (true);
113
114        return offset;
115    }
116
117    /** {@inheritDoc} */
118    public int following(int offset) {
119        do {
120            offset = mIterator.following(offset);
121            if (offset == BreakIterator.DONE || isAfterLetterOrDigit(offset)) {
122                break;
123            }
124        } while (true);
125
126        return offset;
127    }
128
129    /** If <code>offset</code> is within a word, returns the index of the first character of that
130     * word, otherwise returns BreakIterator.DONE.
131     *
132     * The offsets that are considered to be part of a word are the indexes of its characters,
133     * <i>as well as</i> the index of its last character plus one.
134     * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
135     *
136     * Valid range for offset is [0..textLength] (note the inclusive upper bound).
137     * The returned value is within [0..offset] or BreakIterator.DONE.
138     *
139     * @throws IllegalArgumentException is offset is not valid.
140     */
141    public int getBeginning(int offset) {
142        checkOffsetIsValid(offset);
143
144        if (isOnLetterOrDigit(offset)) {
145            if (mIterator.isBoundary(offset)) {
146                return offset;
147            } else {
148                return mIterator.preceding(offset);
149            }
150        } else {
151            if (isAfterLetterOrDigit(offset)) {
152                return mIterator.preceding(offset);
153            }
154        }
155        return BreakIterator.DONE;
156    }
157
158    /** If <code>offset</code> is within a word, returns the index of the last character of that
159     * word plus one, otherwise returns BreakIterator.DONE.
160     *
161     * The offsets that are considered to be part of a word are the indexes of its characters,
162     * <i>as well as</i> the index of its last character plus one.
163     * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
164     *
165     * Valid range for offset is [0..textLength] (note the inclusive upper bound).
166     * The returned value is within [offset..textLength] or BreakIterator.DONE.
167     *
168     * @throws IllegalArgumentException is offset is not valid.
169     */
170    public int getEnd(int offset) {
171        checkOffsetIsValid(offset);
172
173        if (isAfterLetterOrDigit(offset)) {
174            if (mIterator.isBoundary(offset)) {
175                return offset;
176            } else {
177                return mIterator.following(offset);
178            }
179        } else {
180            if (isOnLetterOrDigit(offset)) {
181                return mIterator.following(offset);
182            }
183        }
184        return BreakIterator.DONE;
185    }
186
187    private boolean isAfterLetterOrDigit(int offset) {
188        if (offset - 1 >= 0) {
189            final char previousChar = mCurrent.charAt(offset - 1);
190            if (Character.isLetterOrDigit(previousChar)) return true;
191            if (offset - 2 >= 0) {
192                final char previousPreviousChar = mCurrent.charAt(offset - 2);
193                if (Character.isSurrogatePair(previousPreviousChar, previousChar)) {
194                    final int codePoint = Character.toCodePoint(previousPreviousChar, previousChar);
195                    return Character.isLetterOrDigit(codePoint);
196                }
197            }
198        }
199        return false;
200    }
201
202    private boolean isOnLetterOrDigit(int offset) {
203        final int length = mCurrent.length();
204        if (offset < length) {
205            final char currentChar = mCurrent.charAt(offset);
206            if (Character.isLetterOrDigit(currentChar)) return true;
207            if (offset + 1 < length) {
208                final char nextChar = mCurrent.charAt(offset + 1);
209                if (Character.isSurrogatePair(currentChar, nextChar)) {
210                    final int codePoint = Character.toCodePoint(currentChar, nextChar);
211                    return Character.isLetterOrDigit(codePoint);
212                }
213            }
214        }
215        return false;
216    }
217
218    private void checkOffsetIsValid(int offset) {
219        if (offset < 0 || offset > mCurrent.length()) {
220            final String message = "Invalid offset: " + offset +
221                    ". Valid range is [0, " + mCurrent.length() + "]";
222            throw new IllegalArgumentException(message);
223        }
224    }
225}
226