1
2/*
3 * Copyright (C) 2011 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package android.text.method;
19
20import android.text.Selection;
21import android.text.SpannableStringBuilder;
22
23import java.text.BreakIterator;
24import java.util.Locale;
25
26/**
27 * Walks through cursor positions at word boundaries. Internally uses
28 * {@link BreakIterator#getWordInstance()}, and caches {@link CharSequence}
29 * for performance reasons.
30 *
31 * Also provides methods to determine word boundaries.
32 * {@hide}
33 */
34public class WordIterator implements Selection.PositionIterator {
35    // Size of the window for the word iterator, should be greater than the longest word's length
36    private static final int WINDOW_WIDTH = 50;
37
38    private String mString;
39    private int mOffsetShift;
40
41    private BreakIterator mIterator;
42
43    /**
44     * Constructs a WordIterator using the default locale.
45     */
46    public WordIterator() {
47        this(Locale.getDefault());
48    }
49
50    /**
51     * Constructs a new WordIterator for the specified locale.
52     * @param locale The locale to be used when analysing the text.
53     */
54    public WordIterator(Locale locale) {
55        mIterator = BreakIterator.getWordInstance(locale);
56    }
57
58    public void setCharSequence(CharSequence charSequence, int start, int end) {
59        mOffsetShift = Math.max(0, start - WINDOW_WIDTH);
60        final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);
61
62        if (charSequence instanceof SpannableStringBuilder) {
63            mString = ((SpannableStringBuilder) charSequence).substring(mOffsetShift, windowEnd);
64        } else {
65            mString = charSequence.subSequence(mOffsetShift, windowEnd).toString();
66        }
67        mIterator.setText(mString);
68    }
69
70    /** {@inheritDoc} */
71    public int preceding(int offset) {
72        int shiftedOffset = offset - mOffsetShift;
73        do {
74            shiftedOffset = mIterator.preceding(shiftedOffset);
75            if (shiftedOffset == BreakIterator.DONE) {
76                return BreakIterator.DONE;
77            }
78            if (isOnLetterOrDigit(shiftedOffset)) {
79                return shiftedOffset + mOffsetShift;
80            }
81        } while (true);
82    }
83
84    /** {@inheritDoc} */
85    public int following(int offset) {
86        int shiftedOffset = offset - mOffsetShift;
87        do {
88            shiftedOffset = mIterator.following(shiftedOffset);
89            if (shiftedOffset == BreakIterator.DONE) {
90                return BreakIterator.DONE;
91            }
92            if (isAfterLetterOrDigit(shiftedOffset)) {
93                return shiftedOffset + mOffsetShift;
94            }
95        } while (true);
96    }
97
98    /** If <code>offset</code> is within a word, returns the index of the first character of that
99     * word, otherwise returns BreakIterator.DONE.
100     *
101     * The offsets that are considered to be part of a word are the indexes of its characters,
102     * <i>as well as</i> the index of its last character plus one.
103     * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
104     *
105     * Valid range for offset is [0..textLength] (note the inclusive upper bound).
106     * The returned value is within [0..offset] or BreakIterator.DONE.
107     *
108     * @throws IllegalArgumentException is offset is not valid.
109     */
110    public int getBeginning(int offset) {
111        final int shiftedOffset = offset - mOffsetShift;
112        checkOffsetIsValid(shiftedOffset);
113
114        if (isOnLetterOrDigit(shiftedOffset)) {
115            if (mIterator.isBoundary(shiftedOffset)) {
116                return shiftedOffset + mOffsetShift;
117            } else {
118                return mIterator.preceding(shiftedOffset) + mOffsetShift;
119            }
120        } else {
121            if (isAfterLetterOrDigit(shiftedOffset)) {
122                return mIterator.preceding(shiftedOffset) + mOffsetShift;
123            }
124        }
125        return BreakIterator.DONE;
126    }
127
128    /** If <code>offset</code> is within a word, returns the index of the last character of that
129     * word plus one, otherwise returns BreakIterator.DONE.
130     *
131     * The offsets that are considered to be part of a word are the indexes of its characters,
132     * <i>as well as</i> the index of its last character plus one.
133     * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
134     *
135     * Valid range for offset is [0..textLength] (note the inclusive upper bound).
136     * The returned value is within [offset..textLength] or BreakIterator.DONE.
137     *
138     * @throws IllegalArgumentException is offset is not valid.
139     */
140    public int getEnd(int offset) {
141        final int shiftedOffset = offset - mOffsetShift;
142        checkOffsetIsValid(shiftedOffset);
143
144        if (isAfterLetterOrDigit(shiftedOffset)) {
145            if (mIterator.isBoundary(shiftedOffset)) {
146                return shiftedOffset + mOffsetShift;
147            } else {
148                return mIterator.following(shiftedOffset) + mOffsetShift;
149            }
150        } else {
151            if (isOnLetterOrDigit(shiftedOffset)) {
152                return mIterator.following(shiftedOffset) + mOffsetShift;
153            }
154        }
155        return BreakIterator.DONE;
156    }
157
158    private boolean isAfterLetterOrDigit(int shiftedOffset) {
159        if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
160            final int codePoint = mString.codePointBefore(shiftedOffset);
161            if (Character.isLetterOrDigit(codePoint)) return true;
162        }
163        return false;
164    }
165
166    private boolean isOnLetterOrDigit(int shiftedOffset) {
167        if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
168            final int codePoint = mString.codePointAt(shiftedOffset);
169            if (Character.isLetterOrDigit(codePoint)) return true;
170        }
171        return false;
172    }
173
174    private void checkOffsetIsValid(int shiftedOffset) {
175        if (shiftedOffset < 0 || shiftedOffset > mString.length()) {
176            throw new IllegalArgumentException("Invalid offset: " + (shiftedOffset + mOffsetShift) +
177                    ". Valid range is [" + mOffsetShift + ", " + (mString.length() + mOffsetShift) +
178                    "]");
179        }
180    }
181}
182