WordIterator.java revision 6435a56a8c02de98befcc8cd743b2b638cffb327
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.text.method;

import android.text.CharSequenceIterator;
import android.text.Editable;
import android.text.Selection;
import android.text.Spanned;
import android.text.TextWatcher;

import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.util.Locale;

/**
 * Walks through cursor positions at word boundaries. Internally uses
 * {@link BreakIterator#getWordInstance(Locale)}, and caches the {@link CharSequence}
 * it was last given for performance reasons.
 *
 * Also provides methods to determine word boundaries.
 *
 * The boundary queries read the sequence installed by
 * {@link #setCharSequence(CharSequence)}.
 * {@hide}
 */
public class WordIterator implements Selection.PositionIterator {
    /** Sequence most recently handed to {@link #setCharSequence(CharSequence)}. */
    private CharSequence mCurrent;

    /** True when {@link #mIterator} has to be re-fed with {@link #mCurrent}. */
    private boolean mCurrentDirty;

    /** Word-break iterator; created once in the constructor and never replaced. */
    private final BreakIterator mIterator;

    /**
     * Constructs a WordIterator using the default locale.
     */
    public WordIterator() {
        this(Locale.getDefault());
    }

    /**
     * Constructs a new WordIterator for the specified locale.
     * @param locale The locale to be used when analysing the text.
     */
    public WordIterator(Locale locale) {
        mIterator = BreakIterator.getWordInstance(locale);
    }

    /**
     * Watcher attached as a span to the current sequence when it is an
     * {@link Editable}. Any text change reported to it flags the cache as dirty.
     */
    private final TextWatcher mWatcher = new TextWatcher() {
        /** {@inheritDoc} */
        public void beforeTextChanged(CharSequence s, int start, int count, int after) {
            // ignored
        }

        /** {@inheritDoc} */
        public void onTextChanged(CharSequence s, int start, int before, int count) {
            mCurrentDirty = true;
        }

        /** {@inheritDoc} */
        public void afterTextChanged(Editable s) {
            // ignored
        }
    };

    /**
     * Flags the cached text as dirty, so that the next call to
     * {@link #setCharSequence(CharSequence)} reloads it into the break iterator
     * even if the same object is passed again.
     */
    public void forceUpdate() {
        mCurrentDirty = true;
    }

    /**
     * Sets the text to iterate over.
     *
     * When <code>incoming</code> is a different object than the cached one, the change
     * watcher is moved from the old sequence to the new one (for {@link Editable}
     * sequences only). The break iterator is re-fed only when the cache is dirty.
     *
     * @param incoming The text whose word boundaries will be queried.
     */
    public void setCharSequence(CharSequence incoming) {
        // Identity comparison on purpose: a different object means the listener
        // has to move and the contents have to be reloaded.
        if (mCurrent != incoming) {
            if (mCurrent instanceof Editable) {
                ((Editable) mCurrent).removeSpan(mWatcher);
            }

            if (incoming instanceof Editable) {
                ((Editable) incoming).setSpan(
                        mWatcher, 0, incoming.length(), Spanned.SPAN_INCLUSIVE_INCLUSIVE);
            }

            mCurrent = incoming;
            mCurrentDirty = true;
        }

        if (mCurrentDirty) {
            final CharacterIterator charIterator = new CharSequenceIterator(mCurrent);
            mIterator.setText(charIterator);

            mCurrentDirty = false;
        }
    }

    /** {@inheritDoc} */
    public int preceding(int offset) {
        // Step backwards over boundaries until one sits on a letter or digit,
        // or until the break iterator reports that there is none left.
        int boundary = offset;
        do {
            boundary = mIterator.preceding(boundary);
        } while (boundary != BreakIterator.DONE && !isOnLetterOrDigit(boundary));

        return boundary;
    }

    /** {@inheritDoc} */
    public int following(int offset) {
        // Step forwards over boundaries until one directly follows a letter or
        // digit, or until the break iterator reports that there is none left.
        int boundary = offset;
        do {
            boundary = mIterator.following(boundary);
        } while (boundary != BreakIterator.DONE && !isAfterLetterOrDigit(boundary));

        return boundary;
    }

    /** If <code>offset</code> is within a word, returns the index of the first character of that
     * word, otherwise returns BreakIterator.DONE.
     *
     * The offsets that are considered to be part of a word are the indexes of its characters,
     * <i>as well as</i> the index of its last character plus one.
     * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
     *
     * Valid range for offset is [0..textLength] (note the inclusive upper bound).
     * The returned value is within [0..offset] or BreakIterator.DONE.
     *
     * @throws IllegalArgumentException if offset is not valid.
     */
    public int getBeginning(int offset) {
        checkOffsetIsValid(offset);

        if (isOnLetterOrDigit(offset)) {
            // Either already at the start of the word, or somewhere inside it.
            return mIterator.isBoundary(offset) ? offset : mIterator.preceding(offset);
        }
        if (isAfterLetterOrDigit(offset)) {
            // Just past the last character of a word.
            return mIterator.preceding(offset);
        }
        return BreakIterator.DONE;
    }

    /** If <code>offset</code> is within a word, returns the index of the last character of that
     * word plus one, otherwise returns BreakIterator.DONE.
     *
     * The offsets that are considered to be part of a word are the indexes of its characters,
     * <i>as well as</i> the index of its last character plus one.
     * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
     *
     * Valid range for offset is [0..textLength] (note the inclusive upper bound).
     * The returned value is within [offset..textLength] or BreakIterator.DONE.
     *
     * @throws IllegalArgumentException if offset is not valid.
     */
    public int getEnd(int offset) {
        checkOffsetIsValid(offset);

        if (isAfterLetterOrDigit(offset)) {
            // Either already at the end of the word, or somewhere inside it.
            return mIterator.isBoundary(offset) ? offset : mIterator.following(offset);
        }
        if (isOnLetterOrDigit(offset)) {
            // On the first character of a word.
            return mIterator.following(offset);
        }
        return BreakIterator.DONE;
    }

    /**
     * Tells whether the code point ending right before <code>offset</code> is a letter
     * or a digit. A surrogate pair ending at <code>offset</code> is decoded as one code
     * point; an unpaired surrogate is never a letter or digit.
     */
    private boolean isAfterLetterOrDigit(int offset) {
        if (offset < 1) {
            return false;
        }
        return Character.isLetterOrDigit(Character.codePointBefore(mCurrent, offset));
    }

    /**
     * Tells whether the code point starting at <code>offset</code> is a letter or a
     * digit. A surrogate pair starting at <code>offset</code> is decoded as one code
     * point; an unpaired surrogate is never a letter or digit.
     */
    private boolean isOnLetterOrDigit(int offset) {
        if (offset >= mCurrent.length()) {
            return false;
        }
        return Character.isLetterOrDigit(Character.codePointAt(mCurrent, offset));
    }

    /**
     * Ensures that <code>offset</code> lies within [0..textLength] of the current text.
     *
     * @throws IllegalArgumentException if offset is outside of that range.
     */
    private void checkOffsetIsValid(int offset) {
        if (offset < 0 || offset > mCurrent.length()) {
            final String message = "Invalid offset: " + offset +
                    ". Valid range is [0, " + mCurrent.length() + "]";
            throw new IllegalArgumentException(message);
        }
    }
}