1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.text; 19 20// BEGIN android-note 21// The icu implementation used was changed from icu4j to icu4jni. 22// END android-note 23 24/** 25 * Created by a {@code RuleBasedCollator} to iterate through a string. The 26 * result of each iteration is a 32-bit collation element that defines the 27 * ordering priority of the next character or sequence of characters in the 28 * source string. 29 * <p> 30 * For illustration, consider the following in Spanish: 31 * <p> 32 * "ca": the first collation element is collation_element('c') and second 33 * collation element is collation_element('a'). 34 * <p> 35 * Since "ch" in Spanish sorts as one entity, the example below returns one 36 * collation element for the two characters 'c' and 'h': 37 * <p> 38 * "cha": the first collation element is collation_element('ch') and the second 39 * one is collation_element('a'). 40 * <p> 41 * In German, since the character '\u0086' is a composed character of 'a' 42 * and 'e', the iterator returns two collation elements for the single character 43 * '\u0086': 44 * <p> 45 * "\u0086b": the first collation element is collation_element('a'), the 46 * second one is collation_element('e'), and the third collation element is 47 * collation_element('b'). 48 */ 49public final class CollationElementIterator { 50 51 /** 52 * This constant is returned by the iterator in the methods 53 * {@code next()} and {@code previous()} when the end or the 54 * beginning of the source string has been reached, and there are no more 55 * valid collation elements to return. 56 */ 57 public static final int NULLORDER = -1; 58 59 private com.ibm.icu4jni.text.CollationElementIterator icuIterator; 60 61 CollationElementIterator(com.ibm.icu4jni.text.CollationElementIterator iterator) { 62 this.icuIterator = iterator; 63 } 64 65 /** 66 * Obtains the maximum length of any expansion sequence that ends with the 67 * specified collation element. Returns {@code 1} if there is no expansion 68 * with this collation element as the last element. 69 * 70 * @param order 71 * a collation element that has been previously obtained from a 72 * call to either the {@link #next()} or {@link #previous()} 73 * method. 74 * @return the maximum length of any expansion sequence ending with the 75 * specified collation element. 76 */ 77 public int getMaxExpansion(int order) { 78 return this.icuIterator.getMaxExpansion(order); 79 } 80 81 /** 82 * Obtains the character offset in the source string corresponding to the 83 * next collation element. This value could be any of: 84 * <ul> 85 * <li>The index of the first character in the source string that matches 86 * the value of the next collation element. This means that if 87 * {@code setOffset(offset)} sets the index in the middle of a contraction, 88 * {@code getOffset()} returns the index of the first character in the 89 * contraction, which may not be equal to the original offset that was set. 90 * Hence calling {@code getOffset()} immediately after 91 * {@code setOffset(offset)} does not guarantee that the original offset set 92 * will be returned.</li> 93 * <li>If normalization is on, the index of the immediate subsequent 94 * character, or composite character with the first character, having a 95 * combining class of 0.</li> 96 * <li>The length of the source string, if iteration has reached the end. 97 * </li> 98 * </ul> 99 * 100 * @return The position of the collation element in the source string that 101 * will be returned by the next invocation of the {@link #next()} 102 * method. 103 */ 104 public int getOffset() { 105 return this.icuIterator.getOffset(); 106 } 107 108 /** 109 * Obtains the next collation element in the source string. 110 * 111 * @return the next collation element or {@code NULLORDER} if the end 112 * of the iteration has been reached. 113 */ 114 public int next() { 115 return this.icuIterator.next(); 116 } 117 118 /** 119 * Obtains the previous collation element in the source string. 120 * 121 * @return the previous collation element, or {@code NULLORDER} when 122 * the start of the iteration has been reached. 123 */ 124 public int previous() { 125 return this.icuIterator.previous(); 126 } 127 128 /** 129 * Obtains the primary order of the specified collation element, i.e. the 130 * first 16 bits. This value is unsigned. 131 * 132 * @param order 133 * the element of the collation. 134 * @return the element's 16 bit primary order. 135 */ 136 public static final int primaryOrder(int order) { 137 return com.ibm.icu4jni.text.CollationElementIterator.primaryOrder(order); 138 } 139 140 /** 141 * Repositions the cursor to point at the first element of the current 142 * string. The next call to {@link #next()} or {@link #previous()} will 143 * return the first and last collation element in the string, respectively. 144 * <p> 145 * If the {@code RuleBasedCollator} used by this iterator has had its 146 * attributes changed, calling {@code reset()} reinitializes the iterator to 147 * use the new attributes. 148 */ 149 public void reset() { 150 this.icuIterator.reset(); 151 } 152 153 /** 154 * Obtains the secondary order of the specified collation element, i.e. the 155 * 16th to 23th bits, inclusive. This value is unsigned. 156 * 157 * @param order 158 * the element of the collator. 159 * @return the 8 bit secondary order of the element. 160 */ 161 public static final short secondaryOrder(int order) { 162 return (short) com.ibm.icu4jni.text.CollationElementIterator 163 .secondaryOrder(order); 164 } 165 166 /** 167 * Points the iterator at the collation element associated with the 168 * character in the source string which is found at the supplied offset. 169 * After this call completes, an invocation of the {@link #next()} method 170 * will return this collation element. 171 * <p> 172 * If {@code newOffset} corresponds to a character which is part of a 173 * sequence that maps to a single collation element then the iterator is 174 * adjusted to the start of that sequence. As a result of this, any 175 * subsequent call made to {@code getOffset()} may not return the same value 176 * set by this method. 177 * <p> 178 * If the decomposition mode is on, and offset is in the middle of a 179 * decomposable range of source text, the iterator may not return a correct 180 * result for the next forwards or backwards iteration. The user must ensure 181 * that the offset is not in the middle of a decomposable range. 182 * 183 * @param newOffset 184 * the character offset into the original source string to set. 185 * Note that this is not an offset into the corresponding 186 * sequence of collation elements. 187 */ 188 public void setOffset(int newOffset) { 189 this.icuIterator.setOffset(newOffset); 190 } 191 192 /** 193 * Sets a new source string iterator for iteration, and resets the offset to 194 * the beginning of the text. 195 * 196 * @param source 197 * the new source string iterator for iteration. 198 */ 199 public void setText(CharacterIterator source) { 200 this.icuIterator.setText(source); 201 } 202 203 /** 204 * Sets a new source string for iteration, and resets the offset to the 205 * beginning of the text. 206 * 207 * @param source 208 * the new source string for iteration. 209 */ 210 public void setText(String source) { 211 this.icuIterator.setText(source); 212 } 213 214 /** 215 * Obtains the tertiary order of the specified collation element, i.e. the 216 * last 8 bits. This value is unsigned. 217 * 218 * @param order 219 * the element of the collation. 220 * @return the 8 bit tertiary order of the element. 221 */ 222 public static final short tertiaryOrder(int order) { 223 return (short) com.ibm.icu4jni.text.CollationElementIterator 224 .tertiaryOrder(order); 225 } 226} 227