1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.text; 19 20import libcore.icu.CollationElementIteratorICU; 21 22/** 23 * Created by a {@code RuleBasedCollator} to iterate through a string. The 24 * result of each iteration is a 32-bit collation element that defines the 25 * ordering priority of the next character or sequence of characters in the 26 * source string. 27 * <p> 28 * For illustration, consider the following in Spanish: 29 * <p> 30 * "ca": the first collation element is collation_element('c') and second 31 * collation element is collation_element('a'). 32 * <p> 33 * Since "ch" in Spanish sorts as one entity, the example below returns one 34 * collation element for the two characters 'c' and 'h': 35 * <p> 36 * "cha": the first collation element is collation_element('ch') and the second 37 * one is collation_element('a'). 38 * <p> 39 * In German, since the character '\u0086' is a composed character of 'a' 40 * and 'e', the iterator returns two collation elements for the single character 41 * '\u0086': 42 * <p> 43 * "\u0086b": the first collation element is collation_element('a'), the 44 * second one is collation_element('e'), and the third collation element is 45 * collation_element('b'). 46 */ 47public final class CollationElementIterator { 48 49 /** 50 * This constant is returned by the iterator in the methods 51 * {@code next()} and {@code previous()} when the end or the 52 * beginning of the source string has been reached, and there are no more 53 * valid collation elements to return. 54 */ 55 public static final int NULLORDER = -1; 56 57 private CollationElementIteratorICU icuIterator; 58 59 CollationElementIterator(CollationElementIteratorICU iterator) { 60 this.icuIterator = iterator; 61 } 62 63 /** 64 * Obtains the maximum length of any expansion sequence that ends with the 65 * specified collation element. Returns {@code 1} if there is no expansion 66 * with this collation element as the last element. 67 * 68 * @param order 69 * a collation element that has been previously obtained from a 70 * call to either the {@link #next()} or {@link #previous()} 71 * method. 72 * @return the maximum length of any expansion sequence ending with the 73 * specified collation element. 74 */ 75 public int getMaxExpansion(int order) { 76 return this.icuIterator.getMaxExpansion(order); 77 } 78 79 /** 80 * Obtains the character offset in the source string corresponding to the 81 * next collation element. This value could be any of: 82 * <ul> 83 * <li>The index of the first character in the source string that matches 84 * the value of the next collation element. This means that if 85 * {@code setOffset(offset)} sets the index in the middle of a contraction, 86 * {@code getOffset()} returns the index of the first character in the 87 * contraction, which may not be equal to the original offset that was set. 88 * Hence calling {@code getOffset()} immediately after 89 * {@code setOffset(offset)} does not guarantee that the original offset set 90 * will be returned.</li> 91 * <li>If normalization is on, the index of the immediate subsequent 92 * character, or composite character with the first character, having a 93 * combining class of 0.</li> 94 * <li>The length of the source string, if iteration has reached the end. 95 * </li> 96 * </ul> 97 * 98 * @return The position of the collation element in the source string that 99 * will be returned by the next invocation of the {@link #next()} 100 * method. 101 */ 102 public int getOffset() { 103 return this.icuIterator.getOffset(); 104 } 105 106 /** 107 * Obtains the next collation element in the source string. 108 * 109 * @return the next collation element or {@code NULLORDER} if the end 110 * of the iteration has been reached. 111 */ 112 public int next() { 113 return this.icuIterator.next(); 114 } 115 116 /** 117 * Obtains the previous collation element in the source string. 118 * 119 * @return the previous collation element, or {@code NULLORDER} when 120 * the start of the iteration has been reached. 121 */ 122 public int previous() { 123 return this.icuIterator.previous(); 124 } 125 126 /** 127 * Obtains the primary order of the specified collation element, i.e. the 128 * first 16 bits. This value is unsigned. 129 * 130 * @param order 131 * the element of the collation. 132 * @return the element's 16 bit primary order. 133 */ 134 public static final int primaryOrder(int order) { 135 return CollationElementIteratorICU.primaryOrder(order); 136 } 137 138 /** 139 * Repositions the cursor to point at the first element of the current 140 * string. The next call to {@link #next()} or {@link #previous()} will 141 * return the first and last collation element in the string, respectively. 142 * <p> 143 * If the {@code RuleBasedCollator} used by this iterator has had its 144 * attributes changed, calling {@code reset()} reinitializes the iterator to 145 * use the new attributes. 146 */ 147 public void reset() { 148 this.icuIterator.reset(); 149 } 150 151 /** 152 * Obtains the secondary order of the specified collation element, i.e. the 153 * 16th to 23th bits, inclusive. This value is unsigned. 154 * 155 * @param order 156 * the element of the collator. 157 * @return the 8 bit secondary order of the element. 158 */ 159 public static final short secondaryOrder(int order) { 160 return (short) CollationElementIteratorICU.secondaryOrder(order); 161 } 162 163 /** 164 * Points the iterator at the collation element associated with the 165 * character in the source string which is found at the supplied offset. 166 * After this call completes, an invocation of the {@link #next()} method 167 * will return this collation element. 168 * <p> 169 * If {@code newOffset} corresponds to a character which is part of a 170 * sequence that maps to a single collation element then the iterator is 171 * adjusted to the start of that sequence. As a result of this, any 172 * subsequent call made to {@code getOffset()} may not return the same value 173 * set by this method. 174 * <p> 175 * If the decomposition mode is on, and offset is in the middle of a 176 * decomposable range of source text, the iterator may not return a correct 177 * result for the next forwards or backwards iteration. The user must ensure 178 * that the offset is not in the middle of a decomposable range. 179 * 180 * @param newOffset 181 * the character offset into the original source string to set. 182 * Note that this is not an offset into the corresponding 183 * sequence of collation elements. 184 */ 185 public void setOffset(int newOffset) { 186 this.icuIterator.setOffset(newOffset); 187 } 188 189 /** 190 * Sets a new source string iterator for iteration, and resets the offset to 191 * the beginning of the text. 192 * 193 * @param source 194 * the new source string iterator for iteration. 195 */ 196 public void setText(CharacterIterator source) { 197 this.icuIterator.setText(source); 198 } 199 200 /** 201 * Sets a new source string for iteration, and resets the offset to the 202 * beginning of the text. 203 * 204 * @param source 205 * the new source string for iteration. 206 */ 207 public void setText(String source) { 208 this.icuIterator.setText(source); 209 } 210 211 /** 212 * Obtains the tertiary order of the specified collation element, i.e. the 213 * last 8 bits. This value is unsigned. 214 * 215 * @param order 216 * the element of the collation. 217 * @return the 8 bit tertiary order of the element. 218 */ 219 public static final short tertiaryOrder(int order) { 220 return (short) CollationElementIteratorICU.tertiaryOrder(order); 221 } 222} 223