// CollationElementIterator.java revision cec4dd4b1d33f78997603d0f89c0d0e56e64dbcd
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.text; 19 20/** 21 * Created by a {@code RuleBasedCollator} to iterate through a string. The 22 * result of each iteration is a 32-bit collation element that defines the 23 * ordering priority of the next character or sequence of characters in the 24 * source string. 25 * <p> 26 * For illustration, consider the following in Spanish: 27 * <p> 28 * "ca": the first collation element is collation_element('c') and second 29 * collation element is collation_element('a'). 30 * <p> 31 * Since "ch" in Spanish sorts as one entity, the example below returns one 32 * collation element for the two characters 'c' and 'h': 33 * <p> 34 * "cha": the first collation element is collation_element('ch') and the second 35 * one is collation_element('a'). 36 * <p> 37 * In German, since the character '\u0086' is a composed character of 'a' 38 * and 'e', the iterator returns two collation elements for the single character 39 * '\u0086': 40 * <p> 41 * "\u0086b": the first collation element is collation_element('a'), the 42 * second one is collation_element('e'), and the third collation element is 43 * collation_element('b'). 
44 */ 45public final class CollationElementIterator { 46 47 /** 48 * This constant is returned by the iterator in the methods 49 * {@code next()} and {@code previous()} when the end or the 50 * beginning of the source string has been reached, and there are no more 51 * valid collation elements to return. 52 */ 53 public static final int NULLORDER = -1; 54 55 private com.ibm.icu4jni.text.CollationElementIterator icuIterator; 56 57 CollationElementIterator(com.ibm.icu4jni.text.CollationElementIterator iterator) { 58 this.icuIterator = iterator; 59 } 60 61 /** 62 * Obtains the maximum length of any expansion sequence that ends with the 63 * specified collation element. Returns {@code 1} if there is no expansion 64 * with this collation element as the last element. 65 * 66 * @param order 67 * a collation element that has been previously obtained from a 68 * call to either the {@link #next()} or {@link #previous()} 69 * method. 70 * @return the maximum length of any expansion sequence ending with the 71 * specified collation element. 72 */ 73 public int getMaxExpansion(int order) { 74 return this.icuIterator.getMaxExpansion(order); 75 } 76 77 /** 78 * Obtains the character offset in the source string corresponding to the 79 * next collation element. This value could be any of: 80 * <ul> 81 * <li>The index of the first character in the source string that matches 82 * the value of the next collation element. This means that if 83 * {@code setOffset(offset)} sets the index in the middle of a contraction, 84 * {@code getOffset()} returns the index of the first character in the 85 * contraction, which may not be equal to the original offset that was set. 
86 * Hence calling {@code getOffset()} immediately after 87 * {@code setOffset(offset)} does not guarantee that the original offset set 88 * will be returned.</li> 89 * <li>If normalization is on, the index of the immediate subsequent 90 * character, or composite character with the first character, having a 91 * combining class of 0.</li> 92 * <li>The length of the source string, if iteration has reached the end. 93 * </li> 94 * </ul> 95 * 96 * @return The position of the collation element in the source string that 97 * will be returned by the next invocation of the {@link #next()} 98 * method. 99 */ 100 public int getOffset() { 101 return this.icuIterator.getOffset(); 102 } 103 104 /** 105 * Obtains the next collation element in the source string. 106 * 107 * @return the next collation element or {@code NULLORDER} if the end 108 * of the iteration has been reached. 109 */ 110 public int next() { 111 return this.icuIterator.next(); 112 } 113 114 /** 115 * Obtains the previous collation element in the source string. 116 * 117 * @return the previous collation element, or {@code NULLORDER} when 118 * the start of the iteration has been reached. 119 */ 120 public int previous() { 121 return this.icuIterator.previous(); 122 } 123 124 /** 125 * Obtains the primary order of the specified collation element, i.e. the 126 * first 16 bits. This value is unsigned. 127 * 128 * @param order 129 * the element of the collation. 130 * @return the element's 16 bit primary order. 131 */ 132 public static final int primaryOrder(int order) { 133 return com.ibm.icu4jni.text.CollationElementIterator.primaryOrder(order); 134 } 135 136 /** 137 * Repositions the cursor to point at the first element of the current 138 * string. The next call to {@link #next()} or {@link #previous()} will 139 * return the first and last collation element in the string, respectively. 
140 * <p> 141 * If the {@code RuleBasedCollator} used by this iterator has had its 142 * attributes changed, calling {@code reset()} reinitializes the iterator to 143 * use the new attributes. 144 */ 145 public void reset() { 146 this.icuIterator.reset(); 147 } 148 149 /** 150 * Obtains the secondary order of the specified collation element, i.e. the 151 * 16th to 23th bits, inclusive. This value is unsigned. 152 * 153 * @param order 154 * the element of the collator. 155 * @return the 8 bit secondary order of the element. 156 */ 157 public static final short secondaryOrder(int order) { 158 return (short) com.ibm.icu4jni.text.CollationElementIterator 159 .secondaryOrder(order); 160 } 161 162 /** 163 * Points the iterator at the collation element associated with the 164 * character in the source string which is found at the supplied offset. 165 * After this call completes, an invocation of the {@link #next()} method 166 * will return this collation element. 167 * <p> 168 * If {@code newOffset} corresponds to a character which is part of a 169 * sequence that maps to a single collation element then the iterator is 170 * adjusted to the start of that sequence. As a result of this, any 171 * subsequent call made to {@code getOffset()} may not return the same value 172 * set by this method. 173 * <p> 174 * If the decomposition mode is on, and offset is in the middle of a 175 * decomposable range of source text, the iterator may not return a correct 176 * result for the next forwards or backwards iteration. The user must ensure 177 * that the offset is not in the middle of a decomposable range. 178 * 179 * @param newOffset 180 * the character offset into the original source string to set. 181 * Note that this is not an offset into the corresponding 182 * sequence of collation elements. 
183 */ 184 public void setOffset(int newOffset) { 185 this.icuIterator.setOffset(newOffset); 186 } 187 188 /** 189 * Sets a new source string iterator for iteration, and resets the offset to 190 * the beginning of the text. 191 * 192 * @param source 193 * the new source string iterator for iteration. 194 */ 195 public void setText(CharacterIterator source) { 196 this.icuIterator.setText(source); 197 } 198 199 /** 200 * Sets a new source string for iteration, and resets the offset to the 201 * beginning of the text. 202 * 203 * @param source 204 * the new source string for iteration. 205 */ 206 public void setText(String source) { 207 this.icuIterator.setText(source); 208 } 209 210 /** 211 * Obtains the tertiary order of the specified collation element, i.e. the 212 * last 8 bits. This value is unsigned. 213 * 214 * @param order 215 * the element of the collation. 216 * @return the 8 bit tertiary order of the element. 217 */ 218 public static final short tertiaryOrder(int order) { 219 return (short) com.ibm.icu4jni.text.CollationElementIterator 220 .tertiaryOrder(order); 221 } 222} 223