CollationElementIterator.java revision cec4dd4b1d33f78997603d0f89c0d0e56e64dbcd
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package java.text;
19
20/**
21 * Created by a {@code RuleBasedCollator} to iterate through a string. The
22 * result of each iteration is a 32-bit collation element that defines the
23 * ordering priority of the next character or sequence of characters in the
24 * source string.
25 * <p>
26 * For illustration, consider the following in Spanish:
27 * <p>
28 * "ca": the first collation element is collation_element('c') and second
29 * collation element is collation_element('a').
30 * <p>
31 * Since "ch" in Spanish sorts as one entity, the example below returns one
32 * collation element for the two characters 'c' and 'h':
33 * <p>
34 * "cha": the first collation element is collation_element('ch') and the second
35 * one is collation_element('a').
36 * <p>
37 * In German, since the character '&#92;u0086' is a composed character of 'a'
38 * and 'e', the iterator returns two collation elements for the single character
39 * '&#92;u0086':
40 * <p>
41 * "&#92;u0086b": the first collation element is collation_element('a'), the
42 * second one is collation_element('e'), and the third collation element is
43 * collation_element('b').
44 */
45public final class CollationElementIterator {
46
47    /**
48     * This constant is returned by the iterator in the methods
49     * {@code next()} and {@code previous()} when the end or the
50     * beginning of the source string has been reached, and there are no more
51     * valid collation elements to return.
52     */
53    public static final int NULLORDER = -1;
54
55    private com.ibm.icu4jni.text.CollationElementIterator icuIterator;
56
57    CollationElementIterator(com.ibm.icu4jni.text.CollationElementIterator iterator) {
58        this.icuIterator = iterator;
59    }
60
61    /**
62     * Obtains the maximum length of any expansion sequence that ends with the
63     * specified collation element. Returns {@code 1} if there is no expansion
64     * with this collation element as the last element.
65     *
66     * @param order
67     *            a collation element that has been previously obtained from a
68     *            call to either the {@link #next()} or {@link #previous()}
69     *            method.
70     * @return the maximum length of any expansion sequence ending with the
71     *         specified collation element.
72     */
73    public int getMaxExpansion(int order) {
74        return this.icuIterator.getMaxExpansion(order);
75    }
76
77    /**
78     * Obtains the character offset in the source string corresponding to the
79     * next collation element. This value could be any of:
80     * <ul>
81     * <li>The index of the first character in the source string that matches
82     * the value of the next collation element. This means that if
83     * {@code setOffset(offset)} sets the index in the middle of a contraction,
84     * {@code getOffset()} returns the index of the first character in the
85     * contraction, which may not be equal to the original offset that was set.
86     * Hence calling {@code getOffset()} immediately after
87     * {@code setOffset(offset)} does not guarantee that the original offset set
88     * will be returned.</li>
89     * <li>If normalization is on, the index of the immediate subsequent
90     * character, or composite character with the first character, having a
91     * combining class of 0.</li>
92     * <li>The length of the source string, if iteration has reached the end.
93     * </li>
94     * </ul>
95     *
96     * @return The position of the collation element in the source string that
97     *         will be returned by the next invocation of the {@link #next()}
98     *         method.
99     */
100    public int getOffset() {
101        return this.icuIterator.getOffset();
102    }
103
104    /**
105     * Obtains the next collation element in the source string.
106     *
107     * @return the next collation element or {@code NULLORDER} if the end
108     *         of the iteration has been reached.
109     */
110    public int next() {
111        return this.icuIterator.next();
112    }
113
114    /**
115     * Obtains the previous collation element in the source string.
116     *
117     * @return the previous collation element, or {@code NULLORDER} when
118     *         the start of the iteration has been reached.
119     */
120    public int previous() {
121        return this.icuIterator.previous();
122    }
123
124    /**
125     * Obtains the primary order of the specified collation element, i.e. the
126     * first 16 bits. This value is unsigned.
127     *
128     * @param order
129     *            the element of the collation.
130     * @return the element's 16 bit primary order.
131     */
132    public static final int primaryOrder(int order) {
133        return com.ibm.icu4jni.text.CollationElementIterator.primaryOrder(order);
134    }
135
136    /**
137     * Repositions the cursor to point at the first element of the current
138     * string. The next call to {@link #next()} or {@link #previous()} will
139     * return the first and last collation element in the string, respectively.
140     * <p>
141     * If the {@code RuleBasedCollator} used by this iterator has had its
142     * attributes changed, calling {@code reset()} reinitializes the iterator to
143     * use the new attributes.
144     */
145    public void reset() {
146        this.icuIterator.reset();
147    }
148
149    /**
150     * Obtains the secondary order of the specified collation element, i.e. the
151     * 16th to 23th bits, inclusive. This value is unsigned.
152     *
153     * @param order
154     *            the element of the collator.
155     * @return the 8 bit secondary order of the element.
156     */
157    public static final short secondaryOrder(int order) {
158        return (short) com.ibm.icu4jni.text.CollationElementIterator
159                .secondaryOrder(order);
160    }
161
162    /**
163     * Points the iterator at the collation element associated with the
164     * character in the source string which is found at the supplied offset.
165     * After this call completes, an invocation of the {@link #next()} method
166     * will return this collation element.
167     * <p>
168     * If {@code newOffset} corresponds to a character which is part of a
169     * sequence that maps to a single collation element then the iterator is
170     * adjusted to the start of that sequence. As a result of this, any
171     * subsequent call made to {@code getOffset()} may not return the same value
172     * set by this method.
173     * <p>
174     * If the decomposition mode is on, and offset is in the middle of a
175     * decomposable range of source text, the iterator may not return a correct
176     * result for the next forwards or backwards iteration. The user must ensure
177     * that the offset is not in the middle of a decomposable range.
178     *
179     * @param newOffset
180     *            the character offset into the original source string to set.
181     *            Note that this is not an offset into the corresponding
182     *            sequence of collation elements.
183     */
184    public void setOffset(int newOffset) {
185        this.icuIterator.setOffset(newOffset);
186    }
187
188    /**
189     * Sets a new source string iterator for iteration, and resets the offset to
190     * the beginning of the text.
191     *
192     * @param source
193     *            the new source string iterator for iteration.
194     */
195    public void setText(CharacterIterator source) {
196        this.icuIterator.setText(source);
197    }
198
199    /**
200     * Sets a new source string for iteration, and resets the offset to the
201     * beginning of the text.
202     *
203     * @param source
204     *            the new source string for iteration.
205     */
206    public void setText(String source) {
207        this.icuIterator.setText(source);
208    }
209
210    /**
211     * Obtains the tertiary order of the specified collation element, i.e. the
212     * last 8 bits. This value is unsigned.
213     *
214     * @param order
215     *            the element of the collation.
216     * @return the 8 bit tertiary order of the element.
217     */
218    public static final short tertiaryOrder(int order) {
219        return (short) com.ibm.icu4jni.text.CollationElementIterator
220                .tertiaryOrder(order);
221    }
222}
223