1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package java.text;
19
20// BEGIN android-note
21// The icu implementation used was changed from icu4j to icu4jni.
22// END android-note
23
24/**
25 * Created by a {@code RuleBasedCollator} to iterate through a string. The
26 * result of each iteration is a 32-bit collation element that defines the
27 * ordering priority of the next character or sequence of characters in the
28 * source string.
29 * <p>
30 * For illustration, consider the following in Spanish:
31 * <p>
32 * "ca": the first collation element is collation_element('c') and second
33 * collation element is collation_element('a').
34 * <p>
35 * Since "ch" in Spanish sorts as one entity, the example below returns one
36 * collation element for the two characters 'c' and 'h':
37 * <p>
38 * "cha": the first collation element is collation_element('ch') and the second
39 * one is collation_element('a').
40 * <p>
41 * In German, since the character '&#92;u0086' is a composed character of 'a'
42 * and 'e', the iterator returns two collation elements for the single character
43 * '&#92;u0086':
44 * <p>
45 * "&#92;u0086b": the first collation element is collation_element('a'), the
46 * second one is collation_element('e'), and the third collation element is
47 * collation_element('b').
48 */
49public final class CollationElementIterator {
50
51    /**
52     * This constant is returned by the iterator in the methods
53     * {@code next()} and {@code previous()} when the end or the
54     * beginning of the source string has been reached, and there are no more
55     * valid collation elements to return.
56     */
57    public static final int NULLORDER = -1;
58
59    private com.ibm.icu4jni.text.CollationElementIterator icuIterator;
60
61    CollationElementIterator(com.ibm.icu4jni.text.CollationElementIterator iterator) {
62        this.icuIterator = iterator;
63    }
64
65    /**
66     * Obtains the maximum length of any expansion sequence that ends with the
67     * specified collation element. Returns {@code 1} if there is no expansion
68     * with this collation element as the last element.
69     *
70     * @param order
71     *            a collation element that has been previously obtained from a
72     *            call to either the {@link #next()} or {@link #previous()}
73     *            method.
74     * @return the maximum length of any expansion sequence ending with the
75     *         specified collation element.
76     */
77    public int getMaxExpansion(int order) {
78        return this.icuIterator.getMaxExpansion(order);
79    }
80
81    /**
82     * Obtains the character offset in the source string corresponding to the
83     * next collation element. This value could be any of:
84     * <ul>
85     * <li>The index of the first character in the source string that matches
86     * the value of the next collation element. This means that if
87     * {@code setOffset(offset)} sets the index in the middle of a contraction,
88     * {@code getOffset()} returns the index of the first character in the
89     * contraction, which may not be equal to the original offset that was set.
90     * Hence calling {@code getOffset()} immediately after
91     * {@code setOffset(offset)} does not guarantee that the original offset set
92     * will be returned.</li>
93     * <li>If normalization is on, the index of the immediate subsequent
94     * character, or composite character with the first character, having a
95     * combining class of 0.</li>
96     * <li>The length of the source string, if iteration has reached the end.
97     * </li>
98     * </ul>
99     *
100     * @return The position of the collation element in the source string that
101     *         will be returned by the next invocation of the {@link #next()}
102     *         method.
103     */
104    public int getOffset() {
105        return this.icuIterator.getOffset();
106    }
107
108    /**
109     * Obtains the next collation element in the source string.
110     *
111     * @return the next collation element or {@code NULLORDER} if the end
112     *         of the iteration has been reached.
113     */
114    public int next() {
115        return this.icuIterator.next();
116    }
117
118    /**
119     * Obtains the previous collation element in the source string.
120     *
121     * @return the previous collation element, or {@code NULLORDER} when
122     *         the start of the iteration has been reached.
123     */
124    public int previous() {
125        return this.icuIterator.previous();
126    }
127
128    /**
129     * Obtains the primary order of the specified collation element, i.e. the
130     * first 16 bits. This value is unsigned.
131     *
132     * @param order
133     *            the element of the collation.
134     * @return the element's 16 bit primary order.
135     */
136    public static final int primaryOrder(int order) {
137        return com.ibm.icu4jni.text.CollationElementIterator.primaryOrder(order);
138    }
139
140    /**
141     * Repositions the cursor to point at the first element of the current
142     * string. The next call to {@link #next()} or {@link #previous()} will
143     * return the first and last collation element in the string, respectively.
144     * <p>
145     * If the {@code RuleBasedCollator} used by this iterator has had its
146     * attributes changed, calling {@code reset()} reinitializes the iterator to
147     * use the new attributes.
148     */
149    public void reset() {
150        this.icuIterator.reset();
151    }
152
153    /**
154     * Obtains the secondary order of the specified collation element, i.e. the
155     * 16th to 23th bits, inclusive. This value is unsigned.
156     *
157     * @param order
158     *            the element of the collator.
159     * @return the 8 bit secondary order of the element.
160     */
161    public static final short secondaryOrder(int order) {
162        return (short) com.ibm.icu4jni.text.CollationElementIterator
163                .secondaryOrder(order);
164    }
165
166    /**
167     * Points the iterator at the collation element associated with the
168     * character in the source string which is found at the supplied offset.
169     * After this call completes, an invocation of the {@link #next()} method
170     * will return this collation element.
171     * <p>
172     * If {@code newOffset} corresponds to a character which is part of a
173     * sequence that maps to a single collation element then the iterator is
174     * adjusted to the start of that sequence. As a result of this, any
175     * subsequent call made to {@code getOffset()} may not return the same value
176     * set by this method.
177     * <p>
178     * If the decomposition mode is on, and offset is in the middle of a
179     * decomposable range of source text, the iterator may not return a correct
180     * result for the next forwards or backwards iteration. The user must ensure
181     * that the offset is not in the middle of a decomposable range.
182     *
183     * @param newOffset
184     *            the character offset into the original source string to set.
185     *            Note that this is not an offset into the corresponding
186     *            sequence of collation elements.
187     */
188    public void setOffset(int newOffset) {
189        this.icuIterator.setOffset(newOffset);
190    }
191
192    /**
193     * Sets a new source string iterator for iteration, and resets the offset to
194     * the beginning of the text.
195     *
196     * @param source
197     *            the new source string iterator for iteration.
198     */
199    public void setText(CharacterIterator source) {
200        this.icuIterator.setText(source);
201    }
202
203    /**
204     * Sets a new source string for iteration, and resets the offset to the
205     * beginning of the text.
206     *
207     * @param source
208     *            the new source string for iteration.
209     */
210    public void setText(String source) {
211        this.icuIterator.setText(source);
212    }
213
214    /**
215     * Obtains the tertiary order of the specified collation element, i.e. the
216     * last 8 bits. This value is unsigned.
217     *
218     * @param order
219     *            the element of the collation.
220     * @return the 8 bit tertiary order of the element.
221     */
222    public static final short tertiaryOrder(int order) {
223        return (short) com.ibm.icu4jni.text.CollationElementIterator
224                .tertiaryOrder(order);
225    }
226}
227