1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package java.text;
19
20import libcore.icu.CollationElementIteratorICU;
21
22/**
23 * Created by a {@code RuleBasedCollator} to iterate through a string. The
24 * result of each iteration is a 32-bit collation element that defines the
25 * ordering priority of the next character or sequence of characters in the
26 * source string.
27 * <p>
28 * For illustration, consider the following in Spanish:
29 * <p>
30 * "ca": the first collation element is collation_element('c') and second
31 * collation element is collation_element('a').
32 * <p>
33 * Since "ch" in Spanish sorts as one entity, the example below returns one
34 * collation element for the two characters 'c' and 'h':
35 * <p>
36 * "cha": the first collation element is collation_element('ch') and the second
37 * one is collation_element('a').
38 * <p>
39 * In German, since the character '&#92;u0086' is a composed character of 'a'
40 * and 'e', the iterator returns two collation elements for the single character
41 * '&#92;u0086':
42 * <p>
43 * "&#92;u0086b": the first collation element is collation_element('a'), the
44 * second one is collation_element('e'), and the third collation element is
45 * collation_element('b').
46 */
47public final class CollationElementIterator {
48
49    /**
50     * This constant is returned by the iterator in the methods
51     * {@code next()} and {@code previous()} when the end or the
52     * beginning of the source string has been reached, and there are no more
53     * valid collation elements to return.
54     */
55    public static final int NULLORDER = -1;
56
57    private CollationElementIteratorICU icuIterator;
58
59    CollationElementIterator(CollationElementIteratorICU iterator) {
60        this.icuIterator = iterator;
61    }
62
63    /**
64     * Obtains the maximum length of any expansion sequence that ends with the
65     * specified collation element. Returns {@code 1} if there is no expansion
66     * with this collation element as the last element.
67     *
68     * @param order
69     *            a collation element that has been previously obtained from a
70     *            call to either the {@link #next()} or {@link #previous()}
71     *            method.
72     * @return the maximum length of any expansion sequence ending with the
73     *         specified collation element.
74     */
75    public int getMaxExpansion(int order) {
76        return this.icuIterator.getMaxExpansion(order);
77    }
78
79    /**
80     * Obtains the character offset in the source string corresponding to the
81     * next collation element. This value could be any of:
82     * <ul>
83     * <li>The index of the first character in the source string that matches
84     * the value of the next collation element. This means that if
85     * {@code setOffset(offset)} sets the index in the middle of a contraction,
86     * {@code getOffset()} returns the index of the first character in the
87     * contraction, which may not be equal to the original offset that was set.
88     * Hence calling {@code getOffset()} immediately after
89     * {@code setOffset(offset)} does not guarantee that the original offset set
90     * will be returned.</li>
91     * <li>If normalization is on, the index of the immediate subsequent
92     * character, or composite character with the first character, having a
93     * combining class of 0.</li>
94     * <li>The length of the source string, if iteration has reached the end.
95     * </li>
96     * </ul>
97     *
98     * @return The position of the collation element in the source string that
99     *         will be returned by the next invocation of the {@link #next()}
100     *         method.
101     */
102    public int getOffset() {
103        return this.icuIterator.getOffset();
104    }
105
106    /**
107     * Obtains the next collation element in the source string.
108     *
109     * @return the next collation element or {@code NULLORDER} if the end
110     *         of the iteration has been reached.
111     */
112    public int next() {
113        return this.icuIterator.next();
114    }
115
116    /**
117     * Obtains the previous collation element in the source string.
118     *
119     * @return the previous collation element, or {@code NULLORDER} when
120     *         the start of the iteration has been reached.
121     */
122    public int previous() {
123        return this.icuIterator.previous();
124    }
125
126    /**
127     * Obtains the primary order of the specified collation element, i.e. the
128     * first 16 bits. This value is unsigned.
129     *
130     * @param order
131     *            the element of the collation.
132     * @return the element's 16 bit primary order.
133     */
134    public static final int primaryOrder(int order) {
135        return CollationElementIteratorICU.primaryOrder(order);
136    }
137
138    /**
139     * Repositions the cursor to point at the first element of the current
140     * string. The next call to {@link #next()} or {@link #previous()} will
141     * return the first and last collation element in the string, respectively.
142     * <p>
143     * If the {@code RuleBasedCollator} used by this iterator has had its
144     * attributes changed, calling {@code reset()} reinitializes the iterator to
145     * use the new attributes.
146     */
147    public void reset() {
148        this.icuIterator.reset();
149    }
150
151    /**
152     * Obtains the secondary order of the specified collation element, i.e. the
153     * 16th to 23th bits, inclusive. This value is unsigned.
154     *
155     * @param order
156     *            the element of the collator.
157     * @return the 8 bit secondary order of the element.
158     */
159    public static final short secondaryOrder(int order) {
160        return (short) CollationElementIteratorICU.secondaryOrder(order);
161    }
162
163    /**
164     * Points the iterator at the collation element associated with the
165     * character in the source string which is found at the supplied offset.
166     * After this call completes, an invocation of the {@link #next()} method
167     * will return this collation element.
168     * <p>
169     * If {@code newOffset} corresponds to a character which is part of a
170     * sequence that maps to a single collation element then the iterator is
171     * adjusted to the start of that sequence. As a result of this, any
172     * subsequent call made to {@code getOffset()} may not return the same value
173     * set by this method.
174     * <p>
175     * If the decomposition mode is on, and offset is in the middle of a
176     * decomposable range of source text, the iterator may not return a correct
177     * result for the next forwards or backwards iteration. The user must ensure
178     * that the offset is not in the middle of a decomposable range.
179     *
180     * @param newOffset
181     *            the character offset into the original source string to set.
182     *            Note that this is not an offset into the corresponding
183     *            sequence of collation elements.
184     */
185    public void setOffset(int newOffset) {
186        this.icuIterator.setOffset(newOffset);
187    }
188
189    /**
190     * Sets a new source string iterator for iteration, and resets the offset to
191     * the beginning of the text.
192     *
193     * @param source
194     *            the new source string iterator for iteration.
195     */
196    public void setText(CharacterIterator source) {
197        this.icuIterator.setText(source);
198    }
199
200    /**
201     * Sets a new source string for iteration, and resets the offset to the
202     * beginning of the text.
203     *
204     * @param source
205     *            the new source string for iteration.
206     */
207    public void setText(String source) {
208        this.icuIterator.setText(source);
209    }
210
211    /**
212     * Obtains the tertiary order of the specified collation element, i.e. the
213     * last 8 bits. This value is unsigned.
214     *
215     * @param order
216     *            the element of the collation.
217     * @return the 8 bit tertiary order of the element.
218     */
219    public static final short tertiaryOrder(int order) {
220        return (short) CollationElementIteratorICU.tertiaryOrder(order);
221    }
222}
223