1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5**********************************************************************
6* Copyright (c) 2003-2011, International Business Machines
7* Corporation and others.  All Rights Reserved.
8**********************************************************************
9* Author: Alan Liu
10* Created: September 23 2003
11* Since: ICU 2.8
12**********************************************************************
13*/
14package android.icu.impl;
15
16import java.text.ParsePosition;
17
18import android.icu.text.SymbolTable;
19import android.icu.text.UTF16;
20
21/**
22 * An iterator that returns 32-bit code points.  This class is deliberately
23 * <em>not</em> related to any of the JDK or ICU4J character iterator classes
24 * in order to minimize complexity.
25 * @author Alan Liu
26 * @hide Only a subset of ICU is exposed in Android
27 */
28public class RuleCharacterIterator {
29
30    // TODO: Ideas for later.  (Do not implement if not needed, lest the
31    // code coverage numbers go down due to unused methods.)
32    // 1. Add a copy constructor, equals() method, clone() method.
33    // 2. Rather than return DONE, throw an exception if the end
34    // is reached -- this is an alternate usage model, probably not useful.
35    // 3. Return isEscaped from next().  If this happens,
36    // don't keep an isEscaped member variable.
37
38    /**
39     * Text being iterated.
40     */
41    private String text;
42
43    /**
44     * Position of iterator.
45     */
46    private ParsePosition pos;
47
48    /**
49     * Symbol table used to parse and dereference variables.  May be null.
50     */
51    private SymbolTable sym;
52
53    /**
54     * Current variable expansion, or null if none.
55     */
56    private char[] buf;
57
58    /**
59     * Position within buf[].  Meaningless if buf == null.
60     */
61    private int bufPos;
62
63    /**
64     * Flag indicating whether the last character was parsed from an escape.
65     */
66    private boolean isEscaped;
67
68    /**
69     * Value returned when there are no more characters to iterate.
70     */
71    public static final int DONE = -1;
72
73    /**
74     * Bitmask option to enable parsing of variable names.  If (options &
75     * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
76     * its value.  Variables are parsed using the SymbolTable API.
77     */
78    public static final int PARSE_VARIABLES = 1;
79
80    /**
81     * Bitmask option to enable parsing of escape sequences.  If (options &
82     * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
83     * to its value.  Escapes are parsed using Utility.unescapeAt().
84     */
85    public static final int PARSE_ESCAPES   = 2;
86
87    /**
88     * Bitmask option to enable skipping of whitespace.  If (options &
89     * SKIP_WHITESPACE) != 0, then Unicode Pattern_White_Space characters will be silently
90     * skipped, as if they were not present in the input.
91     */
92    public static final int SKIP_WHITESPACE = 4;
93
94    /**
95     * Constructs an iterator over the given text, starting at the given
96     * position.
97     * @param text the text to be iterated
98     * @param sym the symbol table, or null if there is none.  If sym is null,
99     * then variables will not be deferenced, even if the PARSE_VARIABLES
100     * option is set.
101     * @param pos upon input, the index of the next character to return.  If a
102     * variable has been dereferenced, then pos will <em>not</em> increment as
103     * characters of the variable value are iterated.
104     */
105    public RuleCharacterIterator(String text, SymbolTable sym,
106                                 ParsePosition pos) {
107        if (text == null || pos.getIndex() > text.length()) {
108            throw new IllegalArgumentException();
109        }
110        this.text = text;
111        this.sym = sym;
112        this.pos = pos;
113        buf = null;
114    }
115
116    /**
117     * Returns true if this iterator has no more characters to return.
118     */
119    public boolean atEnd() {
120        return buf == null && pos.getIndex() == text.length();
121    }
122
123    /**
124     * Returns the next character using the given options, or DONE if there
125     * are no more characters, and advance the position to the next
126     * character.
127     * @param options one or more of the following options, bitwise-OR-ed
128     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
129     * @return the current 32-bit code point, or DONE
130     */
131    public int next(int options) {
132        int c = DONE;
133        isEscaped = false;
134
135        for (;;) {
136            c = _current();
137            _advance(UTF16.getCharCount(c));
138
139            if (c == SymbolTable.SYMBOL_REF && buf == null &&
140                (options & PARSE_VARIABLES) != 0 && sym != null) {
141                String name = sym.parseReference(text, pos, text.length());
142                // If name == null there was an isolated SYMBOL_REF;
143                // return it.  Caller must be prepared for this.
144                if (name == null) {
145                    break;
146                }
147                bufPos = 0;
148                buf = sym.lookup(name);
149                if (buf == null) {
150                    throw new IllegalArgumentException(
151                                "Undefined variable: " + name);
152                }
153                // Handle empty variable value
154                if (buf.length == 0) {
155                    buf = null;
156                }
157                continue;
158            }
159
160            if ((options & SKIP_WHITESPACE) != 0 &&
161                PatternProps.isWhiteSpace(c)) {
162                continue;
163            }
164
165            if (c == '\\' && (options & PARSE_ESCAPES) != 0) {
166                int offset[] = new int[] { 0 };
167                c = Utility.unescapeAt(lookahead(), offset);
168                jumpahead(offset[0]);
169                isEscaped = true;
170                if (c < 0) {
171                    throw new IllegalArgumentException("Invalid escape");
172                }
173            }
174
175            break;
176        }
177
178        return c;
179    }
180
181    /**
182     * Returns true if the last character returned by next() was
183     * escaped.  This will only be the case if the option passed in to
184     * next() included PARSE_ESCAPED and the next character was an
185     * escape sequence.
186     */
187    public boolean isEscaped() {
188        return isEscaped;
189    }
190
191    /**
192     * Returns true if this iterator is currently within a variable expansion.
193     */
194    public boolean inVariable() {
195        return buf != null;
196    }
197
198    /**
199     * Returns an object which, when later passed to setPos(), will
200     * restore this iterator's position.  Usage idiom:
201     *
202     * RuleCharacterIterator iterator = ...;
203     * Object pos = iterator.getPos(null); // allocate position object
204     * for (;;) {
205     *   pos = iterator.getPos(pos); // reuse position object
206     *   int c = iterator.next(...);
207     *   ...
208     * }
209     * iterator.setPos(pos);
210     *
211     * @param p a position object previously returned by getPos(),
212     * or null.  If not null, it will be updated and returned.  If
213     * null, a new position object will be allocated and returned.
214     * @return a position object which may be passed to setPos(),
215     * either `p,' or if `p' == null, a newly-allocated object
216     */
217    public Object getPos(Object p) {
218        if (p == null) {
219            return new Object[] {buf, new int[] {pos.getIndex(), bufPos}};
220        }
221        Object[] a = (Object[]) p;
222        a[0] = buf;
223        int[] v = (int[]) a[1];
224        v[0] = pos.getIndex();
225        v[1] = bufPos;
226        return p;
227    }
228
229    /**
230     * Restores this iterator to the position it had when getPos()
231     * returned the given object.
232     * @param p a position object previously returned by getPos()
233     */
234    public void setPos(Object p) {
235        Object[] a = (Object[]) p;
236        buf = (char[]) a[0];
237        int[] v = (int[]) a[1];
238        pos.setIndex(v[0]);
239        bufPos = v[1];
240    }
241
242    /**
243     * Skips ahead past any ignored characters, as indicated by the given
244     * options.  This is useful in conjunction with the lookahead() method.
245     *
246     * Currently, this only has an effect for SKIP_WHITESPACE.
247     * @param options one or more of the following options, bitwise-OR-ed
248     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
249     */
250    public void skipIgnored(int options) {
251        if ((options & SKIP_WHITESPACE) != 0) {
252            for (;;) {
253                int a = _current();
254                if (!PatternProps.isWhiteSpace(a)) break;
255                _advance(UTF16.getCharCount(a));
256            }
257        }
258    }
259
260    /**
261     * Returns a string containing the remainder of the characters to be
262     * returned by this iterator, without any option processing.  If the
263     * iterator is currently within a variable expansion, this will only
264     * extend to the end of the variable expansion.  This method is provided
265     * so that iterators may interoperate with string-based APIs.  The typical
266     * sequence of calls is to call skipIgnored(), then call lookahead(), then
267     * parse the string returned by lookahead(), then call jumpahead() to
268     * resynchronize the iterator.
269     * @return a string containing the characters to be returned by future
270     * calls to next()
271     */
272    public String lookahead() {
273        if (buf != null) {
274            return new String(buf, bufPos, buf.length - bufPos);
275        } else {
276            return text.substring(pos.getIndex());
277        }
278    }
279
280    /**
281     * Advances the position by the given number of 16-bit code units.
282     * This is useful in conjunction with the lookahead() method.
283     * @param count the number of 16-bit code units to jump over
284     */
285    public void jumpahead(int count) {
286        if (count < 0) {
287            throw new IllegalArgumentException();
288        }
289        if (buf != null) {
290            bufPos += count;
291            if (bufPos > buf.length) {
292                throw new IllegalArgumentException();
293            }
294            if (bufPos == buf.length) {
295                buf = null;
296            }
297        } else {
298            int i = pos.getIndex() + count;
299            pos.setIndex(i);
300            if (i > text.length()) {
301                throw new IllegalArgumentException();
302            }
303        }
304    }
305
306    /**
307     * Returns a string representation of this object, consisting of the
308     * characters being iterated, with a '|' marking the current position.
309     * Position within an expanded variable is <em>not</em> indicated.
310     * @return a string representation of this object
311     */
312    @Override
313    public String toString() {
314        int b = pos.getIndex();
315        return text.substring(0, b) + '|' + text.substring(b);
316    }
317
318    /**
319     * Returns the current 32-bit code point without parsing escapes, parsing
320     * variables, or skipping whitespace.
321     * @return the current 32-bit code point
322     */
323    private int _current() {
324        if (buf != null) {
325            return UTF16.charAt(buf, 0, buf.length, bufPos);
326        } else {
327            int i = pos.getIndex();
328            return (i < text.length()) ? UTF16.charAt(text, i) : DONE;
329        }
330    }
331
332    /**
333     * Advances the position by the given amount.
334     * @param count the number of 16-bit code units to advance past
335     */
336    private void _advance(int count) {
337        if (buf != null) {
338            bufPos += count;
339            if (bufPos == buf.length) {
340                buf = null;
341            }
342        } else {
343            pos.setIndex(pos.getIndex() + count);
344            if (pos.getIndex() > text.length()) {
345                pos.setIndex(text.length());
346            }
347        }
348    }
349}