1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4**********************************************************************
5* Copyright (c) 2004-2015, International Business Machines
6* Corporation and others.  All Rights Reserved.
7**********************************************************************
8* Author: Alan Liu
9* Created: March 16 2004
10* Since: ICU 3.0
11**********************************************************************
12*/
13package com.ibm.icu.impl.data;
14
15import java.io.IOException;
16
17import com.ibm.icu.impl.PatternProps;
18import com.ibm.icu.impl.Utility;
19import com.ibm.icu.text.UTF16;
20
21/**
22 * An iterator class that returns successive string tokens from some
23 * source.  String tokens are, in general, separated by Pattern_White_Space
 * in the source text.  Furthermore, they may be delimited by
25 * either single or double quotes (opening and closing quotes must
26 * match).  Escapes are processed using standard ICU unescaping.
27 *
28 * <p>2015-sep-03 TODO: Only used in com.ibm.icu.dev.test.format, move there.
29 */
30public class TokenIterator {
31
32    private ResourceReader reader;
33    private String line;
34    private StringBuffer buf;
35    private boolean done;
36    private int pos;
37    private int lastpos;
38
39    /**
40     * Construct an iterator over the tokens returned by the given
41     * ResourceReader, ignoring blank lines and comment lines (first
42     * non-blank character is '#').  Note that trailing comments on a
43     * line, beginning with the first unquoted '#', are recognized.
44     */
45    public TokenIterator(ResourceReader r) {
46        reader = r;
47        line = null;
48        done = false;
49        buf = new StringBuffer();
50        pos = lastpos = -1;
51    }
52
53    /**
54     * Return the next token from this iterator, or null if the last
55     * token has been returned.
56     */
57    public String next() throws IOException {
58        if (done) {
59            return null;
60        }
61        for (;;) {
62            if (line == null) {
63                line = reader.readLineSkippingComments();
64                if (line == null) {
65                    done = true;
66                    return null;
67                }
68                pos = 0;
69            }
70            buf.setLength(0);
71            lastpos = pos;
72            pos = nextToken(pos);
73            if (pos < 0) {
74                line = null;
75                continue;
76            }
77            return buf.toString();
78        }
79    }
80
81    /**
82     * Return the one-based line number of the line of the last token returned by
83     * next(). Should only be called
84     * after a call to next(); otherwise the return
85     * value is undefined.
86     */
87    public int getLineNumber() {
88        return reader.getLineNumber();
89    }
90
91    /**
92     * Return a string description of the position of the last line
93     * returned by readLine() or readLineSkippingComments().
94     */
95    public String describePosition() {
96        return reader.describePosition() + ':' + (lastpos+1);
97    }
98
99    /**
100     * Read the next token from 'this.line' and append it to
101     * 'this.buf'.  Tokens are separated by Pattern_White_Space.  Tokens
102     * may also be delimited by double or single quotes.  The closing
103     * quote must match the opening quote.  If a '#' is encountered,
104     * the rest of the line is ignored, unless it is backslash-escaped
105     * or within quotes.
106     * @param position the offset into the string
107     * @return offset to the next character to read from line, or if
108     * the end of the line is reached without scanning a valid token,
109     * -1
110     */
111    private int nextToken(int position) {
112        position = PatternProps.skipWhiteSpace(line, position);
113        if (position == line.length()) {
114            return -1;
115        }
116        int startpos = position;
117        char c = line.charAt(position++);
118        char quote = 0;
119        switch (c) {
120        case '"':
121        case '\'':
122            quote = c;
123            break;
124        case '#':
125            return -1;
126        default:
127            buf.append(c);
128            break;
129        }
130        int[] posref = null;
131        while (position < line.length()) {
132            c = line.charAt(position); // 16-bit ok
133            if (c == '\\') {
134                if (posref == null) {
135                    posref = new int[1];
136                }
137                posref[0] = position+1;
138                int c32 = Utility.unescapeAt(line, posref);
139                if (c32 < 0) {
140                    throw new RuntimeException("Invalid escape at " +
141                                               reader.describePosition() + ':' +
142                                               position);
143                }
144                UTF16.append(buf, c32);
145                position = posref[0];
146            } else if ((quote != 0 && c == quote) ||
147                       (quote == 0 && PatternProps.isWhiteSpace(c))) {
148                return ++position;
149            } else if (quote == 0 && c == '#') {
150                return position; // do NOT increment
151            } else {
152                buf.append(c);
153                ++position;
154            }
155        }
156        if (quote != 0) {
157            throw new RuntimeException("Unterminated quote at " +
158                                       reader.describePosition() + ':' +
159                                       startpos);
160        }
161        return position;
162    }
163}
164