1/*
2**********************************************************************
3* Copyright (c) 2004-2011, International Business Machines
4* Corporation and others.  All Rights Reserved.
5**********************************************************************
6* Author: Alan Liu
7* Created: March 16 2004
8* Since: ICU 3.0
9**********************************************************************
10*/
11package com.ibm.icu.impl.data;
12
13import java.io.IOException;
14
15import com.ibm.icu.impl.PatternProps;
16import com.ibm.icu.impl.Utility;
17import com.ibm.icu.text.UTF16;
18
19/**
20 * An iterator class that returns successive string tokens from some
21 * source.  String tokens are, in general, separated by Pattern_White_Space
 * in the source text.  Furthermore, they may be delimited by
23 * either single or double quotes (opening and closing quotes must
24 * match).  Escapes are processed using standard ICU unescaping.
25 */
26public class TokenIterator {
27
28    private ResourceReader reader;
29    private String line;
30    private StringBuffer buf;
31    private boolean done;
32    private int pos;
33    private int lastpos;
34
35    /**
36     * Construct an iterator over the tokens returned by the given
37     * ResourceReader, ignoring blank lines and comment lines (first
38     * non-blank character is '#').  Note that trailing comments on a
39     * line, beginning with the first unquoted '#', are recognized.
40     */
41    public TokenIterator(ResourceReader r) {
42        reader = r;
43        line = null;
44        done = false;
45        buf = new StringBuffer();
46        pos = lastpos = -1;
47    }
48
49    /**
50     * Return the next token from this iterator, or null if the last
51     * token has been returned.
52     */
53    public String next() throws IOException {
54        if (done) {
55            return null;
56        }
57        for (;;) {
58            if (line == null) {
59                line = reader.readLineSkippingComments();
60                if (line == null) {
61                    done = true;
62                    return null;
63                }
64                pos = 0;
65            }
66            buf.setLength(0);
67            lastpos = pos;
68            pos = nextToken(pos);
69            if (pos < 0) {
70                line = null;
71                continue;
72            }
73            return buf.toString();
74        }
75    }
76
77    /**
78     * Return the one-based line number of the line of the last token returned by
79     * next(). Should only be called
80     * after a call to next(); otherwise the return
81     * value is undefined.
82     */
83    public int getLineNumber() {
84        return reader.getLineNumber();
85    }
86
87    /**
88     * Return a string description of the position of the last line
89     * returned by readLine() or readLineSkippingComments().
90     */
91    public String describePosition() {
92        return reader.describePosition() + ':' + (lastpos+1);
93    }
94
95    /**
96     * Read the next token from 'this.line' and append it to
97     * 'this.buf'.  Tokens are separated by Pattern_White_Space.  Tokens
98     * may also be delimited by double or single quotes.  The closing
99     * quote must match the opening quote.  If a '#' is encountered,
100     * the rest of the line is ignored, unless it is backslash-escaped
101     * or within quotes.
102     * @param position the offset into the string
103     * @return offset to the next character to read from line, or if
104     * the end of the line is reached without scanning a valid token,
105     * -1
106     */
107    private int nextToken(int position) {
108        position = PatternProps.skipWhiteSpace(line, position);
109        if (position == line.length()) {
110            return -1;
111        }
112        int startpos = position;
113        char c = line.charAt(position++);
114        char quote = 0;
115        switch (c) {
116        case '"':
117        case '\'':
118            quote = c;
119            break;
120        case '#':
121            return -1;
122        default:
123            buf.append(c);
124            break;
125        }
126        int[] posref = null;
127        while (position < line.length()) {
128            c = line.charAt(position); // 16-bit ok
129            if (c == '\\') {
130                if (posref == null) {
131                    posref = new int[1];
132                }
133                posref[0] = position+1;
134                int c32 = Utility.unescapeAt(line, posref);
135                if (c32 < 0) {
136                    throw new RuntimeException("Invalid escape at " +
137                                               reader.describePosition() + ':' +
138                                               position);
139                }
140                UTF16.append(buf, c32);
141                position = posref[0];
142            } else if ((quote != 0 && c == quote) ||
143                       (quote == 0 && PatternProps.isWhiteSpace(c))) {
144                return ++position;
145            } else if (quote == 0 && c == '#') {
146                return position; // do NOT increment
147            } else {
148                buf.append(c);
149                ++position;
150            }
151        }
152        if (quote != 0) {
153            throw new RuntimeException("Unterminated quote at " +
154                                       reader.describePosition() + ':' +
155                                       startpos);
156        }
157        return position;
158    }
159}
160