1/*
2**********************************************************************
3* Copyright (c) 2004-2011, International Business Machines
4* Corporation and others.  All Rights Reserved.
5**********************************************************************
6* Author: Alan Liu
7* Created: March 22 2004
8* Since: ICU 3.0
9**********************************************************************
10*/
11#include "tokiter.h"
12#include "textfile.h"
13#include "patternprops.h"
14#include "util.h"
15#include "uprops.h"
16
17TokenIterator::TokenIterator(TextFile* r) {
18    reader = r;
19    done = haveLine = FALSE;
20    pos = lastpos = -1;
21}
22
23TokenIterator::~TokenIterator() {
24}
25
26UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
27    if (done || U_FAILURE(ec)) {
28        return FALSE;
29    }
30    token.truncate(0);
31    for (;;) {
32        if (!haveLine) {
33            if (!reader->readLineSkippingComments(line, ec)) {
34                done = TRUE;
35                return FALSE;
36            }
37            haveLine = TRUE;
38            pos = 0;
39        }
40        lastpos = pos;
41        if (!nextToken(token, ec)) {
42            haveLine = FALSE;
43            if (U_FAILURE(ec)) return FALSE;
44            continue;
45        }
46        return TRUE;
47    }
48}
49
50int32_t TokenIterator::getLineNumber() const {
51    return reader->getLineNumber();
52}
53
54/**
55 * Read the next token from 'this->line' and append it to 'token'.
56 * Tokens are separated by Pattern_White_Space.  Tokens may also be
57 * delimited by double or single quotes.  The closing quote must match
58 * the opening quote.  If a '#' is encountered, the rest of the line
59 * is ignored, unless it is backslash-escaped or within quotes.
60 * @param token the token is appended to this StringBuffer
61 * @param ec input-output error code
62 * @return TRUE if a valid token is found, or FALSE if the end
63 * of the line is reached or an error occurs
64 */
65UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
66    ICU_Utility::skipWhitespace(line, pos, TRUE);
67    if (pos == line.length()) {
68        return FALSE;
69    }
70    UChar c = line.charAt(pos++);
71    UChar quote = 0;
72    switch (c) {
73    case 34/*'"'*/:
74    case 39/*'\\'*/:
75        quote = c;
76        break;
77    case 35/*'#'*/:
78        return FALSE;
79    default:
80        token.append(c);
81        break;
82    }
83    while (pos < line.length()) {
84        c = line.charAt(pos); // 16-bit ok
85        if (c == 92/*'\\'*/) {
86            UChar32 c32 = line.unescapeAt(pos);
87            if (c32 < 0) {
88                ec = U_MALFORMED_UNICODE_ESCAPE;
89                return FALSE;
90            }
91            token.append(c32);
92        } else if ((quote != 0 && c == quote) ||
93                   (quote == 0 && PatternProps::isWhiteSpace(c))) {
94            ++pos;
95            return TRUE;
96        } else if (quote == 0 && c == '#') {
97            return TRUE; // do NOT increment
98        } else {
99            token.append(c);
100            ++pos;
101        }
102    }
103    if (quote != 0) {
104        ec = U_UNTERMINATED_QUOTE;
105        return FALSE;
106    }
107    return TRUE;
108}
109