1/*
2**********************************************************************
3* Copyright (c) 2003-2011, International Business Machines
4* Corporation and others.  All Rights Reserved.
5**********************************************************************
6* Author: Alan Liu
7* Created: September 24 2003
8* Since: ICU 2.8
9**********************************************************************
10*/
11#include "ruleiter.h"
12#include "unicode/parsepos.h"
13#include "unicode/symtable.h"
14#include "unicode/unistr.h"
15#include "unicode/utf16.h"
16#include "patternprops.h"
17
/* Number of code units of lookahead handed to unescapeAt() for one escape; long enough for \U87654321 or \ud800\udc00 */
19#define MAX_U_NOTATION_LEN 12
20
21U_NAMESPACE_BEGIN
22
23RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
24                      ParsePosition& thePos) :
25    text(theText),
26    pos(thePos),
27    sym(theSym),
28    buf(0),
29    bufPos(0)
30{}
31
32UBool RuleCharacterIterator::atEnd() const {
33    return buf == 0 && pos.getIndex() == text.length();
34}
35
36UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
37    if (U_FAILURE(ec)) return DONE;
38
39    UChar32 c = DONE;
40    isEscaped = FALSE;
41
42    for (;;) {
43        c = _current();
44        _advance(U16_LENGTH(c));
45
46        if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
47            (options & PARSE_VARIABLES) != 0 && sym != 0) {
48            UnicodeString name = sym->parseReference(text, pos, text.length());
49            // If name is empty there was an isolated SYMBOL_REF;
50            // return it.  Caller must be prepared for this.
51            if (name.length() == 0) {
52                break;
53            }
54            bufPos = 0;
55            buf = sym->lookup(name);
56            if (buf == 0) {
57                ec = U_UNDEFINED_VARIABLE;
58                return DONE;
59            }
60            // Handle empty variable value
61            if (buf->length() == 0) {
62                buf = 0;
63            }
64            continue;
65        }
66
67        if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
68            continue;
69        }
70
71        if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
72            UnicodeString tempEscape;
73            int32_t offset = 0;
74            c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
75            jumpahead(offset);
76            isEscaped = TRUE;
77            if (c < 0) {
78                ec = U_MALFORMED_UNICODE_ESCAPE;
79                return DONE;
80            }
81        }
82
83        break;
84    }
85
86    return c;
87}
88
89void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
90    p.buf = buf;
91    p.pos = pos.getIndex();
92    p.bufPos = bufPos;
93}
94
95void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
96    buf = p.buf;
97    pos.setIndex(p.pos);
98    bufPos = p.bufPos;
99}
100
101void RuleCharacterIterator::skipIgnored(int32_t options) {
102    if ((options & SKIP_WHITESPACE) != 0) {
103        for (;;) {
104            UChar32 a = _current();
105            if (!PatternProps::isWhiteSpace(a)) break;
106            _advance(U16_LENGTH(a));
107        }
108    }
109}
110
111UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
112    if (maxLookAhead < 0) {
113        maxLookAhead = 0x7FFFFFFF;
114    }
115    if (buf != 0) {
116        buf->extract(bufPos, maxLookAhead, result);
117    } else {
118        text.extract(pos.getIndex(), maxLookAhead, result);
119    }
120    return result;
121}
122
123void RuleCharacterIterator::jumpahead(int32_t count) {
124    _advance(count);
125}
126
127/*
128UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
129    int32_t b = pos.getIndex();
130    text.extract(0, b, result);
131    return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
132}
133*/
134
135UChar32 RuleCharacterIterator::_current() const {
136    if (buf != 0) {
137        return buf->char32At(bufPos);
138    } else {
139        int i = pos.getIndex();
140        return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
141    }
142}
143
144void RuleCharacterIterator::_advance(int32_t count) {
145    if (buf != 0) {
146        bufPos += count;
147        if (bufPos == buf->length()) {
148            buf = 0;
149        }
150    } else {
151        pos.setIndex(pos.getIndex() + count);
152        if (pos.getIndex() > text.length()) {
153            pos.setIndex(text.length());
154        }
155    }
156}
157
158U_NAMESPACE_END
159
160//eof
161