1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (c) 2003-2011, International Business Machines
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others.  All Rights Reserved.
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Author: Alan Liu
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Created: September 24 2003
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Since: ICU 2.8
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ruleiter.h"
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/parsepos.h"
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/symtable.h"
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "patternprops.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Longest escape notation to look ahead for: \U87654321 (10 code units) or \ud800\udc00 (12 code units) */
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAX_U_NOTATION_LEN 12
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      ParsePosition& thePos) :
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    text(theText),
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pos(thePos),
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sym(theSym),
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    buf(0),
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    bufPos(0)
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{}
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool RuleCharacterIterator::atEnd() const {
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return buf == 0 && pos.getIndex() == text.length();
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(ec)) return DONE;
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c = DONE;
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isEscaped = FALSE;
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (;;) {
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c = _current();
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        _advance(UTF_CHAR_LENGTH(c));
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            (options & PARSE_VARIABLES) != 0 && sym != 0) {
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeString name = sym->parseReference(text, pos, text.length());
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // If name is empty there was an isolated SYMBOL_REF;
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // return it.  Caller must be prepared for this.
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (name.length() == 0) {
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            bufPos = 0;
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf = sym->lookup(name);
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (buf == 0) {
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ec = U_UNDEFINED_VARIABLE;
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return DONE;
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Handle empty variable value
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (buf->length() == 0) {
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                buf = 0;
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            continue;
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            continue;
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeString tempEscape;
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t offset = 0;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            jumpahead(offset);
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            isEscaped = TRUE;
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (c < 0) {
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ec = U_MALFORMED_UNICODE_ESCAPE;
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return DONE;
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return c;
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    p.buf = buf;
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    p.pos = pos.getIndex();
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    p.bufPos = bufPos;
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    buf = p.buf;
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pos.setIndex(p.pos);
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    bufPos = p.bufPos;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleCharacterIterator::skipIgnored(int32_t options) {
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if ((options & SKIP_WHITESPACE) != 0) {
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for (;;) {
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UChar32 a = _current();
104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (!PatternProps::isWhiteSpace(a)) break;
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            _advance(UTF_CHAR_LENGTH(a));
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (maxLookAhead < 0) {
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        maxLookAhead = 0x7FFFFFFF;
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (buf != 0) {
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->extract(bufPos, maxLookAhead, result);
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        text.extract(pos.getIndex(), maxLookAhead, result);
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result;
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleCharacterIterator::jumpahead(int32_t count) {
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _advance(count);
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t b = pos.getIndex();
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    text.extract(0, b, result);
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar32 RuleCharacterIterator::_current() const {
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (buf != 0) {
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return buf->char32At(bufPos);
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int i = pos.getIndex();
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleCharacterIterator::_advance(int32_t count) {
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (buf != 0) {
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        bufPos += count;
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (bufPos == buf->length()) {
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf = 0;
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos.setIndex(pos.getIndex() + count);
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (pos.getIndex() > text.length()) {
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pos.setIndex(text.length());
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof
160