/*
**********************************************************************
* Copyright (c) 2003-2011, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: September 24 2003
* Since: ICU 2.8
**********************************************************************
*/
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ruleiter.h"
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/parsepos.h"
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/symtable.h"
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unistr.h"
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h"
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "patternprops.h"
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * Upper bound, in UTF-16 code units, on the text handed to unescapeAt()
 * when next() decodes a backslash escape.  Sized for notations such as
 * \U87654321 or \ud800\udc00.
 */
#define MAX_U_NOTATION_LEN 12
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      ParsePosition& thePos) :
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text(theText),
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pos(thePos),
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sym(theSym),
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    buf(0),
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bufPos(0)
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{}
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RuleCharacterIterator::atEnd() const {
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return buf == 0 && pos.getIndex() == text.length();
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(ec)) return DONE;
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c = DONE;
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    isEscaped = FALSE;
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c = _current();
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        _advance(U16_LENGTH(c));
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (options & PARSE_VARIABLES) != 0 && sym != 0) {
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeString name = sym->parseReference(text, pos, text.length());
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // If name is empty there was an isolated SYMBOL_REF;
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // return it.  Caller must be prepared for this.
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (name.length() == 0) {
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            bufPos = 0;
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            buf = sym->lookup(name);
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (buf == 0) {
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ec = U_UNDEFINED_VARIABLE;
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return DONE;
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Handle empty variable value
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (buf->length() == 0) {
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                buf = 0;
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeString tempEscape;
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t offset = 0;
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            jumpahead(offset);
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isEscaped = TRUE;
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c < 0) {
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ec = U_MALFORMED_UNICODE_ESCAPE;
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return DONE;
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return c;
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    p.buf = buf;
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    p.pos = pos.getIndex();
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    p.bufPos = bufPos;
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    buf = p.buf;
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pos.setIndex(p.pos);
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bufPos = p.bufPos;
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RuleCharacterIterator::skipIgnored(int32_t options) {
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if ((options & SKIP_WHITESPACE) != 0) {
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (;;) {
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32 a = _current();
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (!PatternProps::isWhiteSpace(a)) break;
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            _advance(U16_LENGTH(a));
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (maxLookAhead < 0) {
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        maxLookAhead = 0x7FFFFFFF;
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (buf != 0) {
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        buf->extract(bufPos, maxLookAhead, result);
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        text.extract(pos.getIndex(), maxLookAhead, result);
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return result;
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RuleCharacterIterator::jumpahead(int32_t count) {
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    _advance(count);
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
    int32_t b = pos.getIndex();
    text.extract(0, b, result);
    return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
}
*/
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 RuleCharacterIterator::_current() const {
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (buf != 0) {
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return buf->char32At(bufPos);
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int i = pos.getIndex();
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RuleCharacterIterator::_advance(int32_t count) {
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (buf != 0) {
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bufPos += count;
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (bufPos == buf->length()) {
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            buf = 0;
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pos.setIndex(pos.getIndex() + count);
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (pos.getIndex() > text.length()) {
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pos.setIndex(text.length());
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//eof
161