1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (c) 2003-2011, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Author: Alan Liu 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Created: September 24 2003 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Since: ICU 2.8 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ruleiter.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/parsepos.h" 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/symtable.h" 15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "patternprops.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* \U87654321 or \ud800\udc00 */ 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAX_U_NOTATION_LEN 12 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ParsePosition& thePos) : 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text(theText), 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos(thePos), 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sym(theSym), 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf(0), 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bufPos(0) 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{} 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool RuleCharacterIterator::atEnd() const { 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return buf == 0 && pos.getIndex() == text.length(); 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(ec)) return DONE; 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c = DONE; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isEscaped = FALSE; 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = _current(); 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _advance(UTF_CHAR_LENGTH(c)); 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c == SymbolTable::SYMBOL_REF && buf == 0 && 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (options & PARSE_VARIABLES) != 0 && sym != 0) { 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString name = sym->parseReference(text, pos, text.length()); 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If name is empty there was an isolated SYMBOL_REF; 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // return it. Caller must be prepared for this. 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (name.length() == 0) { 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bufPos = 0; 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf = sym->lookup(name); 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf == 0) { 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_UNDEFINED_VARIABLE; 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return DONE; 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Handle empty variable value 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf->length() == 0) { 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf = 0; 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString tempEscape; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t offset = 0; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru jumpahead(offset); 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isEscaped = TRUE; 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c < 0) { 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_MALFORMED_UNICODE_ESCAPE; 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return DONE; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p.buf = buf; 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p.pos = pos.getIndex(); 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p.bufPos = bufPos; 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf = p.buf; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.setIndex(p.pos); 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bufPos = p.bufPos; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleCharacterIterator::skipIgnored(int32_t options) { 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if ((options & SKIP_WHITESPACE) != 0) { 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 a = _current(); 104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (!PatternProps::isWhiteSpace(a)) break; 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _advance(UTF_CHAR_LENGTH(a)); 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (maxLookAhead < 0) { 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxLookAhead = 0x7FFFFFFF; 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf != 0) { 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->extract(bufPos, maxLookAhead, result); 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.extract(pos.getIndex(), maxLookAhead, result); 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleCharacterIterator::jumpahead(int32_t count) { 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _advance(count); 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t b = pos.getIndex(); 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.extract(0, b, result); 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar32 RuleCharacterIterator::_current() const { 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf != 0) { 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return buf->char32At(bufPos); 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i = pos.getIndex(); 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleCharacterIterator::_advance(int32_t count) { 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf != 0) { 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bufPos += count; 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (bufPos == buf->length()) { 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf = 0; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.setIndex(pos.getIndex() + count); 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (pos.getIndex() > text.length()) { 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.setIndex(text.length()); 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof 160