1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (c) 2004-2011, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Author: Alan Liu 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Created: March 22 2004 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Since: ICU 3.0 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "tokiter.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "textfile.h" 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "patternprops.h" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util.h" 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uprops.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTokenIterator::TokenIterator(TextFile* r) { 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru reader = r; 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru done = haveLine = FALSE; 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos = lastpos = -1; 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTokenIterator::~TokenIterator() { 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) { 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (done || U_FAILURE(ec)) { 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru token.truncate(0); 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!haveLine) { 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!reader->readLineSkippingComments(line, ec)) { 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru done = TRUE; 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru haveLine = TRUE; 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos = 0; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lastpos = pos; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!nextToken(token, ec)) { 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru haveLine = FALSE; 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(ec)) return FALSE; 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t TokenIterator::getLineNumber() const { 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return reader->getLineNumber(); 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Read the next token from 'this->line' and append it to 'token'. 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Tokens are separated by Pattern_White_Space. Tokens may also be 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * delimited by double or single quotes. The closing quote must match 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the opening quote. If a '#' is encountered, the rest of the line 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is ignored, unless it is backslash-escaped or within quotes. 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param token the token is appended to this StringBuffer 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param ec input-output error code 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return TRUE if a valid token is found, or FALSE if the end 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the line is reached or an error occurs 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) { 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::skipWhitespace(line, pos, TRUE); 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (pos == line.length()) { 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c = line.charAt(pos++); 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar quote = 0; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (c) { 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 34/*'"'*/: 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 39/*'\\'*/: 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quote = c; 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 35/*'#'*/: 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru token.append(c); 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (pos < line.length()) { 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = line.charAt(pos); // 16-bit ok 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c == 92/*'\\'*/) { 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c32 = line.unescapeAt(pos); 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c32 < 0) { 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_MALFORMED_UNICODE_ESCAPE; 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru token.append(c32); 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if ((quote != 0 && c == quote) || 93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (quote == 0 && PatternProps::isWhiteSpace(c))) { 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++pos; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (quote == 0 && c == '#') { 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; // do NOT increment 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru token.append(c); 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++pos; 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (quote != 0) { 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_UNTERMINATED_QUOTE; 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 109