1/* 2********************************************************************** 3* Copyright (c) 2004-2011, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* Author: Alan Liu 7* Created: March 22 2004 8* Since: ICU 3.0 9********************************************************************** 10*/ 11#include "tokiter.h" 12#include "textfile.h" 13#include "patternprops.h" 14#include "util.h" 15#include "uprops.h" 16 17TokenIterator::TokenIterator(TextFile* r) { 18 reader = r; 19 done = haveLine = FALSE; 20 pos = lastpos = -1; 21} 22 23TokenIterator::~TokenIterator() { 24} 25 26UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) { 27 if (done || U_FAILURE(ec)) { 28 return FALSE; 29 } 30 token.truncate(0); 31 for (;;) { 32 if (!haveLine) { 33 if (!reader->readLineSkippingComments(line, ec)) { 34 done = TRUE; 35 return FALSE; 36 } 37 haveLine = TRUE; 38 pos = 0; 39 } 40 lastpos = pos; 41 if (!nextToken(token, ec)) { 42 haveLine = FALSE; 43 if (U_FAILURE(ec)) return FALSE; 44 continue; 45 } 46 return TRUE; 47 } 48} 49 50int32_t TokenIterator::getLineNumber() const { 51 return reader->getLineNumber(); 52} 53 54/** 55 * Read the next token from 'this->line' and append it to 'token'. 56 * Tokens are separated by Pattern_White_Space. Tokens may also be 57 * delimited by double or single quotes. The closing quote must match 58 * the opening quote. If a '#' is encountered, the rest of the line 59 * is ignored, unless it is backslash-escaped or within quotes. 60 * @param token the token is appended to this StringBuffer 61 * @param ec input-output error code 62 * @return TRUE if a valid token is found, or FALSE if the end 63 * of the line is reached or an error occurs 64 */ 65UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) { 66 ICU_Utility::skipWhitespace(line, pos, TRUE); 67 if (pos == line.length()) { 68 return FALSE; 69 } 70 UChar c = line.charAt(pos++); 71 UChar quote = 0; 72 switch (c) { 73 case 34/*'"'*/: 74 case 39/*'\\'*/: 75 quote = c; 76 break; 77 case 35/*'#'*/: 78 return FALSE; 79 default: 80 token.append(c); 81 break; 82 } 83 while (pos < line.length()) { 84 c = line.charAt(pos); // 16-bit ok 85 if (c == 92/*'\\'*/) { 86 UChar32 c32 = line.unescapeAt(pos); 87 if (c32 < 0) { 88 ec = U_MALFORMED_UNICODE_ESCAPE; 89 return FALSE; 90 } 91 token.append(c32); 92 } else if ((quote != 0 && c == quote) || 93 (quote == 0 && PatternProps::isWhiteSpace(c))) { 94 ++pos; 95 return TRUE; 96 } else if (quote == 0 && c == '#') { 97 return TRUE; // do NOT increment 98 } else { 99 token.append(c); 100 ++pos; 101 } 102 } 103 if (quote != 0) { 104 ec = U_UNTERMINATED_QUOTE; 105 return FALSE; 106 } 107 return TRUE; 108} 109