1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4********************************************************************** 5* Copyright (c) 2004-2011, International Business Machines 6* Corporation and others. All Rights Reserved. 7********************************************************************** 8* Author: Alan Liu 9* Created: March 22 2004 10* Since: ICU 3.0 11********************************************************************** 12*/ 13#include "tokiter.h" 14#include "textfile.h" 15#include "patternprops.h" 16#include "util.h" 17#include "uprops.h" 18 19TokenIterator::TokenIterator(TextFile* r) { 20 reader = r; 21 done = haveLine = FALSE; 22 pos = lastpos = -1; 23} 24 25TokenIterator::~TokenIterator() { 26} 27 28UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) { 29 if (done || U_FAILURE(ec)) { 30 return FALSE; 31 } 32 token.truncate(0); 33 for (;;) { 34 if (!haveLine) { 35 if (!reader->readLineSkippingComments(line, ec)) { 36 done = TRUE; 37 return FALSE; 38 } 39 haveLine = TRUE; 40 pos = 0; 41 } 42 lastpos = pos; 43 if (!nextToken(token, ec)) { 44 haveLine = FALSE; 45 if (U_FAILURE(ec)) return FALSE; 46 continue; 47 } 48 return TRUE; 49 } 50} 51 52int32_t TokenIterator::getLineNumber() const { 53 return reader->getLineNumber(); 54} 55 56/** 57 * Read the next token from 'this->line' and append it to 'token'. 58 * Tokens are separated by Pattern_White_Space. Tokens may also be 59 * delimited by double or single quotes. The closing quote must match 60 * the opening quote. If a '#' is encountered, the rest of the line 61 * is ignored, unless it is backslash-escaped or within quotes. 62 * @param token the token is appended to this StringBuffer 63 * @param ec input-output error code 64 * @return TRUE if a valid token is found, or FALSE if the end 65 * of the line is reached or an error occurs 66 */ 67UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) { 68 ICU_Utility::skipWhitespace(line, pos, TRUE); 69 if (pos == line.length()) { 70 return FALSE; 71 } 72 UChar c = line.charAt(pos++); 73 UChar quote = 0; 74 switch (c) { 75 case 34/*'"'*/: 76 case 39/*'\\'*/: 77 quote = c; 78 break; 79 case 35/*'#'*/: 80 return FALSE; 81 default: 82 token.append(c); 83 break; 84 } 85 while (pos < line.length()) { 86 c = line.charAt(pos); // 16-bit ok 87 if (c == 92/*'\\'*/) { 88 UChar32 c32 = line.unescapeAt(pos); 89 if (c32 < 0) { 90 ec = U_MALFORMED_UNICODE_ESCAPE; 91 return FALSE; 92 } 93 token.append(c32); 94 } else if ((quote != 0 && c == quote) || 95 (quote == 0 && PatternProps::isWhiteSpace(c))) { 96 ++pos; 97 return TRUE; 98 } else if (quote == 0 && c == '#') { 99 return TRUE; // do NOT increment 100 } else { 101 token.append(c); 102 ++pos; 103 } 104 } 105 if (quote != 0) { 106 ec = U_UNTERMINATED_QUOTE; 107 return FALSE; 108 } 109 return TRUE; 110} 111