1/* 2********************************************************************** 3* Copyright (c) 2004-2011, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* Author: Alan Liu 7* Created: March 16 2004 8* Since: ICU 3.0 9********************************************************************** 10*/ 11package com.ibm.icu.impl.data; 12 13import java.io.IOException; 14 15import com.ibm.icu.impl.PatternProps; 16import com.ibm.icu.impl.Utility; 17import com.ibm.icu.text.UTF16; 18 19/** 20 * An iterator class that returns successive string tokens from some 21 * source. String tokens are, in general, separated by Pattern_White_Space 22 * in the source test. Furthermore, they may be delimited by 23 * either single or double quotes (opening and closing quotes must 24 * match). Escapes are processed using standard ICU unescaping. 25 */ 26public class TokenIterator { 27 28 private ResourceReader reader; 29 private String line; 30 private StringBuffer buf; 31 private boolean done; 32 private int pos; 33 private int lastpos; 34 35 /** 36 * Construct an iterator over the tokens returned by the given 37 * ResourceReader, ignoring blank lines and comment lines (first 38 * non-blank character is '#'). Note that trailing comments on a 39 * line, beginning with the first unquoted '#', are recognized. 40 */ 41 public TokenIterator(ResourceReader r) { 42 reader = r; 43 line = null; 44 done = false; 45 buf = new StringBuffer(); 46 pos = lastpos = -1; 47 } 48 49 /** 50 * Return the next token from this iterator, or null if the last 51 * token has been returned. 52 */ 53 public String next() throws IOException { 54 if (done) { 55 return null; 56 } 57 for (;;) { 58 if (line == null) { 59 line = reader.readLineSkippingComments(); 60 if (line == null) { 61 done = true; 62 return null; 63 } 64 pos = 0; 65 } 66 buf.setLength(0); 67 lastpos = pos; 68 pos = nextToken(pos); 69 if (pos < 0) { 70 line = null; 71 continue; 72 } 73 return buf.toString(); 74 } 75 } 76 77 /** 78 * Return the one-based line number of the line of the last token returned by 79 * next(). Should only be called 80 * after a call to next(); otherwise the return 81 * value is undefined. 82 */ 83 public int getLineNumber() { 84 return reader.getLineNumber(); 85 } 86 87 /** 88 * Return a string description of the position of the last line 89 * returned by readLine() or readLineSkippingComments(). 90 */ 91 public String describePosition() { 92 return reader.describePosition() + ':' + (lastpos+1); 93 } 94 95 /** 96 * Read the next token from 'this.line' and append it to 97 * 'this.buf'. Tokens are separated by Pattern_White_Space. Tokens 98 * may also be delimited by double or single quotes. The closing 99 * quote must match the opening quote. If a '#' is encountered, 100 * the rest of the line is ignored, unless it is backslash-escaped 101 * or within quotes. 102 * @param position the offset into the string 103 * @return offset to the next character to read from line, or if 104 * the end of the line is reached without scanning a valid token, 105 * -1 106 */ 107 private int nextToken(int position) { 108 position = PatternProps.skipWhiteSpace(line, position); 109 if (position == line.length()) { 110 return -1; 111 } 112 int startpos = position; 113 char c = line.charAt(position++); 114 char quote = 0; 115 switch (c) { 116 case '"': 117 case '\'': 118 quote = c; 119 break; 120 case '#': 121 return -1; 122 default: 123 buf.append(c); 124 break; 125 } 126 int[] posref = null; 127 while (position < line.length()) { 128 c = line.charAt(position); // 16-bit ok 129 if (c == '\\') { 130 if (posref == null) { 131 posref = new int[1]; 132 } 133 posref[0] = position+1; 134 int c32 = Utility.unescapeAt(line, posref); 135 if (c32 < 0) { 136 throw new RuntimeException("Invalid escape at " + 137 reader.describePosition() + ':' + 138 position); 139 } 140 UTF16.append(buf, c32); 141 position = posref[0]; 142 } else if ((quote != 0 && c == quote) || 143 (quote == 0 && PatternProps.isWhiteSpace(c))) { 144 return ++position; 145 } else if (quote == 0 && c == '#') { 146 return position; // do NOT increment 147 } else { 148 buf.append(c); 149 ++position; 150 } 151 } 152 if (quote != 0) { 153 throw new RuntimeException("Unterminated quote at " + 154 reader.describePosition() + ':' + 155 startpos); 156 } 157 return position; 158 } 159} 160