CanonicalScanner.java revision 406eb4b3a5607e972b8718e0740236a3ea18051b
1/** 2 * Copyright (c) 2008-2010, http://www.snakeyaml.org 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package org.pyyaml; 18 19import java.util.ArrayList; 20import java.util.List; 21import java.util.Map; 22 23import org.yaml.snakeyaml.error.Mark; 24import org.yaml.snakeyaml.nodes.Tag; 25import org.yaml.snakeyaml.scanner.Scanner; 26import org.yaml.snakeyaml.scanner.ScannerImpl; 27import org.yaml.snakeyaml.tokens.AliasToken; 28import org.yaml.snakeyaml.tokens.AnchorToken; 29import org.yaml.snakeyaml.tokens.DirectiveToken; 30import org.yaml.snakeyaml.tokens.DocumentStartToken; 31import org.yaml.snakeyaml.tokens.FlowEntryToken; 32import org.yaml.snakeyaml.tokens.FlowMappingEndToken; 33import org.yaml.snakeyaml.tokens.FlowMappingStartToken; 34import org.yaml.snakeyaml.tokens.FlowSequenceEndToken; 35import org.yaml.snakeyaml.tokens.FlowSequenceStartToken; 36import org.yaml.snakeyaml.tokens.KeyToken; 37import org.yaml.snakeyaml.tokens.ScalarToken; 38import org.yaml.snakeyaml.tokens.StreamEndToken; 39import org.yaml.snakeyaml.tokens.StreamStartToken; 40import org.yaml.snakeyaml.tokens.TagToken; 41import org.yaml.snakeyaml.tokens.TagTuple; 42import org.yaml.snakeyaml.tokens.Token; 43import org.yaml.snakeyaml.tokens.ValueToken; 44 45public class CanonicalScanner implements Scanner { 46 private static final String DIRECTIVE = "%YAML 1.1"; 47 private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES; 48 49 private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS; 50 51 private String data; 52 private int index; 53 public ArrayList<Token> tokens; 54 private boolean scanned; 55 private Mark mark; 56 57 public CanonicalScanner(String data) { 58 this.data = data + "\0"; 59 this.index = 0; 60 this.tokens = new ArrayList<Token>(); 61 this.scanned = false; 62 this.mark = new Mark("test", 0, 0, 0, data, 0); 63 } 64 65 public boolean checkToken(Token.ID... choices) { 66 if (!scanned) { 67 scan(); 68 } 69 if (!tokens.isEmpty()) { 70 if (choices.length == 0) { 71 return true; 72 } 73 Token first = this.tokens.get(0); 74 for (Token.ID choice : choices) { 75 if (first.getTokenId() == choice) { 76 return true; 77 } 78 } 79 } 80 return false; 81 } 82 83 public Token peekToken() { 84 if (!scanned) { 85 scan(); 86 } 87 if (!tokens.isEmpty()) { 88 return this.tokens.get(0); 89 } 90 return null; 91 } 92 93 public Token getToken() { 94 if (!scanned) { 95 scan(); 96 } 97 return this.tokens.remove(0); 98 } 99 100 public Token getToken(Token.ID choice) { 101 Token token = getToken(); 102 if (choice != null && token.getTokenId() != choice) { 103 throw new CanonicalException("unexpected token " + token); 104 } 105 return token; 106 } 107 108 private void scan() { 109 this.tokens.add(new StreamStartToken(mark, mark)); 110 boolean stop = false; 111 while (!stop) { 112 findToken(); 113 char ch = data.charAt(index); 114 switch (ch) { 115 case '\0': 116 tokens.add(new StreamEndToken(mark, mark)); 117 stop = true; 118 break; 119 120 case '%': 121 tokens.add(scanDirective()); 122 break; 123 124 case '-': 125 if ("---".equals(data.substring(index, index + 3))) { 126 index += 3; 127 tokens.add(new DocumentStartToken(mark, mark)); 128 } 129 break; 130 131 case '[': 132 index++; 133 tokens.add(new FlowSequenceStartToken(mark, mark)); 134 break; 135 136 case '{': 137 index++; 138 tokens.add(new FlowMappingStartToken(mark, mark)); 139 break; 140 141 case ']': 142 index++; 143 tokens.add(new FlowSequenceEndToken(mark, mark)); 144 break; 145 146 case '}': 147 index++; 148 tokens.add(new FlowMappingEndToken(mark, mark)); 149 break; 150 151 case '?': 152 index++; 153 tokens.add(new KeyToken(mark, mark)); 154 break; 155 156 case ':': 157 index++; 158 tokens.add(new ValueToken(mark, mark)); 159 break; 160 161 case ',': 162 index++; 163 tokens.add(new FlowEntryToken(mark, mark)); 164 break; 165 166 case '*': 167 tokens.add(scanAlias()); 168 break; 169 170 case '&': 171 tokens.add(scanAlias()); 172 break; 173 174 case '!': 175 tokens.add(scanTag()); 176 break; 177 178 case '"': 179 tokens.add(scanScalar()); 180 break; 181 182 default: 183 throw new CanonicalException("invalid token"); 184 } 185 } 186 scanned = true; 187 } 188 189 private Token scanDirective() { 190 String chunk1 = data.substring(index, index + DIRECTIVE.length()); 191 char chunk2 = data.charAt(index + DIRECTIVE.length()); 192 if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) { 193 index += DIRECTIVE.length(); 194 List<Integer> implicit = new ArrayList<Integer>(2); 195 implicit.add(new Integer(1)); 196 implicit.add(new Integer(1)); 197 return new DirectiveToken<Integer>("YAML", implicit, mark, mark); 198 } else { 199 throw new CanonicalException("invalid directive"); 200 } 201 } 202 203 private Token scanAlias() { 204 boolean isTokenClassAlias; 205 if (data.charAt(index) == '*') { 206 isTokenClassAlias = true; 207 } else { 208 isTokenClassAlias = false; 209 } 210 index++; 211 int start = index; 212 while (", \n\0".indexOf(data.charAt(index)) == -1) { 213 index++; 214 } 215 String value = data.substring(start, index); 216 Token token; 217 if (isTokenClassAlias) { 218 token = new AliasToken(value, mark, mark); 219 } else { 220 token = new AnchorToken(value, mark, mark); 221 } 222 return token; 223 } 224 225 private Token scanTag() { 226 index++; 227 int start = index; 228 while (" \n\0".indexOf(data.charAt(index)) == -1) { 229 index++; 230 } 231 String value = data.substring(start, index); 232 if (value.length() == 0) { 233 value = "!"; 234 } else if (value.charAt(0) == '!') { 235 value = Tag.PREFIX + value.substring(1); 236 } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') { 237 value = value.substring(1, value.length() - 1); 238 } else { 239 value = "!" + value; 240 } 241 return new TagToken(new TagTuple("", value), mark, mark); 242 } 243 244 private Token scanScalar() { 245 index++; 246 StringBuilder chunks = new StringBuilder(); 247 int start = index; 248 boolean ignoreSpaces = false; 249 while (data.charAt(index) != '"') { 250 if (data.charAt(index) == '\\') { 251 ignoreSpaces = false; 252 chunks.append(data.substring(start, index)); 253 index++; 254 char ch = data.charAt(index); 255 index++; 256 if (ch == '\n') { 257 ignoreSpaces = true; 258 } else if (QUOTE_CODES.keySet().contains(ch)) { 259 int length = QUOTE_CODES.get(ch); 260 int code = Integer.parseInt(data.substring(index, index + length), 16); 261 chunks.append(String.valueOf((char) code)); 262 index += length; 263 } else { 264 if (!QUOTE_REPLACES.keySet().contains(ch)) { 265 throw new CanonicalException("invalid escape code"); 266 } 267 chunks.append(QUOTE_REPLACES.get(ch)); 268 } 269 start = index; 270 } else if (data.charAt(index) == '\n') { 271 chunks.append(data.substring(start, index)); 272 chunks.append(" "); 273 index++; 274 start = index; 275 ignoreSpaces = true; 276 } else if (ignoreSpaces && data.charAt(index) == ' ') { 277 index++; 278 start = index; 279 } else { 280 ignoreSpaces = false; 281 index++; 282 } 283 } 284 chunks.append(data.substring(start, index)); 285 index++; 286 return new ScalarToken(chunks.toString(), mark, mark, false); 287 } 288 289 private void findToken() { 290 boolean found = false; 291 while (!found) { 292 while (" \t".indexOf(data.charAt(index)) != -1) { 293 index++; 294 } 295 if (data.charAt(index) == '#') { 296 while (data.charAt(index) != '\n') { 297 index++; 298 } 299 } 300 if (data.charAt(index) == '\n') { 301 index++; 302 } else { 303 found = true; 304 } 305 } 306 } 307} 308