CanonicalScanner.java revision 406eb4b3a5607e972b8718e0740236a3ea18051b
1/**
2 * Copyright (c) 2008-2010, http://www.snakeyaml.org
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.pyyaml;
18
19import java.util.ArrayList;
20import java.util.List;
21import java.util.Map;
22
23import org.yaml.snakeyaml.error.Mark;
24import org.yaml.snakeyaml.nodes.Tag;
25import org.yaml.snakeyaml.scanner.Scanner;
26import org.yaml.snakeyaml.scanner.ScannerImpl;
27import org.yaml.snakeyaml.tokens.AliasToken;
28import org.yaml.snakeyaml.tokens.AnchorToken;
29import org.yaml.snakeyaml.tokens.DirectiveToken;
30import org.yaml.snakeyaml.tokens.DocumentStartToken;
31import org.yaml.snakeyaml.tokens.FlowEntryToken;
32import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
33import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
34import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
35import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
36import org.yaml.snakeyaml.tokens.KeyToken;
37import org.yaml.snakeyaml.tokens.ScalarToken;
38import org.yaml.snakeyaml.tokens.StreamEndToken;
39import org.yaml.snakeyaml.tokens.StreamStartToken;
40import org.yaml.snakeyaml.tokens.TagToken;
41import org.yaml.snakeyaml.tokens.TagTuple;
42import org.yaml.snakeyaml.tokens.Token;
43import org.yaml.snakeyaml.tokens.ValueToken;
44
45public class CanonicalScanner implements Scanner {
46    private static final String DIRECTIVE = "%YAML 1.1";
47    private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES;
48
49    private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS;
50
51    private String data;
52    private int index;
53    public ArrayList<Token> tokens;
54    private boolean scanned;
55    private Mark mark;
56
57    public CanonicalScanner(String data) {
58        this.data = data + "\0";
59        this.index = 0;
60        this.tokens = new ArrayList<Token>();
61        this.scanned = false;
62        this.mark = new Mark("test", 0, 0, 0, data, 0);
63    }
64
65    public boolean checkToken(Token.ID... choices) {
66        if (!scanned) {
67            scan();
68        }
69        if (!tokens.isEmpty()) {
70            if (choices.length == 0) {
71                return true;
72            }
73            Token first = this.tokens.get(0);
74            for (Token.ID choice : choices) {
75                if (first.getTokenId() == choice) {
76                    return true;
77                }
78            }
79        }
80        return false;
81    }
82
83    public Token peekToken() {
84        if (!scanned) {
85            scan();
86        }
87        if (!tokens.isEmpty()) {
88            return this.tokens.get(0);
89        }
90        return null;
91    }
92
93    public Token getToken() {
94        if (!scanned) {
95            scan();
96        }
97        return this.tokens.remove(0);
98    }
99
100    public Token getToken(Token.ID choice) {
101        Token token = getToken();
102        if (choice != null && token.getTokenId() != choice) {
103            throw new CanonicalException("unexpected token " + token);
104        }
105        return token;
106    }
107
108    private void scan() {
109        this.tokens.add(new StreamStartToken(mark, mark));
110        boolean stop = false;
111        while (!stop) {
112            findToken();
113            char ch = data.charAt(index);
114            switch (ch) {
115            case '\0':
116                tokens.add(new StreamEndToken(mark, mark));
117                stop = true;
118                break;
119
120            case '%':
121                tokens.add(scanDirective());
122                break;
123
124            case '-':
125                if ("---".equals(data.substring(index, index + 3))) {
126                    index += 3;
127                    tokens.add(new DocumentStartToken(mark, mark));
128                }
129                break;
130
131            case '[':
132                index++;
133                tokens.add(new FlowSequenceStartToken(mark, mark));
134                break;
135
136            case '{':
137                index++;
138                tokens.add(new FlowMappingStartToken(mark, mark));
139                break;
140
141            case ']':
142                index++;
143                tokens.add(new FlowSequenceEndToken(mark, mark));
144                break;
145
146            case '}':
147                index++;
148                tokens.add(new FlowMappingEndToken(mark, mark));
149                break;
150
151            case '?':
152                index++;
153                tokens.add(new KeyToken(mark, mark));
154                break;
155
156            case ':':
157                index++;
158                tokens.add(new ValueToken(mark, mark));
159                break;
160
161            case ',':
162                index++;
163                tokens.add(new FlowEntryToken(mark, mark));
164                break;
165
166            case '*':
167                tokens.add(scanAlias());
168                break;
169
170            case '&':
171                tokens.add(scanAlias());
172                break;
173
174            case '!':
175                tokens.add(scanTag());
176                break;
177
178            case '"':
179                tokens.add(scanScalar());
180                break;
181
182            default:
183                throw new CanonicalException("invalid token");
184            }
185        }
186        scanned = true;
187    }
188
189    private Token scanDirective() {
190        String chunk1 = data.substring(index, index + DIRECTIVE.length());
191        char chunk2 = data.charAt(index + DIRECTIVE.length());
192        if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) {
193            index += DIRECTIVE.length();
194            List<Integer> implicit = new ArrayList<Integer>(2);
195            implicit.add(new Integer(1));
196            implicit.add(new Integer(1));
197            return new DirectiveToken<Integer>("YAML", implicit, mark, mark);
198        } else {
199            throw new CanonicalException("invalid directive");
200        }
201    }
202
203    private Token scanAlias() {
204        boolean isTokenClassAlias;
205        if (data.charAt(index) == '*') {
206            isTokenClassAlias = true;
207        } else {
208            isTokenClassAlias = false;
209        }
210        index++;
211        int start = index;
212        while (", \n\0".indexOf(data.charAt(index)) == -1) {
213            index++;
214        }
215        String value = data.substring(start, index);
216        Token token;
217        if (isTokenClassAlias) {
218            token = new AliasToken(value, mark, mark);
219        } else {
220            token = new AnchorToken(value, mark, mark);
221        }
222        return token;
223    }
224
225    private Token scanTag() {
226        index++;
227        int start = index;
228        while (" \n\0".indexOf(data.charAt(index)) == -1) {
229            index++;
230        }
231        String value = data.substring(start, index);
232        if (value.length() == 0) {
233            value = "!";
234        } else if (value.charAt(0) == '!') {
235            value = Tag.PREFIX + value.substring(1);
236        } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') {
237            value = value.substring(1, value.length() - 1);
238        } else {
239            value = "!" + value;
240        }
241        return new TagToken(new TagTuple("", value), mark, mark);
242    }
243
244    private Token scanScalar() {
245        index++;
246        StringBuilder chunks = new StringBuilder();
247        int start = index;
248        boolean ignoreSpaces = false;
249        while (data.charAt(index) != '"') {
250            if (data.charAt(index) == '\\') {
251                ignoreSpaces = false;
252                chunks.append(data.substring(start, index));
253                index++;
254                char ch = data.charAt(index);
255                index++;
256                if (ch == '\n') {
257                    ignoreSpaces = true;
258                } else if (QUOTE_CODES.keySet().contains(ch)) {
259                    int length = QUOTE_CODES.get(ch);
260                    int code = Integer.parseInt(data.substring(index, index + length), 16);
261                    chunks.append(String.valueOf((char) code));
262                    index += length;
263                } else {
264                    if (!QUOTE_REPLACES.keySet().contains(ch)) {
265                        throw new CanonicalException("invalid escape code");
266                    }
267                    chunks.append(QUOTE_REPLACES.get(ch));
268                }
269                start = index;
270            } else if (data.charAt(index) == '\n') {
271                chunks.append(data.substring(start, index));
272                chunks.append(" ");
273                index++;
274                start = index;
275                ignoreSpaces = true;
276            } else if (ignoreSpaces && data.charAt(index) == ' ') {
277                index++;
278                start = index;
279            } else {
280                ignoreSpaces = false;
281                index++;
282            }
283        }
284        chunks.append(data.substring(start, index));
285        index++;
286        return new ScalarToken(chunks.toString(), mark, mark, false);
287    }
288
289    private void findToken() {
290        boolean found = false;
291        while (!found) {
292            while (" \t".indexOf(data.charAt(index)) != -1) {
293                index++;
294            }
295            if (data.charAt(index) == '#') {
296                while (data.charAt(index) != '\n') {
297                    index++;
298                }
299            }
300            if (data.charAt(index) == '\n') {
301                index++;
302            } else {
303                found = true;
304            }
305        }
306    }
307}
308