1/**
2 * Copyright (c) 2008, http://www.snakeyaml.org
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package org.pyyaml;
17
18import java.util.ArrayList;
19import java.util.List;
20import java.util.Map;
21
22import org.yaml.snakeyaml.error.Mark;
23import org.yaml.snakeyaml.nodes.Tag;
24import org.yaml.snakeyaml.scanner.Scanner;
25import org.yaml.snakeyaml.scanner.ScannerImpl;
26import org.yaml.snakeyaml.tokens.AliasToken;
27import org.yaml.snakeyaml.tokens.AnchorToken;
28import org.yaml.snakeyaml.tokens.DirectiveToken;
29import org.yaml.snakeyaml.tokens.DocumentStartToken;
30import org.yaml.snakeyaml.tokens.FlowEntryToken;
31import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
32import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
33import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
34import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
35import org.yaml.snakeyaml.tokens.KeyToken;
36import org.yaml.snakeyaml.tokens.ScalarToken;
37import org.yaml.snakeyaml.tokens.StreamEndToken;
38import org.yaml.snakeyaml.tokens.StreamStartToken;
39import org.yaml.snakeyaml.tokens.TagToken;
40import org.yaml.snakeyaml.tokens.TagTuple;
41import org.yaml.snakeyaml.tokens.Token;
42import org.yaml.snakeyaml.tokens.ValueToken;
43
44public class CanonicalScanner implements Scanner {
45    private static final String DIRECTIVE = "%YAML 1.1";
46    private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES;
47
48    private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS;
49
50    private String data;
51    private int index;
52    public ArrayList<Token> tokens;
53    private boolean scanned;
54    private Mark mark;
55
56    public CanonicalScanner(String data) {
57        this.data = data + "\0";
58        this.index = 0;
59        this.tokens = new ArrayList<Token>();
60        this.scanned = false;
61        this.mark = new Mark("test", 0, 0, 0, data, 0);
62    }
63
64    public boolean checkToken(Token.ID... choices) {
65        if (!scanned) {
66            scan();
67        }
68        if (!tokens.isEmpty()) {
69            if (choices.length == 0) {
70                return true;
71            }
72            Token first = this.tokens.get(0);
73            for (Token.ID choice : choices) {
74                if (first.getTokenId() == choice) {
75                    return true;
76                }
77            }
78        }
79        return false;
80    }
81
82    public Token peekToken() {
83        if (!scanned) {
84            scan();
85        }
86        if (!tokens.isEmpty()) {
87            return this.tokens.get(0);
88        }
89        return null;
90    }
91
92    public Token getToken() {
93        if (!scanned) {
94            scan();
95        }
96        return this.tokens.remove(0);
97    }
98
99    public Token getToken(Token.ID choice) {
100        Token token = getToken();
101        if (choice != null && token.getTokenId() != choice) {
102            throw new CanonicalException("unexpected token " + token);
103        }
104        return token;
105    }
106
107    private void scan() {
108        this.tokens.add(new StreamStartToken(mark, mark));
109        boolean stop = false;
110        while (!stop) {
111            findToken();
112            char ch = data.charAt(index);
113            switch (ch) {
114            case '\0':
115                tokens.add(new StreamEndToken(mark, mark));
116                stop = true;
117                break;
118
119            case '%':
120                tokens.add(scanDirective());
121                break;
122
123            case '-':
124                if ("---".equals(data.substring(index, index + 3))) {
125                    index += 3;
126                    tokens.add(new DocumentStartToken(mark, mark));
127                }
128                break;
129
130            case '[':
131                index++;
132                tokens.add(new FlowSequenceStartToken(mark, mark));
133                break;
134
135            case '{':
136                index++;
137                tokens.add(new FlowMappingStartToken(mark, mark));
138                break;
139
140            case ']':
141                index++;
142                tokens.add(new FlowSequenceEndToken(mark, mark));
143                break;
144
145            case '}':
146                index++;
147                tokens.add(new FlowMappingEndToken(mark, mark));
148                break;
149
150            case '?':
151                index++;
152                tokens.add(new KeyToken(mark, mark));
153                break;
154
155            case ':':
156                index++;
157                tokens.add(new ValueToken(mark, mark));
158                break;
159
160            case ',':
161                index++;
162                tokens.add(new FlowEntryToken(mark, mark));
163                break;
164
165            case '*':
166                tokens.add(scanAlias());
167                break;
168
169            case '&':
170                tokens.add(scanAlias());
171                break;
172
173            case '!':
174                tokens.add(scanTag());
175                break;
176
177            case '"':
178                tokens.add(scanScalar());
179                break;
180
181            default:
182                throw new CanonicalException("invalid token");
183            }
184        }
185        scanned = true;
186    }
187
188    private Token scanDirective() {
189        String chunk1 = data.substring(index, index + DIRECTIVE.length());
190        char chunk2 = data.charAt(index + DIRECTIVE.length());
191        if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) {
192            index += DIRECTIVE.length();
193            List<Integer> implicit = new ArrayList<Integer>(2);
194            implicit.add(new Integer(1));
195            implicit.add(new Integer(1));
196            return new DirectiveToken<Integer>("YAML", implicit, mark, mark);
197        } else {
198            throw new CanonicalException("invalid directive");
199        }
200    }
201
202    private Token scanAlias() {
203        boolean isTokenClassAlias;
204        if (data.charAt(index) == '*') {
205            isTokenClassAlias = true;
206        } else {
207            isTokenClassAlias = false;
208        }
209        index++;
210        int start = index;
211        while (", \n\0".indexOf(data.charAt(index)) == -1) {
212            index++;
213        }
214        String value = data.substring(start, index);
215        Token token;
216        if (isTokenClassAlias) {
217            token = new AliasToken(value, mark, mark);
218        } else {
219            token = new AnchorToken(value, mark, mark);
220        }
221        return token;
222    }
223
224    private Token scanTag() {
225        index++;
226        int start = index;
227        while (" \n\0".indexOf(data.charAt(index)) == -1) {
228            index++;
229        }
230        String value = data.substring(start, index);
231        if (value.length() == 0) {
232            value = "!";
233        } else if (value.charAt(0) == '!') {
234            value = Tag.PREFIX + value.substring(1);
235        } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') {
236            value = value.substring(1, value.length() - 1);
237        } else {
238            value = "!" + value;
239        }
240        return new TagToken(new TagTuple("", value), mark, mark);
241    }
242
243    private Token scanScalar() {
244        index++;
245        StringBuilder chunks = new StringBuilder();
246        int start = index;
247        boolean ignoreSpaces = false;
248        while (data.charAt(index) != '"') {
249            if (data.charAt(index) == '\\') {
250                ignoreSpaces = false;
251                chunks.append(data.substring(start, index));
252                index++;
253                char ch = data.charAt(index);
254                index++;
255                if (ch == '\n') {
256                    ignoreSpaces = true;
257                } else if (QUOTE_CODES.keySet().contains(ch)) {
258                    int length = QUOTE_CODES.get(ch);
259                    int code = Integer.parseInt(data.substring(index, index + length), 16);
260                    chunks.append(String.valueOf((char) code));
261                    index += length;
262                } else {
263                    if (!QUOTE_REPLACES.keySet().contains(ch)) {
264                        throw new CanonicalException("invalid escape code");
265                    }
266                    chunks.append(QUOTE_REPLACES.get(ch));
267                }
268                start = index;
269            } else if (data.charAt(index) == '\n') {
270                chunks.append(data.substring(start, index));
271                chunks.append(" ");
272                index++;
273                start = index;
274                ignoreSpaces = true;
275            } else if (ignoreSpaces && data.charAt(index) == ' ') {
276                index++;
277                start = index;
278            } else {
279                ignoreSpaces = false;
280                index++;
281            }
282        }
283        chunks.append(data.substring(start, index));
284        index++;
285        return new ScalarToken(chunks.toString(), mark, mark, false);
286    }
287
288    private void findToken() {
289        boolean found = false;
290        while (!found) {
291            while (" \t".indexOf(data.charAt(index)) != -1) {
292                index++;
293            }
294            if (data.charAt(index) == '#') {
295                while (data.charAt(index) != '\n') {
296                    index++;
297                }
298            }
299            if (data.charAt(index) == '\n') {
300                index++;
301            } else {
302                found = true;
303            }
304        }
305    }
306}
307