1/*
2 * JSON Parser
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 *  Anthony Liguori   <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14#include <stdarg.h>
15
16#include "qemu-common.h"
17#include "qstring.h"
18#include "qint.h"
19#include "qdict.h"
20#include "qlist.h"
21#include "qfloat.h"
22#include "qbool.h"
23#include "json-parser.h"
24#include "json-lexer.h"
25
/*
 * Parser state handed to every parsing routine in this file.  It has no
 * members at present; it exists so state can be added later without changing
 * the signature of each routine.
 */
typedef struct JSONParserContext {
} JSONParserContext;
29
/* Assert that a condition which must never hold is indeed false. */
#define BUG_ON(expr) assert(!(expr))
31
32/**
33 * TODO
34 *
35 * 0) make errors meaningful again
36 * 1) add geometry information to tokens
37 * 3) should we return a parsed size?
38 * 4) deal with premature EOI
39 */
40
41static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
42
43/**
44 * Token manipulators
45 *
46 * tokens are dictionaries that contain a type, a string value, and geometry information
47 * about a token identified by the lexer.  These are routines that make working with
48 * these objects a bit easier.
49 */
50static const char *token_get_value(QObject *obj)
51{
52    return qdict_get_str(qobject_to_qdict(obj), "token");
53}
54
55static JSONTokenType token_get_type(QObject *obj)
56{
57    return qdict_get_int(qobject_to_qdict(obj), "type");
58}
59
60static int token_is_operator(QObject *obj, char op)
61{
62    const char *val;
63
64    if (token_get_type(obj) != JSON_OPERATOR) {
65        return 0;
66    }
67
68    val = token_get_value(obj);
69
70    return (val[0] == op) && (val[1] == 0);
71}
72
73static int token_is_keyword(QObject *obj, const char *value)
74{
75    if (token_get_type(obj) != JSON_KEYWORD) {
76        return 0;
77    }
78
79    return strcmp(token_get_value(obj), value) == 0;
80}
81
82static int token_is_escape(QObject *obj, const char *value)
83{
84    if (token_get_type(obj) != JSON_ESCAPE) {
85        return 0;
86    }
87
88    return (strcmp(token_get_value(obj), value) == 0);
89}
90
91/**
92 * Error handler
93 */
94static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
95                                           QObject *token, const char *msg, ...)
96{
97    va_list ap;
98    va_start(ap, msg);
99    fprintf(stderr, "parse error: ");
100    vfprintf(stderr, msg, ap);
101    fprintf(stderr, "\n");
102    va_end(ap);
103}
104
105/**
106 * String helpers
107 *
108 * These helpers are used to unescape strings.
109 */
/**
 * wchar_to_utf8(): Encode a 16-bit code unit as a NUL-terminated UTF-8 string
 *
 * @wchar: the value to encode
 * @buffer: destination for the encoded bytes and the terminating NUL
 * @buffer_length: size of @buffer in bytes
 *
 * Values up to 0x7F take one byte, values up to 0x7FF take two bytes and all
 * other values take three bytes; one more byte is needed for the terminator.
 * A buffer that is too small for the selected form trips an assertion.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    char *out = buffer;

    if (wchar < 0x0080) {
        /* 0xxxxxxx */
        assert(buffer_length >= 2);

        *out++ = wchar & 0x7F;
    } else if (wchar < 0x0800) {
        /* 110xxxxx 10xxxxxx */
        assert(buffer_length >= 3);

        *out++ = 0xC0 | ((wchar >> 6) & 0x1F);
        *out++ = 0x80 | (wchar & 0x3F);
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        assert(buffer_length >= 4);

        *out++ = 0xE0 | ((wchar >> 12) & 0x0F);
        *out++ = 0x80 | ((wchar >> 6) & 0x3F);
        *out++ = 0x80 | (wchar & 0x3F);
    }

    *out = '\0';
}
132
/**
 * hex2decimal(): Convert one hexadecimal digit character to its value
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
 * character.
 */
static int hex2decimal(char ch)
{
    int value = -1;

    if ('0' <= ch && ch <= '9') {
        value = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        value = ch - 'a' + 10;
    } else if ('A' <= ch && ch <= 'F') {
        value = ch - 'A' + 10;
    }

    return value;
}
145
146/**
147 * parse_string(): Parse a json string and return a QObject
148 *
149 *  string
150 *      ""
151 *      " chars "
152 *  chars
153 *      char
154 *      char chars
155 *  char
156 *      any-Unicode-character-
157 *          except-"-or-\-or-
158 *          control-character
159 *      \"
160 *      \\
161 *      \/
162 *      \b
163 *      \f
164 *      \n
165 *      \r
166 *      \t
167 *      \u four-hex-digits
168 */
169static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
170{
171    const char *ptr = token_get_value(token);
172    QString *str;
173    int double_quote = 1;
174
175    if (*ptr == '"') {
176        double_quote = 1;
177    } else {
178        double_quote = 0;
179    }
180    ptr++;
181
182    str = qstring_new();
183    while (*ptr &&
184           ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
185        if (*ptr == '\\') {
186            ptr++;
187
188            switch (*ptr) {
189            case '"':
190                qstring_append(str, "\"");
191                ptr++;
192                break;
193            case '\'':
194                qstring_append(str, "'");
195                ptr++;
196                break;
197            case '\\':
198                qstring_append(str, "\\");
199                ptr++;
200                break;
201            case '/':
202                qstring_append(str, "/");
203                ptr++;
204                break;
205            case 'b':
206                qstring_append(str, "\b");
207                ptr++;
208                break;
209            case 'f':
210                qstring_append(str, "\f");
211                ptr++;
212                break;
213            case 'n':
214                qstring_append(str, "\n");
215                ptr++;
216                break;
217            case 'r':
218                qstring_append(str, "\r");
219                ptr++;
220                break;
221            case 't':
222                qstring_append(str, "\t");
223                ptr++;
224                break;
225            case 'u': {
226                uint16_t unicode_char = 0;
227                char utf8_char[4];
228                int i = 0;
229
230                ptr++;
231
232                for (i = 0; i < 4; i++) {
233                    if (qemu_isxdigit(*ptr)) {
234                        unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
235                    } else {
236                        parse_error(ctxt, token,
237                                    "invalid hex escape sequence in string");
238                        goto out;
239                    }
240                    ptr++;
241                }
242
243                wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
244                qstring_append(str, utf8_char);
245            }   break;
246            default:
247                parse_error(ctxt, token, "invalid escape sequence in string");
248                goto out;
249            }
250        } else {
251            char dummy[2];
252
253            dummy[0] = *ptr++;
254            dummy[1] = 0;
255
256            qstring_append(str, dummy);
257        }
258    }
259
260    return str;
261
262out:
263    QDECREF(str);
264    return NULL;
265}
266
267/**
268 * Parsing rules
269 */
270static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
271{
272    QObject *key, *token = NULL, *value, *peek;
273    QList *working = qlist_copy(*tokens);
274
275    peek = qlist_peek(working);
276    key = parse_value(ctxt, &working, ap);
277    if (!key || qobject_type(key) != QTYPE_QSTRING) {
278        parse_error(ctxt, peek, "key is not a string in object");
279        goto out;
280    }
281
282    token = qlist_pop(working);
283    if (!token_is_operator(token, ':')) {
284        parse_error(ctxt, token, "missing : in object pair");
285        goto out;
286    }
287
288    value = parse_value(ctxt, &working, ap);
289    if (value == NULL) {
290        parse_error(ctxt, token, "Missing value in dict");
291        goto out;
292    }
293
294    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
295
296    qobject_decref(token);
297    qobject_decref(key);
298    QDECREF(*tokens);
299    *tokens = working;
300
301    return 0;
302
303out:
304    qobject_decref(token);
305    qobject_decref(key);
306    QDECREF(working);
307
308    return -1;
309}
310
311static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
312{
313    QDict *dict = NULL;
314    QObject *token, *peek;
315    QList *working = qlist_copy(*tokens);
316
317    token = qlist_pop(working);
318    if (!token_is_operator(token, '{')) {
319        goto out;
320    }
321    qobject_decref(token);
322    token = NULL;
323
324    dict = qdict_new();
325
326    peek = qlist_peek(working);
327    if (!token_is_operator(peek, '}')) {
328        if (parse_pair(ctxt, dict, &working, ap) == -1) {
329            goto out;
330        }
331
332        token = qlist_pop(working);
333        while (!token_is_operator(token, '}')) {
334            if (!token_is_operator(token, ',')) {
335                parse_error(ctxt, token, "expected separator in dict");
336                goto out;
337            }
338            qobject_decref(token);
339            token = NULL;
340
341            if (parse_pair(ctxt, dict, &working, ap) == -1) {
342                goto out;
343            }
344
345            token = qlist_pop(working);
346        }
347        qobject_decref(token);
348        token = NULL;
349    } else {
350        token = qlist_pop(working);
351        qobject_decref(token);
352        token = NULL;
353    }
354
355    QDECREF(*tokens);
356    *tokens = working;
357
358    return QOBJECT(dict);
359
360out:
361    qobject_decref(token);
362    QDECREF(working);
363    QDECREF(dict);
364    return NULL;
365}
366
367static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
368{
369    QList *list = NULL;
370    QObject *token, *peek;
371    QList *working = qlist_copy(*tokens);
372
373    token = qlist_pop(working);
374    if (!token_is_operator(token, '[')) {
375        goto out;
376    }
377    qobject_decref(token);
378    token = NULL;
379
380    list = qlist_new();
381
382    peek = qlist_peek(working);
383    if (!token_is_operator(peek, ']')) {
384        QObject *obj;
385
386        obj = parse_value(ctxt, &working, ap);
387        if (obj == NULL) {
388            parse_error(ctxt, token, "expecting value");
389            goto out;
390        }
391
392        qlist_append_obj(list, obj);
393
394        token = qlist_pop(working);
395        while (!token_is_operator(token, ']')) {
396            if (!token_is_operator(token, ',')) {
397                parse_error(ctxt, token, "expected separator in list");
398                goto out;
399            }
400
401            qobject_decref(token);
402            token = NULL;
403
404            obj = parse_value(ctxt, &working, ap);
405            if (obj == NULL) {
406                parse_error(ctxt, token, "expecting value");
407                goto out;
408            }
409
410            qlist_append_obj(list, obj);
411
412            token = qlist_pop(working);
413        }
414
415        qobject_decref(token);
416        token = NULL;
417    } else {
418        token = qlist_pop(working);
419        qobject_decref(token);
420        token = NULL;
421    }
422
423    QDECREF(*tokens);
424    *tokens = working;
425
426    return QOBJECT(list);
427
428out:
429    qobject_decref(token);
430    QDECREF(working);
431    QDECREF(list);
432    return NULL;
433}
434
435static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
436{
437    QObject *token, *ret;
438    QList *working = qlist_copy(*tokens);
439
440    token = qlist_pop(working);
441
442    if (token_get_type(token) != JSON_KEYWORD) {
443        goto out;
444    }
445
446    if (token_is_keyword(token, "true")) {
447        ret = QOBJECT(qbool_from_int(true));
448    } else if (token_is_keyword(token, "false")) {
449        ret = QOBJECT(qbool_from_int(false));
450    } else {
451        parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
452        goto out;
453    }
454
455    qobject_decref(token);
456    QDECREF(*tokens);
457    *tokens = working;
458
459    return ret;
460
461out:
462    qobject_decref(token);
463    QDECREF(working);
464
465    return NULL;
466}
467
468static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
469{
470    QObject *token = NULL, *obj;
471    QList *working = qlist_copy(*tokens);
472
473    if (ap == NULL) {
474        goto out;
475    }
476
477    token = qlist_pop(working);
478
479    if (token_is_escape(token, "%p")) {
480        obj = va_arg(*ap, QObject *);
481    } else if (token_is_escape(token, "%i")) {
482        obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
483    } else if (token_is_escape(token, "%d")) {
484        obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
485    } else if (token_is_escape(token, "%ld")) {
486        obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
487    } else if (token_is_escape(token, "%lld") ||
488               token_is_escape(token, "%I64d")) {
489        obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
490    } else if (token_is_escape(token, "%s")) {
491        obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
492    } else if (token_is_escape(token, "%f")) {
493        obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
494    } else {
495        goto out;
496    }
497
498    qobject_decref(token);
499    QDECREF(*tokens);
500    *tokens = working;
501
502    return obj;
503
504out:
505    qobject_decref(token);
506    QDECREF(working);
507
508    return NULL;
509}
510
511static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
512{
513    QObject *token, *obj;
514    QList *working = qlist_copy(*tokens);
515
516    token = qlist_pop(working);
517    switch (token_get_type(token)) {
518    case JSON_STRING:
519        obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
520        break;
521    case JSON_INTEGER:
522        obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
523        break;
524    case JSON_FLOAT:
525        /* FIXME dependent on locale */
526        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
527        break;
528    default:
529        goto out;
530    }
531
532    qobject_decref(token);
533    QDECREF(*tokens);
534    *tokens = working;
535
536    return obj;
537
538out:
539    qobject_decref(token);
540    QDECREF(working);
541
542    return NULL;
543}
544
545static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
546{
547    QObject *obj;
548
549    obj = parse_object(ctxt, tokens, ap);
550    if (obj == NULL) {
551        obj = parse_array(ctxt, tokens, ap);
552    }
553    if (obj == NULL) {
554        obj = parse_escape(ctxt, tokens, ap);
555    }
556    if (obj == NULL) {
557        obj = parse_keyword(ctxt, tokens);
558    }
559    if (obj == NULL) {
560        obj = parse_literal(ctxt, tokens);
561    }
562
563    return obj;
564}
565
566QObject *json_parser_parse(QList *tokens, va_list *ap)
567{
568    JSONParserContext ctxt = {};
569    QList *working = qlist_copy(tokens);
570    QObject *result;
571
572    result = parse_value(&ctxt, &working, ap);
573
574    QDECREF(working);
575
576    return result;
577}
578