1/*
2 * JSON Parser
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 *  Anthony Liguori   <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14#include <stdarg.h>
15
16#include "qemu-common.h"
17#include "qapi/qmp/qstring.h"
18#include "qapi/qmp/qint.h"
19#include "qapi/qmp/qdict.h"
20#include "qapi/qmp/qlist.h"
21#include "qapi/qmp/qfloat.h"
22#include "qapi/qmp/qbool.h"
23#include "qapi/qmp/json-parser.h"
24#include "qapi/qmp/json-lexer.h"
25#include "qapi/qmp/qerror.h"
26
27typedef struct JSONParserContext
28{
29    Error *err;
30    struct {
31        QObject **buf;
32        size_t pos;
33        size_t count;
34    } tokens;
35} JSONParserContext;
36
/* Assert that @cond is false; expands to assert(!(cond)). */
#define BUG_ON(cond) assert(!(cond))
38
39/**
40 * TODO
41 *
42 * 0) make errors meaningful again
43 * 1) add geometry information to tokens
44 * 3) should we return a parsed size?
45 * 4) deal with premature EOI
46 */
47
/*
 * Forward declaration: parse_pair(), parse_object() and parse_array() call
 * parse_value() before its definition further down in this file.
 */
static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
49
50/**
51 * Token manipulators
52 *
53 * tokens are dictionaries that contain a type, a string value, and geometry information
54 * about a token identified by the lexer.  These are routines that make working with
55 * these objects a bit easier.
56 */
57static const char *token_get_value(QObject *obj)
58{
59    return qdict_get_str(qobject_to_qdict(obj), "token");
60}
61
62static JSONTokenType token_get_type(QObject *obj)
63{
64    return qdict_get_int(qobject_to_qdict(obj), "type");
65}
66
67static int token_is_operator(QObject *obj, char op)
68{
69    const char *val;
70
71    if (token_get_type(obj) != JSON_OPERATOR) {
72        return 0;
73    }
74
75    val = token_get_value(obj);
76
77    return (val[0] == op) && (val[1] == 0);
78}
79
80static int token_is_keyword(QObject *obj, const char *value)
81{
82    if (token_get_type(obj) != JSON_KEYWORD) {
83        return 0;
84    }
85
86    return strcmp(token_get_value(obj), value) == 0;
87}
88
89static int token_is_escape(QObject *obj, const char *value)
90{
91    if (token_get_type(obj) != JSON_ESCAPE) {
92        return 0;
93    }
94
95    return (strcmp(token_get_value(obj), value) == 0);
96}
97
98/**
99 * Error handler
100 */
101static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
102                                           QObject *token, const char *msg, ...)
103{
104    va_list ap;
105    char message[1024];
106    va_start(ap, msg);
107    vsnprintf(message, sizeof(message), msg, ap);
108    va_end(ap);
109    if (ctxt->err) {
110        error_free(ctxt->err);
111        ctxt->err = NULL;
112    }
113    error_set(&ctxt->err, QERR_JSON_PARSE_ERROR, message);
114}
115
116/**
117 * String helpers
118 *
119 * These helpers are used to unescape strings.
120 */
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence
 * of one to three bytes in @buffer.
 *
 * @buffer_length is the size of @buffer; BUG_ON() fires if it cannot hold
 * the encoded bytes plus the terminator (2, 3 or 4 bytes in total).
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t len = 0;

    if (wchar <= 0x007F) {
        /* 0xxxxxxx */
        BUG_ON(buffer_length < 2);

        buffer[len++] = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);

        buffer[len++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        BUG_ON(buffer_length < 4);

        buffer[len++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[len++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    }

    buffer[len] = 0;
}
143
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
 * character.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if ('0' <= ch && ch <= '9') {
        digit = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        digit = ch - 'a' + 10;
    } else if ('A' <= ch && ch <= 'F') {
        digit = ch - 'A' + 10;
    }

    return digit;
}
156
157/**
158 * parse_string(): Parse a json string and return a QObject
159 *
160 *  string
161 *      ""
162 *      " chars "
163 *  chars
164 *      char
165 *      char chars
166 *  char
167 *      any-Unicode-character-
168 *          except-"-or-\-or-
169 *          control-character
170 *      \"
171 *      \\
172 *      \/
173 *      \b
174 *      \f
175 *      \n
176 *      \r
177 *      \t
178 *      \u four-hex-digits
179 */
180static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
181{
182    const char *ptr = token_get_value(token);
183    QString *str;
184    int double_quote = 1;
185
186    if (*ptr == '"') {
187        double_quote = 1;
188    } else {
189        double_quote = 0;
190    }
191    ptr++;
192
193    str = qstring_new();
194    while (*ptr &&
195           ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
196        if (*ptr == '\\') {
197            ptr++;
198
199            switch (*ptr) {
200            case '"':
201                qstring_append(str, "\"");
202                ptr++;
203                break;
204            case '\'':
205                qstring_append(str, "'");
206                ptr++;
207                break;
208            case '\\':
209                qstring_append(str, "\\");
210                ptr++;
211                break;
212            case '/':
213                qstring_append(str, "/");
214                ptr++;
215                break;
216            case 'b':
217                qstring_append(str, "\b");
218                ptr++;
219                break;
220            case 'f':
221                qstring_append(str, "\f");
222                ptr++;
223                break;
224            case 'n':
225                qstring_append(str, "\n");
226                ptr++;
227                break;
228            case 'r':
229                qstring_append(str, "\r");
230                ptr++;
231                break;
232            case 't':
233                qstring_append(str, "\t");
234                ptr++;
235                break;
236            case 'u': {
237                uint16_t unicode_char = 0;
238                char utf8_char[4];
239                int i = 0;
240
241                ptr++;
242
243                for (i = 0; i < 4; i++) {
244                    if (qemu_isxdigit(*ptr)) {
245                        unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
246                    } else {
247                        parse_error(ctxt, token,
248                                    "invalid hex escape sequence in string");
249                        goto out;
250                    }
251                    ptr++;
252                }
253
254                wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
255                qstring_append(str, utf8_char);
256            }   break;
257            default:
258                parse_error(ctxt, token, "invalid escape sequence in string");
259                goto out;
260            }
261        } else {
262            char dummy[2];
263
264            dummy[0] = *ptr++;
265            dummy[1] = 0;
266
267            qstring_append(str, dummy);
268        }
269    }
270
271    return str;
272
273out:
274    QDECREF(str);
275    return NULL;
276}
277
278static QObject *parser_context_pop_token(JSONParserContext *ctxt)
279{
280    QObject *token;
281    g_assert(ctxt->tokens.pos < ctxt->tokens.count);
282    token = ctxt->tokens.buf[ctxt->tokens.pos];
283    ctxt->tokens.pos++;
284    return token;
285}
286
287/* Note: parser_context_{peek|pop}_token do not increment the
288 * token object's refcount. In both cases the references will continue
289 * to be tracked and cleaned up in parser_context_free(), so do not
290 * attempt to free the token object.
291 */
292static QObject *parser_context_peek_token(JSONParserContext *ctxt)
293{
294    QObject *token;
295    g_assert(ctxt->tokens.pos < ctxt->tokens.count);
296    token = ctxt->tokens.buf[ctxt->tokens.pos];
297    return token;
298}
299
300static JSONParserContext parser_context_save(JSONParserContext *ctxt)
301{
302    JSONParserContext saved_ctxt = {0};
303    saved_ctxt.tokens.pos = ctxt->tokens.pos;
304    saved_ctxt.tokens.count = ctxt->tokens.count;
305    saved_ctxt.tokens.buf = ctxt->tokens.buf;
306    return saved_ctxt;
307}
308
309static void parser_context_restore(JSONParserContext *ctxt,
310                                   JSONParserContext saved_ctxt)
311{
312    ctxt->tokens.pos = saved_ctxt.tokens.pos;
313    ctxt->tokens.count = saved_ctxt.tokens.count;
314    ctxt->tokens.buf = saved_ctxt.tokens.buf;
315}
316
317static void tokens_append_from_iter(QObject *obj, void *opaque)
318{
319    JSONParserContext *ctxt = opaque;
320    g_assert(ctxt->tokens.pos < ctxt->tokens.count);
321    ctxt->tokens.buf[ctxt->tokens.pos++] = obj;
322    qobject_incref(obj);
323}
324
325static JSONParserContext *parser_context_new(QList *tokens)
326{
327    JSONParserContext *ctxt;
328    size_t count;
329
330    if (!tokens) {
331        return NULL;
332    }
333
334    count = qlist_size(tokens);
335    if (count == 0) {
336        return NULL;
337    }
338
339    ctxt = g_malloc0(sizeof(JSONParserContext));
340    ctxt->tokens.pos = 0;
341    ctxt->tokens.count = count;
342    ctxt->tokens.buf = g_malloc(count * sizeof(QObject *));
343    qlist_iter(tokens, tokens_append_from_iter, ctxt);
344    ctxt->tokens.pos = 0;
345
346    return ctxt;
347}
348
349/* to support error propagation, ctxt->err must be freed separately */
350static void parser_context_free(JSONParserContext *ctxt)
351{
352    int i;
353    if (ctxt) {
354        for (i = 0; i < ctxt->tokens.count; i++) {
355            qobject_decref(ctxt->tokens.buf[i]);
356        }
357        g_free(ctxt->tokens.buf);
358        g_free(ctxt);
359    }
360}
361
362/**
363 * Parsing rules
364 */
365static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
366{
367    QObject *key = NULL, *token = NULL, *value, *peek;
368    JSONParserContext saved_ctxt = parser_context_save(ctxt);
369
370    peek = parser_context_peek_token(ctxt);
371    if (peek == NULL) {
372        parse_error(ctxt, NULL, "premature EOI");
373        goto out;
374    }
375
376    key = parse_value(ctxt, ap);
377    if (!key || qobject_type(key) != QTYPE_QSTRING) {
378        parse_error(ctxt, peek, "key is not a string in object");
379        goto out;
380    }
381
382    token = parser_context_pop_token(ctxt);
383    if (token == NULL) {
384        parse_error(ctxt, NULL, "premature EOI");
385        goto out;
386    }
387
388    if (!token_is_operator(token, ':')) {
389        parse_error(ctxt, token, "missing : in object pair");
390        goto out;
391    }
392
393    value = parse_value(ctxt, ap);
394    if (value == NULL) {
395        parse_error(ctxt, token, "Missing value in dict");
396        goto out;
397    }
398
399    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
400
401    qobject_decref(key);
402
403    return 0;
404
405out:
406    parser_context_restore(ctxt, saved_ctxt);
407    qobject_decref(key);
408
409    return -1;
410}
411
412static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
413{
414    QDict *dict = NULL;
415    QObject *token, *peek;
416    JSONParserContext saved_ctxt = parser_context_save(ctxt);
417
418    token = parser_context_pop_token(ctxt);
419    if (token == NULL) {
420        goto out;
421    }
422
423    if (!token_is_operator(token, '{')) {
424        goto out;
425    }
426    token = NULL;
427
428    dict = qdict_new();
429
430    peek = parser_context_peek_token(ctxt);
431    if (peek == NULL) {
432        parse_error(ctxt, NULL, "premature EOI");
433        goto out;
434    }
435
436    if (!token_is_operator(peek, '}')) {
437        if (parse_pair(ctxt, dict, ap) == -1) {
438            goto out;
439        }
440
441        token = parser_context_pop_token(ctxt);
442        if (token == NULL) {
443            parse_error(ctxt, NULL, "premature EOI");
444            goto out;
445        }
446
447        while (!token_is_operator(token, '}')) {
448            if (!token_is_operator(token, ',')) {
449                parse_error(ctxt, token, "expected separator in dict");
450                goto out;
451            }
452            token = NULL;
453
454            if (parse_pair(ctxt, dict, ap) == -1) {
455                goto out;
456            }
457
458            token = parser_context_pop_token(ctxt);
459            if (token == NULL) {
460                parse_error(ctxt, NULL, "premature EOI");
461                goto out;
462            }
463        }
464        token = NULL;
465    } else {
466        token = parser_context_pop_token(ctxt);
467        token = NULL;
468    }
469
470    return QOBJECT(dict);
471
472out:
473    parser_context_restore(ctxt, saved_ctxt);
474    QDECREF(dict);
475    return NULL;
476}
477
478static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
479{
480    QList *list = NULL;
481    QObject *token, *peek;
482    JSONParserContext saved_ctxt = parser_context_save(ctxt);
483
484    token = parser_context_pop_token(ctxt);
485    if (token == NULL) {
486        goto out;
487    }
488
489    if (!token_is_operator(token, '[')) {
490        token = NULL;
491        goto out;
492    }
493    token = NULL;
494
495    list = qlist_new();
496
497    peek = parser_context_peek_token(ctxt);
498    if (peek == NULL) {
499        parse_error(ctxt, NULL, "premature EOI");
500        goto out;
501    }
502
503    if (!token_is_operator(peek, ']')) {
504        QObject *obj;
505
506        obj = parse_value(ctxt, ap);
507        if (obj == NULL) {
508            parse_error(ctxt, token, "expecting value");
509            goto out;
510        }
511
512        qlist_append_obj(list, obj);
513
514        token = parser_context_pop_token(ctxt);
515        if (token == NULL) {
516            parse_error(ctxt, NULL, "premature EOI");
517            goto out;
518        }
519
520        while (!token_is_operator(token, ']')) {
521            if (!token_is_operator(token, ',')) {
522                parse_error(ctxt, token, "expected separator in list");
523                goto out;
524            }
525
526            token = NULL;
527
528            obj = parse_value(ctxt, ap);
529            if (obj == NULL) {
530                parse_error(ctxt, token, "expecting value");
531                goto out;
532            }
533
534            qlist_append_obj(list, obj);
535
536            token = parser_context_pop_token(ctxt);
537            if (token == NULL) {
538                parse_error(ctxt, NULL, "premature EOI");
539                goto out;
540            }
541        }
542
543        token = NULL;
544    } else {
545        token = parser_context_pop_token(ctxt);
546        token = NULL;
547    }
548
549    return QOBJECT(list);
550
551out:
552    parser_context_restore(ctxt, saved_ctxt);
553    QDECREF(list);
554    return NULL;
555}
556
557static QObject *parse_keyword(JSONParserContext *ctxt)
558{
559    QObject *token, *ret;
560    JSONParserContext saved_ctxt = parser_context_save(ctxt);
561
562    token = parser_context_pop_token(ctxt);
563    if (token == NULL) {
564        goto out;
565    }
566
567    if (token_get_type(token) != JSON_KEYWORD) {
568        goto out;
569    }
570
571    if (token_is_keyword(token, "true")) {
572        ret = QOBJECT(qbool_from_int(true));
573    } else if (token_is_keyword(token, "false")) {
574        ret = QOBJECT(qbool_from_int(false));
575    } else {
576        parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
577        goto out;
578    }
579
580    return ret;
581
582out:
583    parser_context_restore(ctxt, saved_ctxt);
584
585    return NULL;
586}
587
588static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
589{
590    QObject *token = NULL, *obj;
591    JSONParserContext saved_ctxt = parser_context_save(ctxt);
592
593    if (ap == NULL) {
594        goto out;
595    }
596
597    token = parser_context_pop_token(ctxt);
598    if (token == NULL) {
599        goto out;
600    }
601
602    if (token_is_escape(token, "%p")) {
603        obj = va_arg(*ap, QObject *);
604    } else if (token_is_escape(token, "%i")) {
605        obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
606    } else if (token_is_escape(token, "%d")) {
607        obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
608    } else if (token_is_escape(token, "%ld")) {
609        obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
610    } else if (token_is_escape(token, "%lld") ||
611               token_is_escape(token, "%I64d")) {
612        obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
613    } else if (token_is_escape(token, "%s")) {
614        obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
615    } else if (token_is_escape(token, "%f")) {
616        obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
617    } else {
618        goto out;
619    }
620
621    return obj;
622
623out:
624    parser_context_restore(ctxt, saved_ctxt);
625
626    return NULL;
627}
628
629static QObject *parse_literal(JSONParserContext *ctxt)
630{
631    QObject *token, *obj;
632    JSONParserContext saved_ctxt = parser_context_save(ctxt);
633
634    token = parser_context_pop_token(ctxt);
635    if (token == NULL) {
636        goto out;
637    }
638
639    switch (token_get_type(token)) {
640    case JSON_STRING:
641        obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
642        break;
643    case JSON_INTEGER: {
644        /* A possibility exists that this is a whole-valued float where the
645         * fractional part was left out due to being 0 (.0). It's not a big
646         * deal to treat these as ints in the parser, so long as users of the
647         * resulting QObject know to expect a QInt in place of a QFloat in
648         * cases like these.
649         *
650         * However, in some cases these values will overflow/underflow a
651         * QInt/int64 container, thus we should assume these are to be handled
652         * as QFloats/doubles rather than silently changing their values.
653         *
654         * strtoll() indicates these instances by setting errno to ERANGE
655         */
656        int64_t value;
657
658        errno = 0; /* strtoll doesn't set errno on success */
659        value = strtoll(token_get_value(token), NULL, 10);
660        if (errno != ERANGE) {
661            obj = QOBJECT(qint_from_int(value));
662            break;
663        }
664        /* fall through to JSON_FLOAT */
665    }
666    case JSON_FLOAT:
667        /* FIXME dependent on locale */
668        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
669        break;
670    default:
671        goto out;
672    }
673
674    return obj;
675
676out:
677    parser_context_restore(ctxt, saved_ctxt);
678
679    return NULL;
680}
681
682static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
683{
684    QObject *obj;
685
686    obj = parse_object(ctxt, ap);
687    if (obj == NULL) {
688        obj = parse_array(ctxt, ap);
689    }
690    if (obj == NULL) {
691        obj = parse_escape(ctxt, ap);
692    }
693    if (obj == NULL) {
694        obj = parse_keyword(ctxt);
695    }
696    if (obj == NULL) {
697        obj = parse_literal(ctxt);
698    }
699
700    return obj;
701}
702
703QObject *json_parser_parse(QList *tokens, va_list *ap)
704{
705    return json_parser_parse_err(tokens, ap, NULL);
706}
707
708QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
709{
710    JSONParserContext *ctxt = parser_context_new(tokens);
711    QObject *result;
712
713    if (!ctxt) {
714        return NULL;
715    }
716
717    result = parse_value(ctxt, ap);
718
719    error_propagate(errp, ctxt->err);
720
721    parser_context_free(ctxt);
722
723    return result;
724}
725