1#include "Python.h"
2#include "structmember.h"
3#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4#define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
5#endif
6#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7typedef int Py_ssize_t;
8#define PY_SSIZE_T_MAX INT_MAX
9#define PY_SSIZE_T_MIN INT_MIN
10#define PyInt_FromSsize_t PyInt_FromLong
11#define PyInt_AsSsize_t PyInt_AsLong
12#endif
13#ifndef Py_IS_FINITE
14#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
15#endif
16
17#ifdef __GNUC__
18#define UNUSED __attribute__((__unused__))
19#else
20#define UNUSED
21#endif
22
23#define DEFAULT_ENCODING "utf-8"
24
25#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
/* Instance layout of the scanner type (PyScannerType).
   Each PyObject* field is an owned reference: scanner_traverse visits them
   and scanner_clear releases them. scanner_members exposes all of them as
   read-only attributes. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    PyObject *encoding;        /* read with PyString_AS_STRING when parsing str input */
    PyObject *strict;          /* evaluated with PyObject_IsTrue when parsing */
    PyObject *object_hook;     /* presumably a callable applied to decoded objects -- confirm in the parser */
    PyObject *pairs_hook;      /* exposed to Python under the name "object_pairs_hook" */
    PyObject *parse_float;     /* presumably the float constructor hook -- confirm in the parser */
    PyObject *parse_int;       /* presumably the int constructor hook -- confirm in the parser */
    PyObject *parse_constant;  /* presumably the hook for NaN/Infinity constants -- confirm in the parser */
} PyScannerObject;
43
/* Attribute table for the scanner type: every PyObject* field of
   PyScannerObject is published as a read-only T_OBJECT member. Note that the
   pairs_hook field is published under the attribute name "object_pairs_hook". */
static PyMemberDef scanner_members[] = {
    {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
    {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
    {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
    {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
    {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
    {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
    {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
    {NULL}  /* sentinel terminating the table */
};
54
/* Instance layout of the encoder type (PyEncoderType).
   The PyObject* fields are published as read-only attributes through
   encoder_members; the two int fields are internal and not exposed there. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;
    PyObject *defaultfn;       /* exposed to Python under the name "default" */
    PyObject *encoder;
    PyObject *indent;
    PyObject *key_separator;
    PyObject *item_separator;
    PyObject *sort_keys;
    PyObject *skipkeys;
    int fast_encode;           /* not exposed via encoder_members; set outside this view -- meaning to be confirmed */
    int allow_nan;             /* not exposed via encoder_members; set outside this view -- meaning to be confirmed */
} PyEncoderObject;
68
/* Attribute table for the encoder type: every PyObject* field of
   PyEncoderObject is published as a read-only T_OBJECT member. The defaultfn
   field is published under the attribute name "default". */
static PyMemberDef encoder_members[] = {
    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
    {NULL}  /* sentinel terminating the table */
};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98static int
99scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100static void
101scanner_dealloc(PyObject *self);
102static int
103scanner_clear(PyObject *self);
104static PyObject *
105encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106static int
107encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
113encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114static int
115encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116static int
117encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118static PyObject *
119_encoded_const(PyObject *obj);
120static void
121raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124static int
125_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126static PyObject *
127_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128static PyObject *
129encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* True when c is a printable ASCII character (' ' through '~') that can be
   copied verbatim into a JSON string, i.e. anything except backslash and
   double quote. The argument is parenthesized so expression arguments expand
   with the intended precedence; it is still evaluated several times, so do
   not pass expressions with side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True when c is a space, tab, line feed or carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
133
134#define MIN_EXPANSION 6
135#ifdef Py_UNICODE_WIDE
136#define MAX_EXPANSION (2 * MIN_EXPANSION)
137#else
138#define MAX_EXPANSION MIN_EXPANSION
139#endif
140
141static int
142_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143{
144    /* PyObject to Py_ssize_t converter */
145    *size_ptr = PyInt_AsSsize_t(o);
146    if (*size_ptr == -1 && PyErr_Occurred())
147        return 0;
148    return 1;
149}
150
151static PyObject *
152_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153{
154    /* Py_ssize_t to PyObject converter */
155    return PyInt_FromSsize_t(*size_ptr);
156}
157
158static Py_ssize_t
159ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160{
161    /* Escape unicode code point c to ASCII escape sequences
162    in char *output. output must have at least 12 bytes unused to
163    accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
164    output[chars++] = '\\';
165    switch (c) {
166        case '\\': output[chars++] = (char)c; break;
167        case '"': output[chars++] = (char)c; break;
168        case '\b': output[chars++] = 'b'; break;
169        case '\f': output[chars++] = 'f'; break;
170        case '\n': output[chars++] = 'n'; break;
171        case '\r': output[chars++] = 'r'; break;
172        case '\t': output[chars++] = 't'; break;
173        default:
174#ifdef Py_UNICODE_WIDE
175            if (c >= 0x10000) {
176                /* UTF-16 surrogate pair */
177                Py_UNICODE v = c - 0x10000;
178                c = 0xd800 | ((v >> 10) & 0x3ff);
179                output[chars++] = 'u';
180                output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181                output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
182                output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
183                output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
184                c = 0xdc00 | (v & 0x3ff);
185                output[chars++] = '\\';
186            }
187#endif
188            output[chars++] = 'u';
189            output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190            output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
191            output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
192            output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
193    }
194    return chars;
195}
196
197static PyObject *
198ascii_escape_unicode(PyObject *pystr)
199{
200    /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
201    Py_ssize_t i;
202    Py_ssize_t input_chars;
203    Py_ssize_t output_size;
204    Py_ssize_t max_output_size;
205    Py_ssize_t chars;
206    PyObject *rval;
207    char *output;
208    Py_UNICODE *input_unicode;
209
210    input_chars = PyUnicode_GET_SIZE(pystr);
211    input_unicode = PyUnicode_AS_UNICODE(pystr);
212
213    /* One char input can be up to 6 chars output, estimate 4 of these */
214    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
215    max_output_size = 2 + (input_chars * MAX_EXPANSION);
216    rval = PyString_FromStringAndSize(NULL, output_size);
217    if (rval == NULL) {
218        return NULL;
219    }
220    output = PyString_AS_STRING(rval);
221    chars = 0;
222    output[chars++] = '"';
223    for (i = 0; i < input_chars; i++) {
224        Py_UNICODE c = input_unicode[i];
225        if (S_CHAR(c)) {
226            output[chars++] = (char)c;
227        }
228        else {
229            chars = ascii_escape_char(c, output, chars);
230        }
231        if (output_size - chars < (1 + MAX_EXPANSION)) {
232            /* There's more than four, so let's resize by a lot */
233            Py_ssize_t new_output_size = output_size * 2;
234            /* This is an upper bound */
235            if (new_output_size > max_output_size) {
236                new_output_size = max_output_size;
237            }
238            /* Make sure that the output size changed before resizing */
239            if (new_output_size != output_size) {
240                output_size = new_output_size;
241                if (_PyString_Resize(&rval, output_size) == -1) {
242                    return NULL;
243                }
244                output = PyString_AS_STRING(rval);
245            }
246        }
247    }
248    output[chars++] = '"';
249    if (_PyString_Resize(&rval, chars) == -1) {
250        return NULL;
251    }
252    return rval;
253}
254
255static PyObject *
256ascii_escape_str(PyObject *pystr)
257{
258    /* Take a PyString pystr and return a new ASCII-only escaped PyString */
259    Py_ssize_t i;
260    Py_ssize_t input_chars;
261    Py_ssize_t output_size;
262    Py_ssize_t chars;
263    PyObject *rval;
264    char *output;
265    char *input_str;
266
267    input_chars = PyString_GET_SIZE(pystr);
268    input_str = PyString_AS_STRING(pystr);
269
270    /* Fast path for a string that's already ASCII */
271    for (i = 0; i < input_chars; i++) {
272        Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273        if (!S_CHAR(c)) {
274            /* If we have to escape something, scan the string for unicode */
275            Py_ssize_t j;
276            for (j = i; j < input_chars; j++) {
277                c = (Py_UNICODE)(unsigned char)input_str[j];
278                if (c > 0x7f) {
279                    /* We hit a non-ASCII character, bail to unicode mode */
280                    PyObject *uni;
281                    uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282                    if (uni == NULL) {
283                        return NULL;
284                    }
285                    rval = ascii_escape_unicode(uni);
286                    Py_DECREF(uni);
287                    return rval;
288                }
289            }
290            break;
291        }
292    }
293
294    if (i == input_chars) {
295        /* Input is already ASCII */
296        output_size = 2 + input_chars;
297    }
298    else {
299        /* One char input can be up to 6 chars output, estimate 4 of these */
300        output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
301    }
302    rval = PyString_FromStringAndSize(NULL, output_size);
303    if (rval == NULL) {
304        return NULL;
305    }
306    output = PyString_AS_STRING(rval);
307    output[0] = '"';
308
309    /* We know that everything up to i is ASCII already */
310    chars = i + 1;
311    memcpy(&output[1], input_str, i);
312
313    for (; i < input_chars; i++) {
314        Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
315        if (S_CHAR(c)) {
316            output[chars++] = (char)c;
317        }
318        else {
319            chars = ascii_escape_char(c, output, chars);
320        }
321        /* An ASCII char can't possibly expand to a surrogate! */
322        if (output_size - chars < (1 + MIN_EXPANSION)) {
323            /* There's more than four, so let's resize by a lot */
324            output_size *= 2;
325            if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326                output_size = 2 + (input_chars * MIN_EXPANSION);
327            }
328            if (_PyString_Resize(&rval, output_size) == -1) {
329                return NULL;
330            }
331            output = PyString_AS_STRING(rval);
332        }
333    }
334    output[chars++] = '"';
335    if (_PyString_Resize(&rval, chars) == -1) {
336        return NULL;
337    }
338    return rval;
339}
340
341static void
342raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
343{
344    /* Use the Python function json.decoder.errmsg to raise a nice
345    looking ValueError exception */
346    static PyObject *errmsg_fn = NULL;
347    PyObject *pymsg;
348    if (errmsg_fn == NULL) {
349        PyObject *decoder = PyImport_ImportModule("json.decoder");
350        if (decoder == NULL)
351            return;
352        errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
353        Py_DECREF(decoder);
354        if (errmsg_fn == NULL)
355            return;
356    }
357    pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
358    if (pymsg) {
359        PyErr_SetObject(PyExc_ValueError, pymsg);
360        Py_DECREF(pymsg);
361    }
362}
363
364static PyObject *
365join_list_unicode(PyObject *lst)
366{
367    /* return u''.join(lst) */
368    static PyObject *joinfn = NULL;
369    if (joinfn == NULL) {
370        PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371        if (ustr == NULL)
372            return NULL;
373
374        joinfn = PyObject_GetAttrString(ustr, "join");
375        Py_DECREF(ustr);
376        if (joinfn == NULL)
377            return NULL;
378    }
379    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
380}
381
382static PyObject *
383_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
384    /* return (rval, idx) tuple, stealing reference to rval */
385    PyObject *tpl;
386    PyObject *pyidx;
387    /*
388    steal a reference to rval, returns (rval, idx)
389    */
390    if (rval == NULL) {
391        return NULL;
392    }
393    pyidx = PyInt_FromSsize_t(idx);
394    if (pyidx == NULL) {
395        Py_DECREF(rval);
396        return NULL;
397    }
398    tpl = PyTuple_New(2);
399    if (tpl == NULL) {
400        Py_DECREF(pyidx);
401        Py_DECREF(rval);
402        return NULL;
403    }
404    PyTuple_SET_ITEM(tpl, 0, rval);
405    PyTuple_SET_ITEM(tpl, 1, pyidx);
406    return tpl;
407}
408
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
{
    /* Read the JSON string from PyString pystr.
    end is the index of the first character after the quote.
    encoding is the encoding of pystr (must be an ASCII superset)
    if strict is zero then literal control characters are allowed
    *next_end_ptr is a return-by-reference index of the character
        after the end quote

    Return value is a new PyUnicode built by joining the decoded chunks
    (the result of join_list_unicode). On failure NULL is returned with an
    exception set and *next_end_ptr is set to -1.
    */
    PyObject *rval;
    Py_ssize_t len = PyString_GET_SIZE(pystr);
    /* index of the opening quote, used in "Unterminated string" errors */
    Py_ssize_t begin = end - 1;
    Py_ssize_t next;
    char *buf = PyString_AS_STRING(pystr);
    /* list of unicode pieces: literal runs and single decoded escapes */
    PyObject *chunks = PyList_New(0);
    if (chunks == NULL) {
        goto bail;
    }
    if (end < 0 || len <= end) {
        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
        goto bail;
    }
    /* Each iteration consumes one literal run (possibly empty) followed by
    either the closing quote or one backslash escape */
    while (1) {
        /* Find the end of the string or the next escape */
        Py_UNICODE c = 0;
        PyObject *chunk = NULL;
        for (next = end; next < len; next++) {
            c = (unsigned char)buf[next];
            if (c == '"' || c == '\\') {
                break;
            }
            else if (strict && c <= 0x1f) {
                raise_errmsg("Invalid control character at", pystr, next);
                goto bail;
            }
        }
        /* The scan ran off the end of the buffer without finding a quote
        or a backslash */
        if (!(c == '"' || c == '\\')) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        /* Pick up this chunk if it's not zero length */
        if (next != end) {
            PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
            if (strchunk == NULL) {
                goto bail;
            }
            /* decode the raw bytes of the run using the caller's encoding */
            chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
            Py_DECREF(strchunk);
            if (chunk == NULL) {
                goto bail;
            }
            if (PyList_Append(chunks, chunk)) {
                Py_DECREF(chunk);
                goto bail;
            }
            /* the list holds its own reference now */
            Py_DECREF(chunk);
        }
        /* step over the quote or backslash that stopped the scan */
        next++;
        if (c == '"') {
            end = next;
            break;
        }
        /* a backslash was the last character of the buffer */
        if (next == len) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        /* NOTE(review): buf is plain char and is not cast here; a byte
        >= 0x80 matches none of the cases below either way and is reported
        as an invalid escape */
        c = buf[next];
        if (c != 'u') {
            /* Non-unicode backslash escapes */
            end = next + 1;
            switch (c) {
                case '"': break;
                case '\\': break;
                case '/': break;
                case 'b': c = '\b'; break;
                case 'f': c = '\f'; break;
                case 'n': c = '\n'; break;
                case 'r': c = '\r'; break;
                case 't': c = '\t'; break;
                default: c = 0;
            }
            /* c == 0 marks an unrecognized escape letter; end - 2 is the
            index of the backslash */
            if (c == 0) {
                raise_errmsg("Invalid \\escape", pystr, end - 2);
                goto bail;
            }
        }
        else {
            c = 0;
            /* next now indexes the first hex digit, end the position just
            past the fourth one */
            next++;
            end = next + 4;
            /* requires at least one more character after the four digits */
            if (end >= len) {
                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
                goto bail;
            }
            /* Decode 4 hex digits */
            for (; next < end; next++) {
                Py_UNICODE digit = buf[next];
                c <<= 4;
                switch (digit) {
                    case '0': case '1': case '2': case '3': case '4':
                    case '5': case '6': case '7': case '8': case '9':
                        c |= (digit - '0'); break;
                    case 'a': case 'b': case 'c': case 'd': case 'e':
                    case 'f':
                        c |= (digit - 'a' + 10); break;
                    case 'A': case 'B': case 'C': case 'D': case 'E':
                    case 'F':
                        c |= (digit - 'A' + 10); break;
                    default:
                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                        goto bail;
                }
            }
#ifdef Py_UNICODE_WIDE
            /* Surrogate pair */
            /* On wide builds a high surrogate must be followed directly by
            a \uXXXX low surrogate; the two are merged into one code point */
            if ((c & 0xfc00) == 0xd800) {
                Py_UNICODE c2 = 0;
                if (end + 6 >= len) {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                if (buf[next++] != '\\' || buf[next++] != 'u') {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                /* skip the second escape: backslash, 'u' and four digits */
                end += 6;
                /* Decode 4 hex digits */
                for (; next < end; next++) {
                    Py_UNICODE digit = buf[next];
                    c2 <<= 4;
                    switch (digit) {
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7': case '8': case '9':
                            c2 |= (digit - '0'); break;
                        case 'a': case 'b': case 'c': case 'd': case 'e':
                        case 'f':
                            c2 |= (digit - 'a' + 10); break;
                        case 'A': case 'B': case 'C': case 'D': case 'E':
                        case 'F':
                            c2 |= (digit - 'A' + 10); break;
                        default:
                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                            goto bail;
                    }
                }
                if ((c2 & 0xfc00) != 0xdc00) {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
            }
            else if ((c & 0xfc00) == 0xdc00) {
                raise_errmsg("Unpaired low surrogate", pystr, end - 5);
                goto bail;
            }
#endif
        }
        /* append the single decoded character as its own chunk */
        chunk = PyUnicode_FromUnicode(&c, 1);
        if (chunk == NULL) {
            goto bail;
        }
        if (PyList_Append(chunks, chunk)) {
            Py_DECREF(chunk);
            goto bail;
        }
        Py_DECREF(chunk);
    }

    rval = join_list_unicode(chunks);
    if (rval == NULL) {
        goto bail;
    }
    Py_CLEAR(chunks);
    *next_end_ptr = end;
    return rval;
bail:
    /* common failure exit: report -1 and drop the chunk list if it exists */
    *next_end_ptr = -1;
    Py_XDECREF(chunks);
    return NULL;
}
592
593
static PyObject *
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
{
    /* Read the JSON string from PyUnicode pystr.
    end is the index of the first character after the quote.
    if strict is zero then literal control characters are allowed
    *next_end_ptr is a return-by-reference index of the character
        after the end quote

    Return value is a new PyUnicode. On failure NULL is returned with an
    exception set and *next_end_ptr is set to -1.
    */
    PyObject *rval;
    Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
    /* index of the opening quote, used in "Unterminated string" errors */
    Py_ssize_t begin = end - 1;
    Py_ssize_t next;
    const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
    /* list of unicode pieces: literal runs and single decoded escapes */
    PyObject *chunks = PyList_New(0);
    if (chunks == NULL) {
        goto bail;
    }
    if (end < 0 || len <= end) {
        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
        goto bail;
    }
    /* Each iteration consumes one literal run (possibly empty) followed by
    either the closing quote or one backslash escape */
    while (1) {
        /* Find the end of the string or the next escape */
        Py_UNICODE c = 0;
        PyObject *chunk = NULL;
        for (next = end; next < len; next++) {
            c = buf[next];
            if (c == '"' || c == '\\') {
                break;
            }
            else if (strict && c <= 0x1f) {
                raise_errmsg("Invalid control character at", pystr, next);
                goto bail;
            }
        }
        /* The scan ran off the end of the buffer without finding a quote
        or a backslash */
        if (!(c == '"' || c == '\\')) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        /* Pick up this chunk if it's not zero length */
        if (next != end) {
            /* the run is already unicode, so it is copied without decoding */
            chunk = PyUnicode_FromUnicode(&buf[end], next - end);
            if (chunk == NULL) {
                goto bail;
            }
            if (PyList_Append(chunks, chunk)) {
                Py_DECREF(chunk);
                goto bail;
            }
            /* the list holds its own reference now */
            Py_DECREF(chunk);
        }
        /* step over the quote or backslash that stopped the scan */
        next++;
        if (c == '"') {
            end = next;
            break;
        }
        /* a backslash was the last character of the buffer */
        if (next == len) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        c = buf[next];
        if (c != 'u') {
            /* Non-unicode backslash escapes */
            end = next + 1;
            switch (c) {
                case '"': break;
                case '\\': break;
                case '/': break;
                case 'b': c = '\b'; break;
                case 'f': c = '\f'; break;
                case 'n': c = '\n'; break;
                case 'r': c = '\r'; break;
                case 't': c = '\t'; break;
                default: c = 0;
            }
            /* c == 0 marks an unrecognized escape letter; end - 2 is the
            index of the backslash */
            if (c == 0) {
                raise_errmsg("Invalid \\escape", pystr, end - 2);
                goto bail;
            }
        }
        else {
            c = 0;
            /* next now indexes the first hex digit, end the position just
            past the fourth one */
            next++;
            end = next + 4;
            /* requires at least one more character after the four digits */
            if (end >= len) {
                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
                goto bail;
            }
            /* Decode 4 hex digits */
            for (; next < end; next++) {
                Py_UNICODE digit = buf[next];
                c <<= 4;
                switch (digit) {
                    case '0': case '1': case '2': case '3': case '4':
                    case '5': case '6': case '7': case '8': case '9':
                        c |= (digit - '0'); break;
                    case 'a': case 'b': case 'c': case 'd': case 'e':
                    case 'f':
                        c |= (digit - 'a' + 10); break;
                    case 'A': case 'B': case 'C': case 'D': case 'E':
                    case 'F':
                        c |= (digit - 'A' + 10); break;
                    default:
                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                        goto bail;
                }
            }
#ifdef Py_UNICODE_WIDE
            /* Surrogate pair */
            /* On wide builds a high surrogate must be followed directly by
            a \uXXXX low surrogate; the two are merged into one code point */
            if ((c & 0xfc00) == 0xd800) {
                Py_UNICODE c2 = 0;
                if (end + 6 >= len) {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                if (buf[next++] != '\\' || buf[next++] != 'u') {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                /* skip the second escape: backslash, 'u' and four digits */
                end += 6;
                /* Decode 4 hex digits */
                for (; next < end; next++) {
                    Py_UNICODE digit = buf[next];
                    c2 <<= 4;
                    switch (digit) {
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7': case '8': case '9':
                            c2 |= (digit - '0'); break;
                        case 'a': case 'b': case 'c': case 'd': case 'e':
                        case 'f':
                            c2 |= (digit - 'a' + 10); break;
                        case 'A': case 'B': case 'C': case 'D': case 'E':
                        case 'F':
                            c2 |= (digit - 'A' + 10); break;
                        default:
                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                            goto bail;
                    }
                }
                if ((c2 & 0xfc00) != 0xdc00) {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
            }
            else if ((c & 0xfc00) == 0xdc00) {
                raise_errmsg("Unpaired low surrogate", pystr, end - 5);
                goto bail;
            }
#endif
        }
        /* append the single decoded character as its own chunk */
        chunk = PyUnicode_FromUnicode(&c, 1);
        if (chunk == NULL) {
            goto bail;
        }
        if (PyList_Append(chunks, chunk)) {
            Py_DECREF(chunk);
            goto bail;
        }
        Py_DECREF(chunk);
    }

    rval = join_list_unicode(chunks);
    if (rval == NULL) {
        goto bail;
    }
    Py_DECREF(chunks);
    *next_end_ptr = end;
    return rval;
bail:
    /* common failure exit: report -1 and drop the chunk list if it exists */
    *next_end_ptr = -1;
    Py_XDECREF(chunks);
    return NULL;
}
771
772PyDoc_STRVAR(pydoc_scanstring,
773    "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
774    "\n"
775    "Scan the string s for a JSON string. End is the index of the\n"
776    "character in s after the quote that started the JSON string.\n"
777    "Unescapes all valid JSON string escape sequences and raises ValueError\n"
778    "on attempt to decode an invalid string. If strict is False then literal\n"
779    "control characters are allowed in the string.\n"
780    "\n"
781    "Returns a tuple of the decoded string and the index of the character in s\n"
782    "after the end quote."
783);
784
785static PyObject *
786py_scanstring(PyObject* self UNUSED, PyObject *args)
787{
788    PyObject *pystr;
789    PyObject *rval;
790    Py_ssize_t end;
791    Py_ssize_t next_end = -1;
792    char *encoding = NULL;
793    int strict = 1;
794    if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
795        return NULL;
796    }
797    if (encoding == NULL) {
798        encoding = DEFAULT_ENCODING;
799    }
800    if (PyString_Check(pystr)) {
801        rval = scanstring_str(pystr, end, encoding, strict, &next_end);
802    }
803    else if (PyUnicode_Check(pystr)) {
804        rval = scanstring_unicode(pystr, end, strict, &next_end);
805    }
806    else {
807        PyErr_Format(PyExc_TypeError,
808                     "first argument must be a string, not %.80s",
809                     Py_TYPE(pystr)->tp_name);
810        return NULL;
811    }
812    return _build_rval_index_tuple(rval, next_end);
813}
814
815PyDoc_STRVAR(pydoc_encode_basestring_ascii,
816    "encode_basestring_ascii(basestring) -> str\n"
817    "\n"
818    "Return an ASCII-only JSON representation of a Python string"
819);
820
821static PyObject *
822py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
823{
824    /* Return an ASCII-only JSON representation of a Python string */
825    /* METH_O */
826    if (PyString_Check(pystr)) {
827        return ascii_escape_str(pystr);
828    }
829    else if (PyUnicode_Check(pystr)) {
830        return ascii_escape_unicode(pystr);
831    }
832    else {
833        PyErr_Format(PyExc_TypeError,
834                     "first argument must be a string, not %.80s",
835                     Py_TYPE(pystr)->tp_name);
836        return NULL;
837    }
838}
839
static void
scanner_dealloc(PyObject *self)
{
    /* Deallocate scanner object: release every owned reference through
    scanner_clear, then free the object's memory via the type's tp_free. */
    /* NOTE(review): the presence of scanner_traverse/scanner_clear suggests
    this type participates in cyclic GC. If so, PyObject_GC_UnTrack(self)
    should probably be called before clearing -- confirm against the type's
    tp_flags, which are not visible from here. */
    scanner_clear(self);
    Py_TYPE(self)->tp_free(self);
}
847
848static int
849scanner_traverse(PyObject *self, visitproc visit, void *arg)
850{
851    PyScannerObject *s;
852    assert(PyScanner_Check(self));
853    s = (PyScannerObject *)self;
854    Py_VISIT(s->encoding);
855    Py_VISIT(s->strict);
856    Py_VISIT(s->object_hook);
857    Py_VISIT(s->pairs_hook);
858    Py_VISIT(s->parse_float);
859    Py_VISIT(s->parse_int);
860    Py_VISIT(s->parse_constant);
861    return 0;
862}
863
864static int
865scanner_clear(PyObject *self)
866{
867    PyScannerObject *s;
868    assert(PyScanner_Check(self));
869    s = (PyScannerObject *)self;
870    Py_CLEAR(s->encoding);
871    Py_CLEAR(s->strict);
872    Py_CLEAR(s->object_hook);
873    Py_CLEAR(s->pairs_hook);
874    Py_CLEAR(s->parse_float);
875    Py_CLEAR(s->parse_int);
876    Py_CLEAR(s->parse_constant);
877    return 0;
878}
879
880static PyObject *
881_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
882    /* Read a JSON object from PyString pystr.
883    idx is the index of the first character after the opening curly brace.
884    *next_idx_ptr is a return-by-reference index to the first character after
885        the closing curly brace.
886
887    Returns a new PyObject (usually a dict, but object_hook can change that)
888    */
889    char *str = PyString_AS_STRING(pystr);
890    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
891    PyObject *rval;
892    PyObject *pairs;
893    PyObject *item;
894    PyObject *key = NULL;
895    PyObject *val = NULL;
896    char *encoding = PyString_AS_STRING(s->encoding);
897    int strict = PyObject_IsTrue(s->strict);
898    Py_ssize_t next_idx;
899
900    pairs = PyList_New(0);
901    if (pairs == NULL)
902        return NULL;
903
904    /* skip whitespace after { */
905    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
906
907    /* only loop if the object is non-empty */
908    if (idx <= end_idx && str[idx] != '}') {
909        while (idx <= end_idx) {
910            /* read key */
911            if (str[idx] != '"') {
912                raise_errmsg("Expecting property name", pystr, idx);
913                goto bail;
914            }
915            key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
916            if (key == NULL)
917                goto bail;
918            idx = next_idx;
919
920            /* skip whitespace between key and : delimiter, read :, skip whitespace */
921            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
922            if (idx > end_idx || str[idx] != ':') {
923                raise_errmsg("Expecting : delimiter", pystr, idx);
924                goto bail;
925            }
926            idx++;
927            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
928
929            /* read any JSON data type */
930            val = scan_once_str(s, pystr, idx, &next_idx);
931            if (val == NULL)
932                goto bail;
933
934            item = PyTuple_Pack(2, key, val);
935            if (item == NULL)
936                goto bail;
937            Py_CLEAR(key);
938            Py_CLEAR(val);
939            if (PyList_Append(pairs, item) == -1) {
940                Py_DECREF(item);
941                goto bail;
942            }
943            Py_DECREF(item);
944            idx = next_idx;
945
946            /* skip whitespace before } or , */
947            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
948
949            /* bail if the object is closed or we didn't get the , delimiter */
950            if (idx > end_idx) break;
951            if (str[idx] == '}') {
952                break;
953            }
954            else if (str[idx] != ',') {
955                raise_errmsg("Expecting , delimiter", pystr, idx);
956                goto bail;
957            }
958            idx++;
959
960            /* skip whitespace after , delimiter */
961            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
962        }
963    }
964    /* verify that idx < end_idx, str[idx] should be '}' */
965    if (idx > end_idx || str[idx] != '}') {
966        raise_errmsg("Expecting object", pystr, end_idx);
967        goto bail;
968    }
969
970    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
971    if (s->pairs_hook != Py_None) {
972        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
973        if (val == NULL)
974            goto bail;
975        Py_DECREF(pairs);
976        *next_idx_ptr = idx + 1;
977        return val;
978    }
979
980    rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
981                                         pairs, NULL);
982    if (rval == NULL)
983        goto bail;
984    Py_CLEAR(pairs);
985
986    /* if object_hook is not None: rval = object_hook(rval) */
987    if (s->object_hook != Py_None) {
988        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
989        if (val == NULL)
990            goto bail;
991        Py_DECREF(rval);
992        rval = val;
993        val = NULL;
994    }
995    *next_idx_ptr = idx + 1;
996    return rval;
997bail:
998    Py_XDECREF(key);
999    Py_XDECREF(val);
1000    Py_XDECREF(pairs);
1001    return NULL;
1002}
1003
1004static PyObject *
1005_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1006    /* Read a JSON object from PyUnicode pystr.
1007    idx is the index of the first character after the opening curly brace.
1008    *next_idx_ptr is a return-by-reference index to the first character after
1009        the closing curly brace.
1010
1011    Returns a new PyObject (usually a dict, but object_hook can change that)
1012    */
1013    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1014    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1015    PyObject *rval;
1016    PyObject *pairs;
1017    PyObject *item;
1018    PyObject *key = NULL;
1019    PyObject *val = NULL;
1020    int strict = PyObject_IsTrue(s->strict);
1021    Py_ssize_t next_idx;
1022
1023    pairs = PyList_New(0);
1024    if (pairs == NULL)
1025        return NULL;
1026
1027    /* skip whitespace after { */
1028    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1029
1030    /* only loop if the object is non-empty */
1031    if (idx <= end_idx && str[idx] != '}') {
1032        while (idx <= end_idx) {
1033            /* read key */
1034            if (str[idx] != '"') {
1035                raise_errmsg("Expecting property name", pystr, idx);
1036                goto bail;
1037            }
1038            key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1039            if (key == NULL)
1040                goto bail;
1041            idx = next_idx;
1042
1043            /* skip whitespace between key and : delimiter, read :, skip whitespace */
1044            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1045            if (idx > end_idx || str[idx] != ':') {
1046                raise_errmsg("Expecting : delimiter", pystr, idx);
1047                goto bail;
1048            }
1049            idx++;
1050            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1051
1052            /* read any JSON term */
1053            val = scan_once_unicode(s, pystr, idx, &next_idx);
1054            if (val == NULL)
1055                goto bail;
1056
1057            item = PyTuple_Pack(2, key, val);
1058            if (item == NULL)
1059                goto bail;
1060            Py_CLEAR(key);
1061            Py_CLEAR(val);
1062            if (PyList_Append(pairs, item) == -1) {
1063                Py_DECREF(item);
1064                goto bail;
1065            }
1066            Py_DECREF(item);
1067            idx = next_idx;
1068
1069            /* skip whitespace before } or , */
1070            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1071
1072            /* bail if the object is closed or we didn't get the , delimiter */
1073            if (idx > end_idx) break;
1074            if (str[idx] == '}') {
1075                break;
1076            }
1077            else if (str[idx] != ',') {
1078                raise_errmsg("Expecting , delimiter", pystr, idx);
1079                goto bail;
1080            }
1081            idx++;
1082
1083            /* skip whitespace after , delimiter */
1084            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1085        }
1086    }
1087
1088    /* verify that idx < end_idx, str[idx] should be '}' */
1089    if (idx > end_idx || str[idx] != '}') {
1090        raise_errmsg("Expecting object", pystr, end_idx);
1091        goto bail;
1092    }
1093
1094    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1095    if (s->pairs_hook != Py_None) {
1096        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1097        if (val == NULL)
1098            goto bail;
1099        Py_DECREF(pairs);
1100        *next_idx_ptr = idx + 1;
1101        return val;
1102    }
1103
1104    rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1105                                         pairs, NULL);
1106    if (rval == NULL)
1107        goto bail;
1108    Py_CLEAR(pairs);
1109
1110    /* if object_hook is not None: rval = object_hook(rval) */
1111    if (s->object_hook != Py_None) {
1112        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1113        if (val == NULL)
1114            goto bail;
1115        Py_DECREF(rval);
1116        rval = val;
1117        val = NULL;
1118    }
1119    *next_idx_ptr = idx + 1;
1120    return rval;
1121bail:
1122    Py_XDECREF(key);
1123    Py_XDECREF(val);
1124    Py_XDECREF(pairs);
1125    return NULL;
1126}
1127
1128static PyObject *
1129_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1130    /* Read a JSON array from PyString pystr.
1131    idx is the index of the first character after the opening brace.
1132    *next_idx_ptr is a return-by-reference index to the first character after
1133        the closing brace.
1134
1135    Returns a new PyList
1136    */
1137    char *str = PyString_AS_STRING(pystr);
1138    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1139    PyObject *val = NULL;
1140    PyObject *rval = PyList_New(0);
1141    Py_ssize_t next_idx;
1142    if (rval == NULL)
1143        return NULL;
1144
1145    /* skip whitespace after [ */
1146    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1147
1148    /* only loop if the array is non-empty */
1149    if (idx <= end_idx && str[idx] != ']') {
1150        while (idx <= end_idx) {
1151
1152            /* read any JSON term and de-tuplefy the (rval, idx) */
1153            val = scan_once_str(s, pystr, idx, &next_idx);
1154            if (val == NULL)
1155                goto bail;
1156
1157            if (PyList_Append(rval, val) == -1)
1158                goto bail;
1159
1160            Py_CLEAR(val);
1161            idx = next_idx;
1162
1163            /* skip whitespace between term and , */
1164            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1165
1166            /* bail if the array is closed or we didn't get the , delimiter */
1167            if (idx > end_idx) break;
1168            if (str[idx] == ']') {
1169                break;
1170            }
1171            else if (str[idx] != ',') {
1172                raise_errmsg("Expecting , delimiter", pystr, idx);
1173                goto bail;
1174            }
1175            idx++;
1176
1177            /* skip whitespace after , */
1178            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1179        }
1180    }
1181
1182    /* verify that idx < end_idx, str[idx] should be ']' */
1183    if (idx > end_idx || str[idx] != ']') {
1184        raise_errmsg("Expecting object", pystr, end_idx);
1185        goto bail;
1186    }
1187    *next_idx_ptr = idx + 1;
1188    return rval;
1189bail:
1190    Py_XDECREF(val);
1191    Py_DECREF(rval);
1192    return NULL;
1193}
1194
1195static PyObject *
1196_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1197    /* Read a JSON array from PyString pystr.
1198    idx is the index of the first character after the opening brace.
1199    *next_idx_ptr is a return-by-reference index to the first character after
1200        the closing brace.
1201
1202    Returns a new PyList
1203    */
1204    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1205    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1206    PyObject *val = NULL;
1207    PyObject *rval = PyList_New(0);
1208    Py_ssize_t next_idx;
1209    if (rval == NULL)
1210        return NULL;
1211
1212    /* skip whitespace after [ */
1213    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1214
1215    /* only loop if the array is non-empty */
1216    if (idx <= end_idx && str[idx] != ']') {
1217        while (idx <= end_idx) {
1218
1219            /* read any JSON term  */
1220            val = scan_once_unicode(s, pystr, idx, &next_idx);
1221            if (val == NULL)
1222                goto bail;
1223
1224            if (PyList_Append(rval, val) == -1)
1225                goto bail;
1226
1227            Py_CLEAR(val);
1228            idx = next_idx;
1229
1230            /* skip whitespace between term and , */
1231            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1232
1233            /* bail if the array is closed or we didn't get the , delimiter */
1234            if (idx > end_idx) break;
1235            if (str[idx] == ']') {
1236                break;
1237            }
1238            else if (str[idx] != ',') {
1239                raise_errmsg("Expecting , delimiter", pystr, idx);
1240                goto bail;
1241            }
1242            idx++;
1243
1244            /* skip whitespace after , */
1245            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1246        }
1247    }
1248
1249    /* verify that idx < end_idx, str[idx] should be ']' */
1250    if (idx > end_idx || str[idx] != ']') {
1251        raise_errmsg("Expecting object", pystr, end_idx);
1252        goto bail;
1253    }
1254    *next_idx_ptr = idx + 1;
1255    return rval;
1256bail:
1257    Py_XDECREF(val);
1258    Py_DECREF(rval);
1259    return NULL;
1260}
1261
1262static PyObject *
1263_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1264    /* Read a JSON constant from PyString pystr.
1265    constant is the constant string that was found
1266        ("NaN", "Infinity", "-Infinity").
1267    idx is the index of the first character of the constant
1268    *next_idx_ptr is a return-by-reference index to the first character after
1269        the constant.
1270
1271    Returns the result of parse_constant
1272    */
1273    PyObject *cstr;
1274    PyObject *rval;
1275    /* constant is "NaN", "Infinity", or "-Infinity" */
1276    cstr = PyString_InternFromString(constant);
1277    if (cstr == NULL)
1278        return NULL;
1279
1280    /* rval = parse_constant(constant) */
1281    rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1282    idx += PyString_GET_SIZE(cstr);
1283    Py_DECREF(cstr);
1284    *next_idx_ptr = idx;
1285    return rval;
1286}
1287
1288static PyObject *
1289_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1290    /* Read a JSON number from PyString pystr.
1291    idx is the index of the first character of the number
1292    *next_idx_ptr is a return-by-reference index to the first character after
1293        the number.
1294
1295    Returns a new PyObject representation of that number:
1296        PyInt, PyLong, or PyFloat.
1297        May return other types if parse_int or parse_float are set
1298    */
1299    char *str = PyString_AS_STRING(pystr);
1300    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1301    Py_ssize_t idx = start;
1302    int is_float = 0;
1303    PyObject *rval;
1304    PyObject *numstr;
1305
1306    /* read a sign if it's there, make sure it's not the end of the string */
1307    if (str[idx] == '-') {
1308        idx++;
1309        if (idx > end_idx) {
1310            PyErr_SetNone(PyExc_StopIteration);
1311            return NULL;
1312        }
1313    }
1314
1315    /* read as many integer digits as we find as long as it doesn't start with 0 */
1316    if (str[idx] >= '1' && str[idx] <= '9') {
1317        idx++;
1318        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1319    }
1320    /* if it starts with 0 we only expect one integer digit */
1321    else if (str[idx] == '0') {
1322        idx++;
1323    }
1324    /* no integer digits, error */
1325    else {
1326        PyErr_SetNone(PyExc_StopIteration);
1327        return NULL;
1328    }
1329
1330    /* if the next char is '.' followed by a digit then read all float digits */
1331    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1332        is_float = 1;
1333        idx += 2;
1334        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1335    }
1336
1337    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1338    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1339
1340        /* save the index of the 'e' or 'E' just in case we need to backtrack */
1341        Py_ssize_t e_start = idx;
1342        idx++;
1343
1344        /* read an exponent sign if present */
1345        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1346
1347        /* read all digits */
1348        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1349
1350        /* if we got a digit, then parse as float. if not, backtrack */
1351        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1352            is_float = 1;
1353        }
1354        else {
1355            idx = e_start;
1356        }
1357    }
1358
1359    /* copy the section we determined to be a number */
1360    numstr = PyString_FromStringAndSize(&str[start], idx - start);
1361    if (numstr == NULL)
1362        return NULL;
1363    if (is_float) {
1364        /* parse as a float using a fast path if available, otherwise call user defined method */
1365        if (s->parse_float != (PyObject *)&PyFloat_Type) {
1366            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1367        }
1368        else {
1369            double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1370                                             NULL, NULL);
1371            if (d == -1.0 && PyErr_Occurred())
1372                return NULL;
1373            rval = PyFloat_FromDouble(d);
1374        }
1375    }
1376    else {
1377        /* parse as an int using a fast path if available, otherwise call user defined method */
1378        if (s->parse_int != (PyObject *)&PyInt_Type) {
1379            rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1380        }
1381        else {
1382            rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1383        }
1384    }
1385    Py_DECREF(numstr);
1386    *next_idx_ptr = idx;
1387    return rval;
1388}
1389
1390static PyObject *
1391_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1392    /* Read a JSON number from PyUnicode pystr.
1393    idx is the index of the first character of the number
1394    *next_idx_ptr is a return-by-reference index to the first character after
1395        the number.
1396
1397    Returns a new PyObject representation of that number:
1398        PyInt, PyLong, or PyFloat.
1399        May return other types if parse_int or parse_float are set
1400    */
1401    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1402    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1403    Py_ssize_t idx = start;
1404    int is_float = 0;
1405    PyObject *rval;
1406    PyObject *numstr;
1407
1408    /* read a sign if it's there, make sure it's not the end of the string */
1409    if (str[idx] == '-') {
1410        idx++;
1411        if (idx > end_idx) {
1412            PyErr_SetNone(PyExc_StopIteration);
1413            return NULL;
1414        }
1415    }
1416
1417    /* read as many integer digits as we find as long as it doesn't start with 0 */
1418    if (str[idx] >= '1' && str[idx] <= '9') {
1419        idx++;
1420        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1421    }
1422    /* if it starts with 0 we only expect one integer digit */
1423    else if (str[idx] == '0') {
1424        idx++;
1425    }
1426    /* no integer digits, error */
1427    else {
1428        PyErr_SetNone(PyExc_StopIteration);
1429        return NULL;
1430    }
1431
1432    /* if the next char is '.' followed by a digit then read all float digits */
1433    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1434        is_float = 1;
1435        idx += 2;
1436        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1437    }
1438
1439    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1440    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1441        Py_ssize_t e_start = idx;
1442        idx++;
1443
1444        /* read an exponent sign if present */
1445        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1446
1447        /* read all digits */
1448        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1449
1450        /* if we got a digit, then parse as float. if not, backtrack */
1451        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1452            is_float = 1;
1453        }
1454        else {
1455            idx = e_start;
1456        }
1457    }
1458
1459    /* copy the section we determined to be a number */
1460    numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1461    if (numstr == NULL)
1462        return NULL;
1463    if (is_float) {
1464        /* parse as a float using a fast path if available, otherwise call user defined method */
1465        if (s->parse_float != (PyObject *)&PyFloat_Type) {
1466            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1467        }
1468        else {
1469            rval = PyFloat_FromString(numstr, NULL);
1470        }
1471    }
1472    else {
1473        /* no fast path for unicode -> int, just call */
1474        rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1475    }
1476    Py_DECREF(numstr);
1477    *next_idx_ptr = idx;
1478    return rval;
1479}
1480
1481static PyObject *
1482scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1483{
1484    /* Read one JSON term (of any kind) from PyString pystr.
1485    idx is the index of the first character of the term
1486    *next_idx_ptr is a return-by-reference index to the first character after
1487        the number.
1488
1489    Returns a new PyObject representation of the term.
1490    */
1491    PyObject *res;
1492    char *str = PyString_AS_STRING(pystr);
1493    Py_ssize_t length = PyString_GET_SIZE(pystr);
1494    if (idx >= length) {
1495        PyErr_SetNone(PyExc_StopIteration);
1496        return NULL;
1497    }
1498    switch (str[idx]) {
1499        case '"':
1500            /* string */
1501            return scanstring_str(pystr, idx + 1,
1502                PyString_AS_STRING(s->encoding),
1503                PyObject_IsTrue(s->strict),
1504                next_idx_ptr);
1505        case '{':
1506            /* object */
1507            if (Py_EnterRecursiveCall(" while decoding a JSON object "
1508                                      "from a byte string"))
1509                return NULL;
1510            res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1511            Py_LeaveRecursiveCall();
1512            return res;
1513        case '[':
1514            /* array */
1515            if (Py_EnterRecursiveCall(" while decoding a JSON array "
1516                                      "from a byte string"))
1517                return NULL;
1518            res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1519            Py_LeaveRecursiveCall();
1520            return res;
1521        case 'n':
1522            /* null */
1523            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1524                Py_INCREF(Py_None);
1525                *next_idx_ptr = idx + 4;
1526                return Py_None;
1527            }
1528            break;
1529        case 't':
1530            /* true */
1531            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1532                Py_INCREF(Py_True);
1533                *next_idx_ptr = idx + 4;
1534                return Py_True;
1535            }
1536            break;
1537        case 'f':
1538            /* false */
1539            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1540                Py_INCREF(Py_False);
1541                *next_idx_ptr = idx + 5;
1542                return Py_False;
1543            }
1544            break;
1545        case 'N':
1546            /* NaN */
1547            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1548                return _parse_constant(s, "NaN", idx, next_idx_ptr);
1549            }
1550            break;
1551        case 'I':
1552            /* Infinity */
1553            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1554                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1555            }
1556            break;
1557        case '-':
1558            /* -Infinity */
1559            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1560                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1561            }
1562            break;
1563    }
1564    /* Didn't find a string, object, array, or named constant. Look for a number. */
1565    return _match_number_str(s, pystr, idx, next_idx_ptr);
1566}
1567
1568static PyObject *
1569scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1570{
1571    /* Read one JSON term (of any kind) from PyUnicode pystr.
1572    idx is the index of the first character of the term
1573    *next_idx_ptr is a return-by-reference index to the first character after
1574        the number.
1575
1576    Returns a new PyObject representation of the term.
1577    */
1578    PyObject *res;
1579    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1580    Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1581    if (idx >= length) {
1582        PyErr_SetNone(PyExc_StopIteration);
1583        return NULL;
1584    }
1585    switch (str[idx]) {
1586        case '"':
1587            /* string */
1588            return scanstring_unicode(pystr, idx + 1,
1589                PyObject_IsTrue(s->strict),
1590                next_idx_ptr);
1591        case '{':
1592            /* object */
1593            if (Py_EnterRecursiveCall(" while decoding a JSON object "
1594                                      "from a unicode string"))
1595                return NULL;
1596            res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1597            Py_LeaveRecursiveCall();
1598            return res;
1599        case '[':
1600            /* array */
1601            if (Py_EnterRecursiveCall(" while decoding a JSON array "
1602                                      "from a unicode string"))
1603                return NULL;
1604            res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1605            Py_LeaveRecursiveCall();
1606            return res;
1607        case 'n':
1608            /* null */
1609            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1610                Py_INCREF(Py_None);
1611                *next_idx_ptr = idx + 4;
1612                return Py_None;
1613            }
1614            break;
1615        case 't':
1616            /* true */
1617            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1618                Py_INCREF(Py_True);
1619                *next_idx_ptr = idx + 4;
1620                return Py_True;
1621            }
1622            break;
1623        case 'f':
1624            /* false */
1625            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1626                Py_INCREF(Py_False);
1627                *next_idx_ptr = idx + 5;
1628                return Py_False;
1629            }
1630            break;
1631        case 'N':
1632            /* NaN */
1633            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1634                return _parse_constant(s, "NaN", idx, next_idx_ptr);
1635            }
1636            break;
1637        case 'I':
1638            /* Infinity */
1639            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1640                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1641            }
1642            break;
1643        case '-':
1644            /* -Infinity */
1645            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1646                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1647            }
1648            break;
1649    }
1650    /* Didn't find a string, object, array, or named constant. Look for a number. */
1651    return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1652}
1653
1654static PyObject *
1655scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1656{
1657    /* Python callable interface to scan_once_{str,unicode} */
1658    PyObject *pystr;
1659    PyObject *rval;
1660    Py_ssize_t idx;
1661    Py_ssize_t next_idx = -1;
1662    static char *kwlist[] = {"string", "idx", NULL};
1663    PyScannerObject *s;
1664    assert(PyScanner_Check(self));
1665    s = (PyScannerObject *)self;
1666    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1667        return NULL;
1668
1669    if (PyString_Check(pystr)) {
1670        rval = scan_once_str(s, pystr, idx, &next_idx);
1671    }
1672    else if (PyUnicode_Check(pystr)) {
1673        rval = scan_once_unicode(s, pystr, idx, &next_idx);
1674    }
1675    else {
1676        PyErr_Format(PyExc_TypeError,
1677                 "first argument must be a string, not %.80s",
1678                 Py_TYPE(pystr)->tp_name);
1679        return NULL;
1680    }
1681    return _build_rval_index_tuple(rval, next_idx);
1682}
1683
1684static PyObject *
1685scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1686{
1687    PyScannerObject *s;
1688    s = (PyScannerObject *)type->tp_alloc(type, 0);
1689    if (s != NULL) {
1690        s->encoding = NULL;
1691        s->strict = NULL;
1692        s->object_hook = NULL;
1693        s->pairs_hook = NULL;
1694        s->parse_float = NULL;
1695        s->parse_int = NULL;
1696        s->parse_constant = NULL;
1697    }
1698    return (PyObject *)s;
1699}
1700
1701static int
1702scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1703{
1704    /* Initialize Scanner object */
1705    PyObject *ctx;
1706    static char *kwlist[] = {"context", NULL};
1707    PyScannerObject *s;
1708
1709    assert(PyScanner_Check(self));
1710    s = (PyScannerObject *)self;
1711
1712    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1713        return -1;
1714
1715    /* PyString_AS_STRING is used on encoding */
1716    s->encoding = PyObject_GetAttrString(ctx, "encoding");
1717    if (s->encoding == NULL)
1718        goto bail;
1719    if (s->encoding == Py_None) {
1720        Py_DECREF(Py_None);
1721        s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1722    }
1723    else if (PyUnicode_Check(s->encoding)) {
1724        PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1725        Py_DECREF(s->encoding);
1726        s->encoding = tmp;
1727    }
1728    if (s->encoding == NULL || !PyString_Check(s->encoding))
1729        goto bail;
1730
1731    /* All of these will fail "gracefully" so we don't need to verify them */
1732    s->strict = PyObject_GetAttrString(ctx, "strict");
1733    if (s->strict == NULL)
1734        goto bail;
1735    s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1736    if (s->object_hook == NULL)
1737        goto bail;
1738    s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1739    if (s->pairs_hook == NULL)
1740        goto bail;
1741    s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1742    if (s->parse_float == NULL)
1743        goto bail;
1744    s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1745    if (s->parse_int == NULL)
1746        goto bail;
1747    s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1748    if (s->parse_constant == NULL)
1749        goto bail;
1750
1751    return 0;
1752
1753bail:
1754    Py_CLEAR(s->encoding);
1755    Py_CLEAR(s->strict);
1756    Py_CLEAR(s->object_hook);
1757    Py_CLEAR(s->pairs_hook);
1758    Py_CLEAR(s->parse_float);
1759    Py_CLEAR(s->parse_int);
1760    Py_CLEAR(s->parse_constant);
1761    return -1;
1762}
1763
/* Docstring placed in the tp_doc slot of PyScannerType below. */
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/*
 * Type object for "_json.Scanner".
 *
 * Instances are callable (tp_call = scanner_call), take part in cyclic
 * garbage collection (Py_TPFLAGS_HAVE_GC with scanner_traverse and
 * scanner_clear) and expose their fields through scanner_members.
 *
 * The initializer is positional, so the order of the entries matters.
 * Note that init_json assigns PyType_GenericNew to tp_new before calling
 * PyType_Ready, which replaces the scanner_new entry given here.
 * Slots left as 0 next to a commented-out name are presumably filled in
 * with that default by PyType_Ready -- TODO confirm.
 */
static
PyTypeObject PyScannerType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* tp_internal */
    "_json.Scanner",       /* tp_name */
    sizeof(PyScannerObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    scanner_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    scanner_call,         /* tp_call */
    0,                    /* tp_str */
    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    scanner_doc,          /* tp_doc */
    scanner_traverse,                    /* tp_traverse */
    scanner_clear,                    /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    scanner_members,                    /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    scanner_init,                    /* tp_init */
    0,/* PyType_GenericAlloc, */        /* tp_alloc */
    scanner_new,          /* tp_new */
    0,/* PyObject_GC_Del, */              /* tp_free */
};
1809
1810static PyObject *
1811encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1812{
1813    PyEncoderObject *s;
1814    s = (PyEncoderObject *)type->tp_alloc(type, 0);
1815    if (s != NULL) {
1816        s->markers = NULL;
1817        s->defaultfn = NULL;
1818        s->encoder = NULL;
1819        s->indent = NULL;
1820        s->key_separator = NULL;
1821        s->item_separator = NULL;
1822        s->sort_keys = NULL;
1823        s->skipkeys = NULL;
1824    }
1825    return (PyObject *)s;
1826}
1827
1828static int
1829encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1830{
1831    /* initialize Encoder object */
1832    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1833
1834    PyEncoderObject *s;
1835    PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1836    PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
1837
1838    assert(PyEncoder_Check(self));
1839    s = (PyEncoderObject *)self;
1840
1841    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1842        &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1843        &sort_keys, &skipkeys, &allow_nan))
1844        return -1;
1845
1846    s->markers = markers;
1847    s->defaultfn = defaultfn;
1848    s->encoder = encoder;
1849    s->indent = indent;
1850    s->key_separator = key_separator;
1851    s->item_separator = item_separator;
1852    s->sort_keys = sort_keys;
1853    s->skipkeys = skipkeys;
1854    s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1855    s->allow_nan = PyObject_IsTrue(allow_nan);
1856
1857    Py_INCREF(s->markers);
1858    Py_INCREF(s->defaultfn);
1859    Py_INCREF(s->encoder);
1860    Py_INCREF(s->indent);
1861    Py_INCREF(s->key_separator);
1862    Py_INCREF(s->item_separator);
1863    Py_INCREF(s->sort_keys);
1864    Py_INCREF(s->skipkeys);
1865    return 0;
1866}
1867
1868static PyObject *
1869encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1870{
1871    /* Python callable interface to encode_listencode_obj */
1872    static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1873    PyObject *obj;
1874    PyObject *rval;
1875    Py_ssize_t indent_level;
1876    PyEncoderObject *s;
1877    assert(PyEncoder_Check(self));
1878    s = (PyEncoderObject *)self;
1879    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1880        &obj, _convertPyInt_AsSsize_t, &indent_level))
1881        return NULL;
1882    rval = PyList_New(0);
1883    if (rval == NULL)
1884        return NULL;
1885    if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1886        Py_DECREF(rval);
1887        return NULL;
1888    }
1889    return rval;
1890}
1891
1892static PyObject *
1893_encoded_const(PyObject *obj)
1894{
1895    /* Return the JSON string representation of None, True, False */
1896    if (obj == Py_None) {
1897        static PyObject *s_null = NULL;
1898        if (s_null == NULL) {
1899            s_null = PyString_InternFromString("null");
1900        }
1901        Py_INCREF(s_null);
1902        return s_null;
1903    }
1904    else if (obj == Py_True) {
1905        static PyObject *s_true = NULL;
1906        if (s_true == NULL) {
1907            s_true = PyString_InternFromString("true");
1908        }
1909        Py_INCREF(s_true);
1910        return s_true;
1911    }
1912    else if (obj == Py_False) {
1913        static PyObject *s_false = NULL;
1914        if (s_false == NULL) {
1915            s_false = PyString_InternFromString("false");
1916        }
1917        Py_INCREF(s_false);
1918        return s_false;
1919    }
1920    else {
1921        PyErr_SetString(PyExc_ValueError, "not a const");
1922        return NULL;
1923    }
1924}
1925
1926static PyObject *
1927encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1928{
1929    /* Return the JSON representation of a PyFloat */
1930    double i = PyFloat_AS_DOUBLE(obj);
1931    if (!Py_IS_FINITE(i)) {
1932        if (!s->allow_nan) {
1933            PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1934            return NULL;
1935        }
1936        if (i > 0) {
1937            return PyString_FromString("Infinity");
1938        }
1939        else if (i < 0) {
1940            return PyString_FromString("-Infinity");
1941        }
1942        else {
1943            return PyString_FromString("NaN");
1944        }
1945    }
1946    /* Use a better float format here? */
1947    return PyObject_Repr(obj);
1948}
1949
1950static PyObject *
1951encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1952{
1953    /* Return the JSON representation of a string */
1954    if (s->fast_encode)
1955        return py_encode_basestring_ascii(NULL, obj);
1956    else
1957        return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1958}
1959
1960static int
1961_steal_list_append(PyObject *lst, PyObject *stolen)
1962{
1963    /* Append stolen and then decrement its reference count */
1964    int rval = PyList_Append(lst, stolen);
1965    Py_DECREF(stolen);
1966    return rval;
1967}
1968
1969static int
1970encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1971{
1972    /* Encode Python object obj to a JSON term, rval is a PyList */
1973    PyObject *newobj;
1974    int rv;
1975
1976    if (obj == Py_None || obj == Py_True || obj == Py_False) {
1977        PyObject *cstr = _encoded_const(obj);
1978        if (cstr == NULL)
1979            return -1;
1980        return _steal_list_append(rval, cstr);
1981    }
1982    else if (PyString_Check(obj) || PyUnicode_Check(obj))
1983    {
1984        PyObject *encoded = encoder_encode_string(s, obj);
1985        if (encoded == NULL)
1986            return -1;
1987        return _steal_list_append(rval, encoded);
1988    }
1989    else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1990        PyObject *encoded = PyObject_Str(obj);
1991        if (encoded == NULL)
1992            return -1;
1993        return _steal_list_append(rval, encoded);
1994    }
1995    else if (PyFloat_Check(obj)) {
1996        PyObject *encoded = encoder_encode_float(s, obj);
1997        if (encoded == NULL)
1998            return -1;
1999        return _steal_list_append(rval, encoded);
2000    }
2001    else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2002        if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2003            return -1;
2004        rv = encoder_listencode_list(s, rval, obj, indent_level);
2005        Py_LeaveRecursiveCall();
2006        return rv;
2007    }
2008    else if (PyDict_Check(obj)) {
2009        if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2010            return -1;
2011        rv = encoder_listencode_dict(s, rval, obj, indent_level);
2012        Py_LeaveRecursiveCall();
2013        return rv;
2014    }
2015    else {
2016        PyObject *ident = NULL;
2017        if (s->markers != Py_None) {
2018            int has_key;
2019            ident = PyLong_FromVoidPtr(obj);
2020            if (ident == NULL)
2021                return -1;
2022            has_key = PyDict_Contains(s->markers, ident);
2023            if (has_key) {
2024                if (has_key != -1)
2025                    PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2026                Py_DECREF(ident);
2027                return -1;
2028            }
2029            if (PyDict_SetItem(s->markers, ident, obj)) {
2030                Py_DECREF(ident);
2031                return -1;
2032            }
2033        }
2034        newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2035        if (newobj == NULL) {
2036            Py_XDECREF(ident);
2037            return -1;
2038        }
2039
2040        if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2041            return -1;
2042        rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2043        Py_LeaveRecursiveCall();
2044
2045        Py_DECREF(newobj);
2046        if (rv) {
2047            Py_XDECREF(ident);
2048            return -1;
2049        }
2050        if (ident != NULL) {
2051            if (PyDict_DelItem(s->markers, ident)) {
2052                Py_XDECREF(ident);
2053                return -1;
2054            }
2055            Py_XDECREF(ident);
2056        }
2057        return rv;
2058    }
2059}
2060
2061static int
2062encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2063{
2064    /* Encode Python dict dct a JSON term, rval is a PyList */
2065    static PyObject *open_dict = NULL;
2066    static PyObject *close_dict = NULL;
2067    static PyObject *empty_dict = NULL;
2068    PyObject *kstr = NULL;
2069    PyObject *ident = NULL;
2070    PyObject *key = NULL;
2071    PyObject *value = NULL;
2072    PyObject *it = NULL;
2073    int skipkeys;
2074    Py_ssize_t idx;
2075
2076    if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2077        open_dict = PyString_InternFromString("{");
2078        close_dict = PyString_InternFromString("}");
2079        empty_dict = PyString_InternFromString("{}");
2080        if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2081            return -1;
2082    }
2083    if (Py_SIZE(dct) == 0)
2084        return PyList_Append(rval, empty_dict);
2085
2086    if (s->markers != Py_None) {
2087        int has_key;
2088        ident = PyLong_FromVoidPtr(dct);
2089        if (ident == NULL)
2090            goto bail;
2091        has_key = PyDict_Contains(s->markers, ident);
2092        if (has_key) {
2093            if (has_key != -1)
2094                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2095            goto bail;
2096        }
2097        if (PyDict_SetItem(s->markers, ident, dct)) {
2098            goto bail;
2099        }
2100    }
2101
2102    if (PyList_Append(rval, open_dict))
2103        goto bail;
2104
2105    if (s->indent != Py_None) {
2106        /* TODO: DOES NOT RUN */
2107        indent_level += 1;
2108        /*
2109            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2110            separator = _item_separator + newline_indent
2111            buf += newline_indent
2112        */
2113    }
2114
2115    /* TODO: C speedup not implemented for sort_keys */
2116
2117    it = PyObject_GetIter(dct);
2118    if (it == NULL)
2119        goto bail;
2120    skipkeys = PyObject_IsTrue(s->skipkeys);
2121    idx = 0;
2122    while ((key = PyIter_Next(it)) != NULL) {
2123        PyObject *encoded;
2124
2125        if (PyString_Check(key) || PyUnicode_Check(key)) {
2126            Py_INCREF(key);
2127            kstr = key;
2128        }
2129        else if (PyFloat_Check(key)) {
2130            kstr = encoder_encode_float(s, key);
2131            if (kstr == NULL)
2132                goto bail;
2133        }
2134        else if (PyInt_Check(key) || PyLong_Check(key)) {
2135            kstr = PyObject_Str(key);
2136            if (kstr == NULL)
2137                goto bail;
2138        }
2139        else if (key == Py_True || key == Py_False || key == Py_None) {
2140            kstr = _encoded_const(key);
2141            if (kstr == NULL)
2142                goto bail;
2143        }
2144        else if (skipkeys) {
2145            Py_DECREF(key);
2146            continue;
2147        }
2148        else {
2149            /* TODO: include repr of key */
2150            PyErr_SetString(PyExc_TypeError, "keys must be a string");
2151            goto bail;
2152        }
2153
2154        if (idx) {
2155            if (PyList_Append(rval, s->item_separator))
2156                goto bail;
2157        }
2158
2159        value = PyObject_GetItem(dct, key);
2160        if (value == NULL)
2161            goto bail;
2162
2163        encoded = encoder_encode_string(s, kstr);
2164        Py_CLEAR(kstr);
2165        if (encoded == NULL)
2166            goto bail;
2167        if (PyList_Append(rval, encoded)) {
2168            Py_DECREF(encoded);
2169            goto bail;
2170        }
2171        Py_DECREF(encoded);
2172        if (PyList_Append(rval, s->key_separator))
2173            goto bail;
2174        if (encoder_listencode_obj(s, rval, value, indent_level))
2175            goto bail;
2176        idx += 1;
2177        Py_CLEAR(value);
2178        Py_DECREF(key);
2179    }
2180    if (PyErr_Occurred())
2181        goto bail;
2182    Py_CLEAR(it);
2183
2184    if (ident != NULL) {
2185        if (PyDict_DelItem(s->markers, ident))
2186            goto bail;
2187        Py_CLEAR(ident);
2188    }
2189    if (s->indent != Py_None) {
2190        /* TODO: DOES NOT RUN */
2191        /*
2192            indent_level -= 1;
2193
2194            yield '\n' + (' ' * (_indent * _current_indent_level))
2195        */
2196    }
2197    if (PyList_Append(rval, close_dict))
2198        goto bail;
2199    return 0;
2200
2201bail:
2202    Py_XDECREF(it);
2203    Py_XDECREF(key);
2204    Py_XDECREF(value);
2205    Py_XDECREF(kstr);
2206    Py_XDECREF(ident);
2207    return -1;
2208}
2209
2210
2211static int
2212encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2213{
2214    /* Encode Python list seq to a JSON term, rval is a PyList */
2215    static PyObject *open_array = NULL;
2216    static PyObject *close_array = NULL;
2217    static PyObject *empty_array = NULL;
2218    PyObject *ident = NULL;
2219    PyObject *s_fast = NULL;
2220    Py_ssize_t num_items;
2221    PyObject **seq_items;
2222    Py_ssize_t i;
2223
2224    if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2225        open_array = PyString_InternFromString("[");
2226        close_array = PyString_InternFromString("]");
2227        empty_array = PyString_InternFromString("[]");
2228        if (open_array == NULL || close_array == NULL || empty_array == NULL)
2229            return -1;
2230    }
2231    ident = NULL;
2232    s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2233    if (s_fast == NULL)
2234        return -1;
2235    num_items = PySequence_Fast_GET_SIZE(s_fast);
2236    if (num_items == 0) {
2237        Py_DECREF(s_fast);
2238        return PyList_Append(rval, empty_array);
2239    }
2240
2241    if (s->markers != Py_None) {
2242        int has_key;
2243        ident = PyLong_FromVoidPtr(seq);
2244        if (ident == NULL)
2245            goto bail;
2246        has_key = PyDict_Contains(s->markers, ident);
2247        if (has_key) {
2248            if (has_key != -1)
2249                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2250            goto bail;
2251        }
2252        if (PyDict_SetItem(s->markers, ident, seq)) {
2253            goto bail;
2254        }
2255    }
2256
2257    seq_items = PySequence_Fast_ITEMS(s_fast);
2258    if (PyList_Append(rval, open_array))
2259        goto bail;
2260    if (s->indent != Py_None) {
2261        /* TODO: DOES NOT RUN */
2262        indent_level += 1;
2263        /*
2264            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2265            separator = _item_separator + newline_indent
2266            buf += newline_indent
2267        */
2268    }
2269    for (i = 0; i < num_items; i++) {
2270        PyObject *obj = seq_items[i];
2271        if (i) {
2272            if (PyList_Append(rval, s->item_separator))
2273                goto bail;
2274        }
2275        if (encoder_listencode_obj(s, rval, obj, indent_level))
2276            goto bail;
2277    }
2278    if (ident != NULL) {
2279        if (PyDict_DelItem(s->markers, ident))
2280            goto bail;
2281        Py_CLEAR(ident);
2282    }
2283    if (s->indent != Py_None) {
2284        /* TODO: DOES NOT RUN */
2285        /*
2286            indent_level -= 1;
2287
2288            yield '\n' + (' ' * (_indent * _current_indent_level))
2289        */
2290    }
2291    if (PyList_Append(rval, close_array))
2292        goto bail;
2293    Py_DECREF(s_fast);
2294    return 0;
2295
2296bail:
2297    Py_XDECREF(ident);
2298    Py_DECREF(s_fast);
2299    return -1;
2300}
2301
2302static void
2303encoder_dealloc(PyObject *self)
2304{
2305    /* Deallocate Encoder */
2306    encoder_clear(self);
2307    Py_TYPE(self)->tp_free(self);
2308}
2309
2310static int
2311encoder_traverse(PyObject *self, visitproc visit, void *arg)
2312{
2313    PyEncoderObject *s;
2314    assert(PyEncoder_Check(self));
2315    s = (PyEncoderObject *)self;
2316    Py_VISIT(s->markers);
2317    Py_VISIT(s->defaultfn);
2318    Py_VISIT(s->encoder);
2319    Py_VISIT(s->indent);
2320    Py_VISIT(s->key_separator);
2321    Py_VISIT(s->item_separator);
2322    Py_VISIT(s->sort_keys);
2323    Py_VISIT(s->skipkeys);
2324    return 0;
2325}
2326
2327static int
2328encoder_clear(PyObject *self)
2329{
2330    /* Deallocate Encoder */
2331    PyEncoderObject *s;
2332    assert(PyEncoder_Check(self));
2333    s = (PyEncoderObject *)self;
2334    Py_CLEAR(s->markers);
2335    Py_CLEAR(s->defaultfn);
2336    Py_CLEAR(s->encoder);
2337    Py_CLEAR(s->indent);
2338    Py_CLEAR(s->key_separator);
2339    Py_CLEAR(s->item_separator);
2340    Py_CLEAR(s->sort_keys);
2341    Py_CLEAR(s->skipkeys);
2342    return 0;
2343}
2344
/* Docstring placed in the tp_doc slot of PyEncoderType below. */
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");

/*
 * Type object for "_json.Encoder".
 *
 * Instances are callable (tp_call = encoder_call), take part in cyclic
 * garbage collection (Py_TPFLAGS_HAVE_GC with encoder_traverse and
 * encoder_clear) and expose their fields through encoder_members.
 *
 * The initializer is positional, so the order of the entries matters.
 * Note that init_json assigns PyType_GenericNew to tp_new before calling
 * PyType_Ready, which replaces the encoder_new entry given here.
 */
static
PyTypeObject PyEncoderType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* tp_internal */
    "_json.Encoder",       /* tp_name */
    sizeof(PyEncoderObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    encoder_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    encoder_call,         /* tp_call */
    0,                    /* tp_str */
    0,                    /* tp_getattro */
    0,                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    encoder_doc,          /* tp_doc */
    encoder_traverse,     /* tp_traverse */
    encoder_clear,        /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    encoder_members,      /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    encoder_init,         /* tp_init */
    0,                    /* tp_alloc */
    encoder_new,          /* tp_new */
    0,                    /* tp_free */
};
2390
/*
 * Module-level functions exported by "_json".
 * Each entry is {Python name, C implementation, calling convention,
 * docstring}; the all-NULL entry terminates the table.
 */
static PyMethodDef speedups_methods[] = {
    /* METH_O: called with exactly one object argument. */
    {"encode_basestring_ascii",
        (PyCFunction)py_encode_basestring_ascii,
        METH_O,
        pydoc_encode_basestring_ascii},
    /* METH_VARARGS: called with a tuple of positional arguments. */
    {"scanstring",
        (PyCFunction)py_scanstring,
        METH_VARARGS,
        pydoc_scanstring},
    {NULL, NULL, 0, NULL}
};

/* Docstring for the module itself, passed to Py_InitModule3 in init_json. */
PyDoc_STRVAR(module_doc,
"json speedups\n");
2405
2406void
2407init_json(void)
2408{
2409    PyObject *m;
2410    PyScannerType.tp_new = PyType_GenericNew;
2411    if (PyType_Ready(&PyScannerType) < 0)
2412        return;
2413    PyEncoderType.tp_new = PyType_GenericNew;
2414    if (PyType_Ready(&PyEncoderType) < 0)
2415        return;
2416    m = Py_InitModule3("_json", speedups_methods, module_doc);
2417    Py_INCREF((PyObject*)&PyScannerType);
2418    PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2419    Py_INCREF((PyObject*)&PyEncoderType);
2420    PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2421}
2422