1#include "Python.h"
2#include "structmember.h"
/* Fallback Py_TYPE() accessor, used only when building against a Python
   older than 2.6 that does not already provide the macro. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
#endif
/* Py_ssize_t shims for Python older than 2.5: sizes are plain ints and
   the PyInt long-based converters stand in for the Ssize_t ones. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif
/* Py_IS_FINITE fallback: a value is finite when it is neither infinite nor NaN. */
#ifndef Py_IS_FINITE
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED marks a parameter as intentionally unused; it expands to the
   GCC attribute under __GNUC__ and to nothing elsewhere. */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif

/* Encoding assumed by py_scanstring when the caller supplies none. */
#define DEFAULT_ENCODING "utf-8"

/* Type checks for the two extension types: *_Check accepts subclasses
   (PyObject_TypeCheck), *_CheckExact requires the exact type. */
#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)

/* Forward declarations of the type objects referenced by the macros above. */
static PyTypeObject PyScannerType;
static PyTypeObject PyEncoderType;
32
/* Instance layout of the scanner (decoder) type. Every PyObject field is
   exposed read-only to Python through scanner_members, and is visited and
   cleared by scanner_traverse / scanner_clear. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    PyObject *encoding;       /* read with PyString_AS_STRING() when scanning str input */
    PyObject *strict;         /* truth value (PyObject_IsTrue) forwarded to scanstring_* */
    PyObject *object_hook;    /* can change the object returned for a JSON object */
    PyObject *pairs_hook;     /* exposed to Python as "object_pairs_hook" */
    PyObject *parse_float;
    PyObject *parse_int;
    PyObject *parse_constant;
} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45    {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46    {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47    {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
48    {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
49    {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50    {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51    {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52    {NULL}
53};
54
/* Instance layout of the encoder type. The PyObject fields are exposed
   read-only to Python through encoder_members; the two int fields are
   not part of that table. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;
    PyObject *defaultfn;      /* exposed to Python as "default" */
    PyObject *encoder;
    PyObject *indent;
    PyObject *key_separator;
    PyObject *item_separator;
    PyObject *sort_keys;
    PyObject *skipkeys;
    int fast_encode;          /* C-only flag; presumably selects a built-in string encoder — TODO confirm in encoder_init */
    int allow_nan;            /* C-only flag; presumably controls NaN/Infinity output — TODO confirm in encoder_encode_float */
} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78    {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98static int
99scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100static void
101scanner_dealloc(PyObject *self);
102static int
103scanner_clear(PyObject *self);
104static PyObject *
105encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106static int
107encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
113encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114static int
115encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116static int
117encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118static PyObject *
119_encoded_const(PyObject *obj);
120static void
121raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124static int
125_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126static PyObject *
127_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128static PyObject *
129encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* S_CHAR(c): true when c is printable ASCII (' ' through '~') that can be
   copied into a JSON string literal unescaped, i.e. anything in that range
   except the backslash and the double quote.
   The argument is parenthesized so that an expression (for example a
   ternary) is compared as a whole; note that it is still evaluated more
   than once, so it must not have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for the four whitespace characters JSON allows
   between tokens: space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes needed for one "\uXXXX" escape. */
#define MIN_EXPANSION 6
/* Worst-case output bytes for a single input code unit: on wide-unicode
   builds one character may become a surrogate pair "\uXXXX\uXXXX". */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
140
141static int
142_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143{
144    /* PyObject to Py_ssize_t converter */
145    *size_ptr = PyInt_AsSsize_t(o);
146    if (*size_ptr == -1 && PyErr_Occurred())
147        return 0;
148    return 1;
149}
150
151static PyObject *
152_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153{
154    /* Py_ssize_t to PyObject converter */
155    return PyInt_FromSsize_t(*size_ptr);
156}
157
158static Py_ssize_t
159ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160{
161    /* Escape unicode code point c to ASCII escape sequences
162    in char *output. output must have at least 12 bytes unused to
163    accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
164    output[chars++] = '\\';
165    switch (c) {
166        case '\\': output[chars++] = (char)c; break;
167        case '"': output[chars++] = (char)c; break;
168        case '\b': output[chars++] = 'b'; break;
169        case '\f': output[chars++] = 'f'; break;
170        case '\n': output[chars++] = 'n'; break;
171        case '\r': output[chars++] = 'r'; break;
172        case '\t': output[chars++] = 't'; break;
173        default:
174#ifdef Py_UNICODE_WIDE
175            if (c >= 0x10000) {
176                /* UTF-16 surrogate pair */
177                Py_UNICODE v = c - 0x10000;
178                c = 0xd800 | ((v >> 10) & 0x3ff);
179                output[chars++] = 'u';
180                output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181                output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
182                output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
183                output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
184                c = 0xdc00 | (v & 0x3ff);
185                output[chars++] = '\\';
186            }
187#endif
188            output[chars++] = 'u';
189            output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190            output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
191            output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
192            output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
193    }
194    return chars;
195}
196
197static PyObject *
198ascii_escape_unicode(PyObject *pystr)
199{
200    /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
201    Py_ssize_t i;
202    Py_ssize_t input_chars;
203    Py_ssize_t output_size;
204    Py_ssize_t max_output_size;
205    Py_ssize_t chars;
206    Py_ssize_t incr;
207    PyObject *rval;
208    char *output;
209    Py_UNICODE *input_unicode;
210
211    input_chars = PyUnicode_GET_SIZE(pystr);
212    input_unicode = PyUnicode_AS_UNICODE(pystr);
213
214    output_size = input_chars;
215    incr = 2; /* for quotes */
216    /* One char input can be up to 6 chars output, estimate 4 of these */
217    incr += MIN_EXPANSION * 4;
218    if (PY_SSIZE_T_MAX - incr < output_size) {
219        PyErr_NoMemory();
220        return NULL;
221    }
222    output_size += incr;
223    if (PY_SSIZE_T_MAX / MAX_EXPANSION < input_chars ||
224        PY_SSIZE_T_MAX - 2 < input_chars * MAX_EXPANSION)
225        max_output_size = PY_SSIZE_T_MAX;
226    else
227        max_output_size = 2 + (input_chars * MAX_EXPANSION);
228    rval = PyString_FromStringAndSize(NULL, output_size);
229    if (rval == NULL) {
230        return NULL;
231    }
232    output = PyString_AS_STRING(rval);
233    chars = 0;
234    output[chars++] = '"';
235    for (i = 0; i < input_chars; i++) {
236        Py_UNICODE c = input_unicode[i];
237        if (S_CHAR(c)) {
238            output[chars++] = (char)c;
239        }
240        else {
241            chars = ascii_escape_char(c, output, chars);
242        }
243        if (output_size - chars < (1 + MAX_EXPANSION)) {
244            if (output_size == PY_SSIZE_T_MAX) {
245                Py_DECREF(rval);
246                PyErr_NoMemory();
247                return NULL;
248            }
249            /* There's more than four, so let's resize by a lot */
250            if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
251                output_size *= 2;
252            else
253                output_size = max_output_size;
254            if (_PyString_Resize(&rval, output_size) == -1) {
255                return NULL;
256            }
257            output = PyString_AS_STRING(rval);
258        }
259    }
260    output[chars++] = '"';
261    if (_PyString_Resize(&rval, chars) == -1) {
262        return NULL;
263    }
264    return rval;
265}
266
267static PyObject *
268ascii_escape_str(PyObject *pystr)
269{
270    /* Take a PyString pystr and return a new ASCII-only escaped PyString */
271    Py_ssize_t i;
272    Py_ssize_t input_chars;
273    Py_ssize_t output_size;
274    Py_ssize_t max_output_size;
275    Py_ssize_t chars;
276    Py_ssize_t incr;
277    PyObject *rval;
278    char *output;
279    char *input_str;
280
281    input_chars = PyString_GET_SIZE(pystr);
282    input_str = PyString_AS_STRING(pystr);
283
284    /* Fast path for a string that's already ASCII */
285    for (i = 0; i < input_chars; i++) {
286        Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
287        if (!S_CHAR(c)) {
288            /* If we have to escape something, scan the string for unicode */
289            Py_ssize_t j;
290            for (j = i; j < input_chars; j++) {
291                c = (Py_UNICODE)(unsigned char)input_str[j];
292                if (c > 0x7f) {
293                    /* We hit a non-ASCII character, bail to unicode mode */
294                    PyObject *uni;
295                    uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
296                    if (uni == NULL) {
297                        return NULL;
298                    }
299                    rval = ascii_escape_unicode(uni);
300                    Py_DECREF(uni);
301                    return rval;
302                }
303            }
304            break;
305        }
306    }
307
308    output_size = input_chars;
309    incr = 2; /* for quotes */
310    if (i != input_chars) {
311        /* One char input can be up to 6 chars output, estimate 4 of these */
312        incr += MIN_EXPANSION * 4;
313    }
314    if (PY_SSIZE_T_MAX - incr < output_size) {
315        PyErr_NoMemory();
316        return NULL;
317    }
318    output_size += incr;
319    if (PY_SSIZE_T_MAX / MIN_EXPANSION < input_chars ||
320        PY_SSIZE_T_MAX - 2 < input_chars * MIN_EXPANSION)
321        max_output_size = PY_SSIZE_T_MAX;
322    else
323        max_output_size = 2 + (input_chars * MIN_EXPANSION);
324    rval = PyString_FromStringAndSize(NULL, output_size);
325    if (rval == NULL) {
326        return NULL;
327    }
328    output = PyString_AS_STRING(rval);
329    output[0] = '"';
330
331    /* We know that everything up to i is ASCII already */
332    chars = i + 1;
333    memcpy(&output[1], input_str, i);
334
335    for (; i < input_chars; i++) {
336        Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
337        if (S_CHAR(c)) {
338            output[chars++] = (char)c;
339        }
340        else {
341            chars = ascii_escape_char(c, output, chars);
342        }
343        /* An ASCII char can't possibly expand to a surrogate! */
344        if (output_size - chars < (1 + MIN_EXPANSION)) {
345            if (output_size == PY_SSIZE_T_MAX) {
346                Py_DECREF(rval);
347                PyErr_NoMemory();
348                return NULL;
349            }
350            /* There's more than four, so let's resize by a lot */
351            if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
352                output_size *= 2;
353            else
354                output_size = max_output_size;
355            if (_PyString_Resize(&rval, output_size) == -1) {
356                return NULL;
357            }
358            output = PyString_AS_STRING(rval);
359        }
360    }
361    output[chars++] = '"';
362    if (_PyString_Resize(&rval, chars) == -1) {
363        return NULL;
364    }
365    return rval;
366}
367
368static void
369raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
370{
371    /* Use the Python function json.decoder.errmsg to raise a nice
372    looking ValueError exception */
373    static PyObject *errmsg_fn = NULL;
374    PyObject *pymsg;
375    if (errmsg_fn == NULL) {
376        PyObject *decoder = PyImport_ImportModule("json.decoder");
377        if (decoder == NULL)
378            return;
379        errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
380        Py_DECREF(decoder);
381        if (errmsg_fn == NULL)
382            return;
383    }
384    pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
385    if (pymsg) {
386        PyErr_SetObject(PyExc_ValueError, pymsg);
387        Py_DECREF(pymsg);
388    }
389}
390
391static PyObject *
392join_list_unicode(PyObject *lst)
393{
394    /* return u''.join(lst) */
395    static PyObject *joinfn = NULL;
396    if (joinfn == NULL) {
397        PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
398        if (ustr == NULL)
399            return NULL;
400
401        joinfn = PyObject_GetAttrString(ustr, "join");
402        Py_DECREF(ustr);
403        if (joinfn == NULL)
404            return NULL;
405    }
406    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
407}
408
409static PyObject *
410_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
411    /* return (rval, idx) tuple, stealing reference to rval */
412    PyObject *tpl;
413    PyObject *pyidx;
414    /*
415    steal a reference to rval, returns (rval, idx)
416    */
417    if (rval == NULL) {
418        return NULL;
419    }
420    pyidx = PyInt_FromSsize_t(idx);
421    if (pyidx == NULL) {
422        Py_DECREF(rval);
423        return NULL;
424    }
425    tpl = PyTuple_New(2);
426    if (tpl == NULL) {
427        Py_DECREF(pyidx);
428        Py_DECREF(rval);
429        return NULL;
430    }
431    PyTuple_SET_ITEM(tpl, 0, rval);
432    PyTuple_SET_ITEM(tpl, 1, pyidx);
433    return tpl;
434}
435
436static PyObject *
437scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
438{
439    /* Read the JSON string from PyString pystr.
440    end is the index of the first character after the quote.
441    encoding is the encoding of pystr (must be an ASCII superset)
442    if strict is zero then literal control characters are allowed
443    *next_end_ptr is a return-by-reference index of the character
444        after the end quote
445
446    Return value is a new PyString (if ASCII-only) or PyUnicode
447    */
448    PyObject *rval;
449    Py_ssize_t len = PyString_GET_SIZE(pystr);
450    Py_ssize_t begin = end - 1;
451    Py_ssize_t next;
452    char *buf = PyString_AS_STRING(pystr);
453    PyObject *chunks = PyList_New(0);
454    if (chunks == NULL) {
455        goto bail;
456    }
457    if (end < 0 || len <= end) {
458        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
459        goto bail;
460    }
461    while (1) {
462        /* Find the end of the string or the next escape */
463        Py_UNICODE c = 0;
464        PyObject *chunk = NULL;
465        for (next = end; next < len; next++) {
466            c = (unsigned char)buf[next];
467            if (c == '"' || c == '\\') {
468                break;
469            }
470            else if (strict && c <= 0x1f) {
471                raise_errmsg("Invalid control character at", pystr, next);
472                goto bail;
473            }
474        }
475        if (!(c == '"' || c == '\\')) {
476            raise_errmsg("Unterminated string starting at", pystr, begin);
477            goto bail;
478        }
479        /* Pick up this chunk if it's not zero length */
480        if (next != end) {
481            PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
482            if (strchunk == NULL) {
483                goto bail;
484            }
485            chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
486            Py_DECREF(strchunk);
487            if (chunk == NULL) {
488                goto bail;
489            }
490            if (PyList_Append(chunks, chunk)) {
491                Py_DECREF(chunk);
492                goto bail;
493            }
494            Py_DECREF(chunk);
495        }
496        next++;
497        if (c == '"') {
498            end = next;
499            break;
500        }
501        if (next == len) {
502            raise_errmsg("Unterminated string starting at", pystr, begin);
503            goto bail;
504        }
505        c = buf[next];
506        if (c != 'u') {
507            /* Non-unicode backslash escapes */
508            end = next + 1;
509            switch (c) {
510                case '"': break;
511                case '\\': break;
512                case '/': break;
513                case 'b': c = '\b'; break;
514                case 'f': c = '\f'; break;
515                case 'n': c = '\n'; break;
516                case 'r': c = '\r'; break;
517                case 't': c = '\t'; break;
518                default: c = 0;
519            }
520            if (c == 0) {
521                raise_errmsg("Invalid \\escape", pystr, end - 2);
522                goto bail;
523            }
524        }
525        else {
526            c = 0;
527            next++;
528            end = next + 4;
529            if (end >= len) {
530                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
531                goto bail;
532            }
533            /* Decode 4 hex digits */
534            for (; next < end; next++) {
535                Py_UNICODE digit = buf[next];
536                c <<= 4;
537                switch (digit) {
538                    case '0': case '1': case '2': case '3': case '4':
539                    case '5': case '6': case '7': case '8': case '9':
540                        c |= (digit - '0'); break;
541                    case 'a': case 'b': case 'c': case 'd': case 'e':
542                    case 'f':
543                        c |= (digit - 'a' + 10); break;
544                    case 'A': case 'B': case 'C': case 'D': case 'E':
545                    case 'F':
546                        c |= (digit - 'A' + 10); break;
547                    default:
548                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
549                        goto bail;
550                }
551            }
552#ifdef Py_UNICODE_WIDE
553            /* Surrogate pair */
554            if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
555                buf[next++] == '\\' &&
556                buf[next++] == 'u') {
557                Py_UNICODE c2 = 0;
558                end += 6;
559                /* Decode 4 hex digits */
560                for (; next < end; next++) {
561                    Py_UNICODE digit = buf[next];
562                    c2 <<= 4;
563                    switch (digit) {
564                        case '0': case '1': case '2': case '3': case '4':
565                        case '5': case '6': case '7': case '8': case '9':
566                            c2 |= (digit - '0'); break;
567                        case 'a': case 'b': case 'c': case 'd': case 'e':
568                        case 'f':
569                            c2 |= (digit - 'a' + 10); break;
570                        case 'A': case 'B': case 'C': case 'D': case 'E':
571                        case 'F':
572                            c2 |= (digit - 'A' + 10); break;
573                        default:
574                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
575                            goto bail;
576                    }
577                }
578                if ((c2 & 0xfc00) == 0xdc00)
579                    c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
580                else
581                    end -= 6;
582            }
583#endif
584        }
585        chunk = PyUnicode_FromUnicode(&c, 1);
586        if (chunk == NULL) {
587            goto bail;
588        }
589        if (PyList_Append(chunks, chunk)) {
590            Py_DECREF(chunk);
591            goto bail;
592        }
593        Py_DECREF(chunk);
594    }
595
596    rval = join_list_unicode(chunks);
597    if (rval == NULL) {
598        goto bail;
599    }
600    Py_CLEAR(chunks);
601    *next_end_ptr = end;
602    return rval;
603bail:
604    *next_end_ptr = -1;
605    Py_XDECREF(chunks);
606    return NULL;
607}
608
609
610static PyObject *
611scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
612{
613    /* Read the JSON string from PyUnicode pystr.
614    end is the index of the first character after the quote.
615    if strict is zero then literal control characters are allowed
616    *next_end_ptr is a return-by-reference index of the character
617        after the end quote
618
619    Return value is a new PyUnicode
620    */
621    PyObject *rval;
622    Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
623    Py_ssize_t begin = end - 1;
624    Py_ssize_t next;
625    const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
626    PyObject *chunks = PyList_New(0);
627    if (chunks == NULL) {
628        goto bail;
629    }
630    if (end < 0 || len <= end) {
631        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
632        goto bail;
633    }
634    while (1) {
635        /* Find the end of the string or the next escape */
636        Py_UNICODE c = 0;
637        PyObject *chunk = NULL;
638        for (next = end; next < len; next++) {
639            c = buf[next];
640            if (c == '"' || c == '\\') {
641                break;
642            }
643            else if (strict && c <= 0x1f) {
644                raise_errmsg("Invalid control character at", pystr, next);
645                goto bail;
646            }
647        }
648        if (!(c == '"' || c == '\\')) {
649            raise_errmsg("Unterminated string starting at", pystr, begin);
650            goto bail;
651        }
652        /* Pick up this chunk if it's not zero length */
653        if (next != end) {
654            chunk = PyUnicode_FromUnicode(&buf[end], next - end);
655            if (chunk == NULL) {
656                goto bail;
657            }
658            if (PyList_Append(chunks, chunk)) {
659                Py_DECREF(chunk);
660                goto bail;
661            }
662            Py_DECREF(chunk);
663        }
664        next++;
665        if (c == '"') {
666            end = next;
667            break;
668        }
669        if (next == len) {
670            raise_errmsg("Unterminated string starting at", pystr, begin);
671            goto bail;
672        }
673        c = buf[next];
674        if (c != 'u') {
675            /* Non-unicode backslash escapes */
676            end = next + 1;
677            switch (c) {
678                case '"': break;
679                case '\\': break;
680                case '/': break;
681                case 'b': c = '\b'; break;
682                case 'f': c = '\f'; break;
683                case 'n': c = '\n'; break;
684                case 'r': c = '\r'; break;
685                case 't': c = '\t'; break;
686                default: c = 0;
687            }
688            if (c == 0) {
689                raise_errmsg("Invalid \\escape", pystr, end - 2);
690                goto bail;
691            }
692        }
693        else {
694            c = 0;
695            next++;
696            end = next + 4;
697            if (end >= len) {
698                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
699                goto bail;
700            }
701            /* Decode 4 hex digits */
702            for (; next < end; next++) {
703                Py_UNICODE digit = buf[next];
704                c <<= 4;
705                switch (digit) {
706                    case '0': case '1': case '2': case '3': case '4':
707                    case '5': case '6': case '7': case '8': case '9':
708                        c |= (digit - '0'); break;
709                    case 'a': case 'b': case 'c': case 'd': case 'e':
710                    case 'f':
711                        c |= (digit - 'a' + 10); break;
712                    case 'A': case 'B': case 'C': case 'D': case 'E':
713                    case 'F':
714                        c |= (digit - 'A' + 10); break;
715                    default:
716                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
717                        goto bail;
718                }
719            }
720#ifdef Py_UNICODE_WIDE
721            /* Surrogate pair */
722            if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
723                buf[next++] == '\\' && buf[next++] == 'u') {
724                Py_UNICODE c2 = 0;
725                end += 6;
726                /* Decode 4 hex digits */
727                for (; next < end; next++) {
728                    Py_UNICODE digit = buf[next];
729                    c2 <<= 4;
730                    switch (digit) {
731                        case '0': case '1': case '2': case '3': case '4':
732                        case '5': case '6': case '7': case '8': case '9':
733                            c2 |= (digit - '0'); break;
734                        case 'a': case 'b': case 'c': case 'd': case 'e':
735                        case 'f':
736                            c2 |= (digit - 'a' + 10); break;
737                        case 'A': case 'B': case 'C': case 'D': case 'E':
738                        case 'F':
739                            c2 |= (digit - 'A' + 10); break;
740                        default:
741                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
742                            goto bail;
743                    }
744                }
745                if ((c2 & 0xfc00) == 0xdc00)
746                    c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
747                else
748                    end -= 6;
749            }
750#endif
751        }
752        chunk = PyUnicode_FromUnicode(&c, 1);
753        if (chunk == NULL) {
754            goto bail;
755        }
756        if (PyList_Append(chunks, chunk)) {
757            Py_DECREF(chunk);
758            goto bail;
759        }
760        Py_DECREF(chunk);
761    }
762
763    rval = join_list_unicode(chunks);
764    if (rval == NULL) {
765        goto bail;
766    }
767    Py_DECREF(chunks);
768    *next_end_ptr = end;
769    return rval;
770bail:
771    *next_end_ptr = -1;
772    Py_XDECREF(chunks);
773    return NULL;
774}
775
776PyDoc_STRVAR(pydoc_scanstring,
777    "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
778    "\n"
779    "Scan the string s for a JSON string. End is the index of the\n"
780    "character in s after the quote that started the JSON string.\n"
781    "Unescapes all valid JSON string escape sequences and raises ValueError\n"
782    "on attempt to decode an invalid string. If strict is False then literal\n"
783    "control characters are allowed in the string.\n"
784    "\n"
785    "Returns a tuple of the decoded string and the index of the character in s\n"
786    "after the end quote."
787);
788
789static PyObject *
790py_scanstring(PyObject* self UNUSED, PyObject *args)
791{
792    PyObject *pystr;
793    PyObject *rval;
794    Py_ssize_t end;
795    Py_ssize_t next_end = -1;
796    char *encoding = NULL;
797    int strict = 1;
798    if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
799        return NULL;
800    }
801    if (encoding == NULL) {
802        encoding = DEFAULT_ENCODING;
803    }
804    if (PyString_Check(pystr)) {
805        rval = scanstring_str(pystr, end, encoding, strict, &next_end);
806    }
807    else if (PyUnicode_Check(pystr)) {
808        rval = scanstring_unicode(pystr, end, strict, &next_end);
809    }
810    else {
811        PyErr_Format(PyExc_TypeError,
812                     "first argument must be a string, not %.80s",
813                     Py_TYPE(pystr)->tp_name);
814        return NULL;
815    }
816    return _build_rval_index_tuple(rval, next_end);
817}
818
819PyDoc_STRVAR(pydoc_encode_basestring_ascii,
820    "encode_basestring_ascii(basestring) -> str\n"
821    "\n"
822    "Return an ASCII-only JSON representation of a Python string"
823);
824
825static PyObject *
826py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
827{
828    /* Return an ASCII-only JSON representation of a Python string */
829    /* METH_O */
830    if (PyString_Check(pystr)) {
831        return ascii_escape_str(pystr);
832    }
833    else if (PyUnicode_Check(pystr)) {
834        return ascii_escape_unicode(pystr);
835    }
836    else {
837        PyErr_Format(PyExc_TypeError,
838                     "first argument must be a string, not %.80s",
839                     Py_TYPE(pystr)->tp_name);
840        return NULL;
841    }
842}
843
844static void
845scanner_dealloc(PyObject *self)
846{
847    /* Deallocate scanner object */
848    scanner_clear(self);
849    Py_TYPE(self)->tp_free(self);
850}
851
852static int
853scanner_traverse(PyObject *self, visitproc visit, void *arg)
854{
855    PyScannerObject *s;
856    assert(PyScanner_Check(self));
857    s = (PyScannerObject *)self;
858    Py_VISIT(s->encoding);
859    Py_VISIT(s->strict);
860    Py_VISIT(s->object_hook);
861    Py_VISIT(s->pairs_hook);
862    Py_VISIT(s->parse_float);
863    Py_VISIT(s->parse_int);
864    Py_VISIT(s->parse_constant);
865    return 0;
866}
867
868static int
869scanner_clear(PyObject *self)
870{
871    PyScannerObject *s;
872    assert(PyScanner_Check(self));
873    s = (PyScannerObject *)self;
874    Py_CLEAR(s->encoding);
875    Py_CLEAR(s->strict);
876    Py_CLEAR(s->object_hook);
877    Py_CLEAR(s->pairs_hook);
878    Py_CLEAR(s->parse_float);
879    Py_CLEAR(s->parse_int);
880    Py_CLEAR(s->parse_constant);
881    return 0;
882}
883
884static PyObject *
885_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
886    /* Read a JSON object from PyString pystr.
887    idx is the index of the first character after the opening curly brace.
888    *next_idx_ptr is a return-by-reference index to the first character after
889        the closing curly brace.
890
891    Returns a new PyObject (usually a dict, but object_hook can change that)
892    */
893    char *str = PyString_AS_STRING(pystr);
894    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
895    PyObject *rval;
896    PyObject *pairs;
897    PyObject *item;
898    PyObject *key = NULL;
899    PyObject *val = NULL;
900    char *encoding = PyString_AS_STRING(s->encoding);
901    int strict = PyObject_IsTrue(s->strict);
902    Py_ssize_t next_idx;
903
904    if (strict < 0)
905        return NULL;
906
907    pairs = PyList_New(0);
908    if (pairs == NULL)
909        return NULL;
910
911    /* skip whitespace after { */
912    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
913
914    /* only loop if the object is non-empty */
915    if (idx <= end_idx && str[idx] != '}') {
916        while (idx <= end_idx) {
917            /* read key */
918            if (str[idx] != '"') {
919                raise_errmsg("Expecting property name", pystr, idx);
920                goto bail;
921            }
922            key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
923            if (key == NULL)
924                goto bail;
925            idx = next_idx;
926
927            /* skip whitespace between key and : delimiter, read :, skip whitespace */
928            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
929            if (idx > end_idx || str[idx] != ':') {
930                raise_errmsg("Expecting : delimiter", pystr, idx);
931                goto bail;
932            }
933            idx++;
934            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
935
936            /* read any JSON data type */
937            val = scan_once_str(s, pystr, idx, &next_idx);
938            if (val == NULL)
939                goto bail;
940
941            item = PyTuple_Pack(2, key, val);
942            if (item == NULL)
943                goto bail;
944            Py_CLEAR(key);
945            Py_CLEAR(val);
946            if (PyList_Append(pairs, item) == -1) {
947                Py_DECREF(item);
948                goto bail;
949            }
950            Py_DECREF(item);
951            idx = next_idx;
952
953            /* skip whitespace before } or , */
954            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
955
956            /* bail if the object is closed or we didn't get the , delimiter */
957            if (idx > end_idx) break;
958            if (str[idx] == '}') {
959                break;
960            }
961            else if (str[idx] != ',') {
962                raise_errmsg("Expecting , delimiter", pystr, idx);
963                goto bail;
964            }
965            idx++;
966
967            /* skip whitespace after , delimiter */
968            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
969        }
970    }
971    /* verify that idx < end_idx, str[idx] should be '}' */
972    if (idx > end_idx || str[idx] != '}') {
973        raise_errmsg("Expecting object", pystr, end_idx);
974        goto bail;
975    }
976
977    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
978    if (s->pairs_hook != Py_None) {
979        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
980        if (val == NULL)
981            goto bail;
982        Py_DECREF(pairs);
983        *next_idx_ptr = idx + 1;
984        return val;
985    }
986
987    rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
988                                         pairs, NULL);
989    if (rval == NULL)
990        goto bail;
991    Py_CLEAR(pairs);
992
993    /* if object_hook is not None: rval = object_hook(rval) */
994    if (s->object_hook != Py_None) {
995        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
996        if (val == NULL)
997            goto bail;
998        Py_DECREF(rval);
999        rval = val;
1000        val = NULL;
1001    }
1002    *next_idx_ptr = idx + 1;
1003    return rval;
1004bail:
1005    Py_XDECREF(key);
1006    Py_XDECREF(val);
1007    Py_XDECREF(pairs);
1008    return NULL;
1009}
1010
1011static PyObject *
1012_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1013    /* Read a JSON object from PyUnicode pystr.
1014    idx is the index of the first character after the opening curly brace.
1015    *next_idx_ptr is a return-by-reference index to the first character after
1016        the closing curly brace.
1017
1018    Returns a new PyObject (usually a dict, but object_hook can change that)
1019    */
1020    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1021    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1022    PyObject *rval;
1023    PyObject *pairs;
1024    PyObject *item;
1025    PyObject *key = NULL;
1026    PyObject *val = NULL;
1027    int strict = PyObject_IsTrue(s->strict);
1028    Py_ssize_t next_idx;
1029
1030    if (strict < 0)
1031        return NULL;
1032
1033    pairs = PyList_New(0);
1034    if (pairs == NULL)
1035        return NULL;
1036
1037    /* skip whitespace after { */
1038    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1039
1040    /* only loop if the object is non-empty */
1041    if (idx <= end_idx && str[idx] != '}') {
1042        while (idx <= end_idx) {
1043            /* read key */
1044            if (str[idx] != '"') {
1045                raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
1046                goto bail;
1047            }
1048            key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1049            if (key == NULL)
1050                goto bail;
1051            idx = next_idx;
1052
1053            /* skip whitespace between key and : delimiter, read :, skip whitespace */
1054            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1055            if (idx > end_idx || str[idx] != ':') {
1056                raise_errmsg("Expecting ':' delimiter", pystr, idx);
1057                goto bail;
1058            }
1059            idx++;
1060            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1061
1062            /* read any JSON term */
1063            val = scan_once_unicode(s, pystr, idx, &next_idx);
1064            if (val == NULL)
1065                goto bail;
1066
1067            item = PyTuple_Pack(2, key, val);
1068            if (item == NULL)
1069                goto bail;
1070            Py_CLEAR(key);
1071            Py_CLEAR(val);
1072            if (PyList_Append(pairs, item) == -1) {
1073                Py_DECREF(item);
1074                goto bail;
1075            }
1076            Py_DECREF(item);
1077            idx = next_idx;
1078
1079            /* skip whitespace before } or , */
1080            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1081
1082            /* bail if the object is closed or we didn't get the , delimiter */
1083            if (idx > end_idx) break;
1084            if (str[idx] == '}') {
1085                break;
1086            }
1087            else if (str[idx] != ',') {
1088                raise_errmsg("Expecting ',' delimiter", pystr, idx);
1089                goto bail;
1090            }
1091            idx++;
1092
1093            /* skip whitespace after , delimiter */
1094            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1095        }
1096    }
1097
1098    /* verify that idx < end_idx, str[idx] should be '}' */
1099    if (idx > end_idx || str[idx] != '}') {
1100        raise_errmsg("Expecting object", pystr, end_idx);
1101        goto bail;
1102    }
1103
1104    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1105    if (s->pairs_hook != Py_None) {
1106        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1107        if (val == NULL)
1108            goto bail;
1109        Py_DECREF(pairs);
1110        *next_idx_ptr = idx + 1;
1111        return val;
1112    }
1113
1114    rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1115                                         pairs, NULL);
1116    if (rval == NULL)
1117        goto bail;
1118    Py_CLEAR(pairs);
1119
1120    /* if object_hook is not None: rval = object_hook(rval) */
1121    if (s->object_hook != Py_None) {
1122        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1123        if (val == NULL)
1124            goto bail;
1125        Py_DECREF(rval);
1126        rval = val;
1127        val = NULL;
1128    }
1129    *next_idx_ptr = idx + 1;
1130    return rval;
1131bail:
1132    Py_XDECREF(key);
1133    Py_XDECREF(val);
1134    Py_XDECREF(pairs);
1135    return NULL;
1136}
1137
1138static PyObject *
1139_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1140    /* Read a JSON array from PyString pystr.
1141    idx is the index of the first character after the opening brace.
1142    *next_idx_ptr is a return-by-reference index to the first character after
1143        the closing brace.
1144
1145    Returns a new PyList
1146    */
1147    char *str = PyString_AS_STRING(pystr);
1148    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1149    PyObject *val = NULL;
1150    PyObject *rval = PyList_New(0);
1151    Py_ssize_t next_idx;
1152    if (rval == NULL)
1153        return NULL;
1154
1155    /* skip whitespace after [ */
1156    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1157
1158    /* only loop if the array is non-empty */
1159    if (idx <= end_idx && str[idx] != ']') {
1160        while (idx <= end_idx) {
1161
1162            /* read any JSON term and de-tuplefy the (rval, idx) */
1163            val = scan_once_str(s, pystr, idx, &next_idx);
1164            if (val == NULL)
1165                goto bail;
1166
1167            if (PyList_Append(rval, val) == -1)
1168                goto bail;
1169
1170            Py_CLEAR(val);
1171            idx = next_idx;
1172
1173            /* skip whitespace between term and , */
1174            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1175
1176            /* bail if the array is closed or we didn't get the , delimiter */
1177            if (idx > end_idx) break;
1178            if (str[idx] == ']') {
1179                break;
1180            }
1181            else if (str[idx] != ',') {
1182                raise_errmsg("Expecting , delimiter", pystr, idx);
1183                goto bail;
1184            }
1185            idx++;
1186
1187            /* skip whitespace after , */
1188            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1189        }
1190    }
1191
1192    /* verify that idx < end_idx, str[idx] should be ']' */
1193    if (idx > end_idx || str[idx] != ']') {
1194        raise_errmsg("Expecting object", pystr, end_idx);
1195        goto bail;
1196    }
1197    *next_idx_ptr = idx + 1;
1198    return rval;
1199bail:
1200    Py_XDECREF(val);
1201    Py_DECREF(rval);
1202    return NULL;
1203}
1204
1205static PyObject *
1206_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1207    /* Read a JSON array from PyString pystr.
1208    idx is the index of the first character after the opening brace.
1209    *next_idx_ptr is a return-by-reference index to the first character after
1210        the closing brace.
1211
1212    Returns a new PyList
1213    */
1214    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1215    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1216    PyObject *val = NULL;
1217    PyObject *rval = PyList_New(0);
1218    Py_ssize_t next_idx;
1219    if (rval == NULL)
1220        return NULL;
1221
1222    /* skip whitespace after [ */
1223    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1224
1225    /* only loop if the array is non-empty */
1226    if (idx <= end_idx && str[idx] != ']') {
1227        while (idx <= end_idx) {
1228
1229            /* read any JSON term  */
1230            val = scan_once_unicode(s, pystr, idx, &next_idx);
1231            if (val == NULL)
1232                goto bail;
1233
1234            if (PyList_Append(rval, val) == -1)
1235                goto bail;
1236
1237            Py_CLEAR(val);
1238            idx = next_idx;
1239
1240            /* skip whitespace between term and , */
1241            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1242
1243            /* bail if the array is closed or we didn't get the , delimiter */
1244            if (idx > end_idx) break;
1245            if (str[idx] == ']') {
1246                break;
1247            }
1248            else if (str[idx] != ',') {
1249                raise_errmsg("Expecting ',' delimiter", pystr, idx);
1250                goto bail;
1251            }
1252            idx++;
1253
1254            /* skip whitespace after , */
1255            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1256        }
1257    }
1258
1259    /* verify that idx < end_idx, str[idx] should be ']' */
1260    if (idx > end_idx || str[idx] != ']') {
1261        raise_errmsg("Expecting object", pystr, end_idx);
1262        goto bail;
1263    }
1264    *next_idx_ptr = idx + 1;
1265    return rval;
1266bail:
1267    Py_XDECREF(val);
1268    Py_DECREF(rval);
1269    return NULL;
1270}
1271
1272static PyObject *
1273_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1274    /* Read a JSON constant from PyString pystr.
1275    constant is the constant string that was found
1276        ("NaN", "Infinity", "-Infinity").
1277    idx is the index of the first character of the constant
1278    *next_idx_ptr is a return-by-reference index to the first character after
1279        the constant.
1280
1281    Returns the result of parse_constant
1282    */
1283    PyObject *cstr;
1284    PyObject *rval;
1285    /* constant is "NaN", "Infinity", or "-Infinity" */
1286    cstr = PyString_InternFromString(constant);
1287    if (cstr == NULL)
1288        return NULL;
1289
1290    /* rval = parse_constant(constant) */
1291    rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1292    idx += PyString_GET_SIZE(cstr);
1293    Py_DECREF(cstr);
1294    *next_idx_ptr = idx;
1295    return rval;
1296}
1297
1298static PyObject *
1299_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1300    /* Read a JSON number from PyString pystr.
1301    idx is the index of the first character of the number
1302    *next_idx_ptr is a return-by-reference index to the first character after
1303        the number.
1304
1305    Returns a new PyObject representation of that number:
1306        PyInt, PyLong, or PyFloat.
1307        May return other types if parse_int or parse_float are set
1308    */
1309    char *str = PyString_AS_STRING(pystr);
1310    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1311    Py_ssize_t idx = start;
1312    int is_float = 0;
1313    PyObject *rval;
1314    PyObject *numstr;
1315
1316    /* read a sign if it's there, make sure it's not the end of the string */
1317    if (str[idx] == '-') {
1318        idx++;
1319        if (idx > end_idx) {
1320            PyErr_SetNone(PyExc_StopIteration);
1321            return NULL;
1322        }
1323    }
1324
1325    /* read as many integer digits as we find as long as it doesn't start with 0 */
1326    if (str[idx] >= '1' && str[idx] <= '9') {
1327        idx++;
1328        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1329    }
1330    /* if it starts with 0 we only expect one integer digit */
1331    else if (str[idx] == '0') {
1332        idx++;
1333    }
1334    /* no integer digits, error */
1335    else {
1336        PyErr_SetNone(PyExc_StopIteration);
1337        return NULL;
1338    }
1339
1340    /* if the next char is '.' followed by a digit then read all float digits */
1341    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1342        is_float = 1;
1343        idx += 2;
1344        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1345    }
1346
1347    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1348    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1349
1350        /* save the index of the 'e' or 'E' just in case we need to backtrack */
1351        Py_ssize_t e_start = idx;
1352        idx++;
1353
1354        /* read an exponent sign if present */
1355        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1356
1357        /* read all digits */
1358        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1359
1360        /* if we got a digit, then parse as float. if not, backtrack */
1361        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1362            is_float = 1;
1363        }
1364        else {
1365            idx = e_start;
1366        }
1367    }
1368
1369    /* copy the section we determined to be a number */
1370    numstr = PyString_FromStringAndSize(&str[start], idx - start);
1371    if (numstr == NULL)
1372        return NULL;
1373    if (is_float) {
1374        /* parse as a float using a fast path if available, otherwise call user defined method */
1375        if (s->parse_float != (PyObject *)&PyFloat_Type) {
1376            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1377        }
1378        else {
1379            double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1380                                             NULL, NULL);
1381            if (d == -1.0 && PyErr_Occurred())
1382                return NULL;
1383            rval = PyFloat_FromDouble(d);
1384        }
1385    }
1386    else {
1387        /* parse as an int using a fast path if available, otherwise call user defined method */
1388        if (s->parse_int != (PyObject *)&PyInt_Type) {
1389            rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1390        }
1391        else {
1392            rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1393        }
1394    }
1395    Py_DECREF(numstr);
1396    *next_idx_ptr = idx;
1397    return rval;
1398}
1399
1400static PyObject *
1401_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1402    /* Read a JSON number from PyUnicode pystr.
1403    idx is the index of the first character of the number
1404    *next_idx_ptr is a return-by-reference index to the first character after
1405        the number.
1406
1407    Returns a new PyObject representation of that number:
1408        PyInt, PyLong, or PyFloat.
1409        May return other types if parse_int or parse_float are set
1410    */
1411    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1412    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1413    Py_ssize_t idx = start;
1414    int is_float = 0;
1415    PyObject *rval;
1416    PyObject *numstr;
1417
1418    /* read a sign if it's there, make sure it's not the end of the string */
1419    if (str[idx] == '-') {
1420        idx++;
1421        if (idx > end_idx) {
1422            PyErr_SetNone(PyExc_StopIteration);
1423            return NULL;
1424        }
1425    }
1426
1427    /* read as many integer digits as we find as long as it doesn't start with 0 */
1428    if (str[idx] >= '1' && str[idx] <= '9') {
1429        idx++;
1430        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1431    }
1432    /* if it starts with 0 we only expect one integer digit */
1433    else if (str[idx] == '0') {
1434        idx++;
1435    }
1436    /* no integer digits, error */
1437    else {
1438        PyErr_SetNone(PyExc_StopIteration);
1439        return NULL;
1440    }
1441
1442    /* if the next char is '.' followed by a digit then read all float digits */
1443    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1444        is_float = 1;
1445        idx += 2;
1446        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1447    }
1448
1449    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1450    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1451        Py_ssize_t e_start = idx;
1452        idx++;
1453
1454        /* read an exponent sign if present */
1455        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1456
1457        /* read all digits */
1458        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1459
1460        /* if we got a digit, then parse as float. if not, backtrack */
1461        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1462            is_float = 1;
1463        }
1464        else {
1465            idx = e_start;
1466        }
1467    }
1468
1469    /* copy the section we determined to be a number */
1470    numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1471    if (numstr == NULL)
1472        return NULL;
1473    if (is_float) {
1474        /* parse as a float using a fast path if available, otherwise call user defined method */
1475        if (s->parse_float != (PyObject *)&PyFloat_Type) {
1476            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1477        }
1478        else {
1479            rval = PyFloat_FromString(numstr, NULL);
1480        }
1481    }
1482    else {
1483        /* no fast path for unicode -> int, just call */
1484        rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1485    }
1486    Py_DECREF(numstr);
1487    *next_idx_ptr = idx;
1488    return rval;
1489}
1490
1491static PyObject *
1492scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1493{
1494    /* Read one JSON term (of any kind) from PyString pystr.
1495    idx is the index of the first character of the term
1496    *next_idx_ptr is a return-by-reference index to the first character after
1497        the number.
1498
1499    Returns a new PyObject representation of the term.
1500    */
1501    PyObject *res;
1502    int strict;
1503    char *str = PyString_AS_STRING(pystr);
1504    Py_ssize_t length = PyString_GET_SIZE(pystr);
1505    if (idx < 0) {
1506        PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1507        return NULL;
1508    }
1509    if (idx >= length) {
1510        PyErr_SetNone(PyExc_StopIteration);
1511        return NULL;
1512    }
1513    switch (str[idx]) {
1514        case '"':
1515            /* string */
1516            strict = PyObject_IsTrue(s->strict);
1517            if (strict < 0)
1518                return NULL;
1519            return scanstring_str(pystr, idx + 1,
1520                PyString_AS_STRING(s->encoding), strict, next_idx_ptr);
1521        case '{':
1522            /* object */
1523            if (Py_EnterRecursiveCall(" while decoding a JSON object "
1524                                      "from a byte string"))
1525                return NULL;
1526            res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1527            Py_LeaveRecursiveCall();
1528            return res;
1529        case '[':
1530            /* array */
1531            if (Py_EnterRecursiveCall(" while decoding a JSON array "
1532                                      "from a byte string"))
1533                return NULL;
1534            res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1535            Py_LeaveRecursiveCall();
1536            return res;
1537        case 'n':
1538            /* null */
1539            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1540                Py_INCREF(Py_None);
1541                *next_idx_ptr = idx + 4;
1542                return Py_None;
1543            }
1544            break;
1545        case 't':
1546            /* true */
1547            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1548                Py_INCREF(Py_True);
1549                *next_idx_ptr = idx + 4;
1550                return Py_True;
1551            }
1552            break;
1553        case 'f':
1554            /* false */
1555            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1556                Py_INCREF(Py_False);
1557                *next_idx_ptr = idx + 5;
1558                return Py_False;
1559            }
1560            break;
1561        case 'N':
1562            /* NaN */
1563            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1564                return _parse_constant(s, "NaN", idx, next_idx_ptr);
1565            }
1566            break;
1567        case 'I':
1568            /* Infinity */
1569            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1570                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1571            }
1572            break;
1573        case '-':
1574            /* -Infinity */
1575            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1576                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1577            }
1578            break;
1579    }
1580    /* Didn't find a string, object, array, or named constant. Look for a number. */
1581    return _match_number_str(s, pystr, idx, next_idx_ptr);
1582}
1583
1584static PyObject *
1585scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1586{
1587    /* Read one JSON term (of any kind) from PyUnicode pystr.
1588    idx is the index of the first character of the term
1589    *next_idx_ptr is a return-by-reference index to the first character after
1590        the number.
1591
1592    Returns a new PyObject representation of the term.
1593    */
1594    PyObject *res;
1595    int strict;
1596    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1597    Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1598    if (idx < 0) {
1599        PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1600        return NULL;
1601    }
1602    if (idx >= length) {
1603        PyErr_SetNone(PyExc_StopIteration);
1604        return NULL;
1605    }
1606    switch (str[idx]) {
1607        case '"':
1608            /* string */
1609            strict = PyObject_IsTrue(s->strict);
1610            if (strict < 0)
1611                return NULL;
1612            return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
1613        case '{':
1614            /* object */
1615            if (Py_EnterRecursiveCall(" while decoding a JSON object "
1616                                      "from a unicode string"))
1617                return NULL;
1618            res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1619            Py_LeaveRecursiveCall();
1620            return res;
1621        case '[':
1622            /* array */
1623            if (Py_EnterRecursiveCall(" while decoding a JSON array "
1624                                      "from a unicode string"))
1625                return NULL;
1626            res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1627            Py_LeaveRecursiveCall();
1628            return res;
1629        case 'n':
1630            /* null */
1631            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1632                Py_INCREF(Py_None);
1633                *next_idx_ptr = idx + 4;
1634                return Py_None;
1635            }
1636            break;
1637        case 't':
1638            /* true */
1639            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1640                Py_INCREF(Py_True);
1641                *next_idx_ptr = idx + 4;
1642                return Py_True;
1643            }
1644            break;
1645        case 'f':
1646            /* false */
1647            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1648                Py_INCREF(Py_False);
1649                *next_idx_ptr = idx + 5;
1650                return Py_False;
1651            }
1652            break;
1653        case 'N':
1654            /* NaN */
1655            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1656                return _parse_constant(s, "NaN", idx, next_idx_ptr);
1657            }
1658            break;
1659        case 'I':
1660            /* Infinity */
1661            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1662                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1663            }
1664            break;
1665        case '-':
1666            /* -Infinity */
1667            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1668                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1669            }
1670            break;
1671    }
1672    /* Didn't find a string, object, array, or named constant. Look for a number. */
1673    return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1674}
1675
1676static PyObject *
1677scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1678{
1679    /* Python callable interface to scan_once_{str,unicode} */
1680    PyObject *pystr;
1681    PyObject *rval;
1682    Py_ssize_t idx;
1683    Py_ssize_t next_idx = -1;
1684    static char *kwlist[] = {"string", "idx", NULL};
1685    PyScannerObject *s;
1686    assert(PyScanner_Check(self));
1687    s = (PyScannerObject *)self;
1688    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1689        return NULL;
1690
1691    if (PyString_Check(pystr)) {
1692        rval = scan_once_str(s, pystr, idx, &next_idx);
1693    }
1694    else if (PyUnicode_Check(pystr)) {
1695        rval = scan_once_unicode(s, pystr, idx, &next_idx);
1696    }
1697    else {
1698        PyErr_Format(PyExc_TypeError,
1699                 "first argument must be a string, not %.80s",
1700                 Py_TYPE(pystr)->tp_name);
1701        return NULL;
1702    }
1703    return _build_rval_index_tuple(rval, next_idx);
1704}
1705
1706static PyObject *
1707scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1708{
1709    PyScannerObject *s;
1710    s = (PyScannerObject *)type->tp_alloc(type, 0);
1711    if (s != NULL) {
1712        s->encoding = NULL;
1713        s->strict = NULL;
1714        s->object_hook = NULL;
1715        s->pairs_hook = NULL;
1716        s->parse_float = NULL;
1717        s->parse_int = NULL;
1718        s->parse_constant = NULL;
1719    }
1720    return (PyObject *)s;
1721}
1722
1723static int
1724scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1725{
1726    /* Initialize Scanner object */
1727    PyObject *ctx;
1728    static char *kwlist[] = {"context", NULL};
1729    PyScannerObject *s;
1730
1731    assert(PyScanner_Check(self));
1732    s = (PyScannerObject *)self;
1733
1734    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1735        return -1;
1736
1737    /* PyString_AS_STRING is used on encoding */
1738    s->encoding = PyObject_GetAttrString(ctx, "encoding");
1739    if (s->encoding == NULL)
1740        goto bail;
1741    if (s->encoding == Py_None) {
1742        Py_DECREF(Py_None);
1743        s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1744    }
1745    else if (PyUnicode_Check(s->encoding)) {
1746        PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1747        Py_SETREF(s->encoding, tmp);
1748    }
1749    if (s->encoding == NULL)
1750        goto bail;
1751    if (!PyString_Check(s->encoding)) {
1752	PyErr_Format(PyExc_TypeError,
1753		     "encoding must be a string, not %.80s",
1754		     Py_TYPE(s->encoding)->tp_name);
1755	goto bail;
1756    }
1757
1758
1759    /* All of these will fail "gracefully" so we don't need to verify them */
1760    s->strict = PyObject_GetAttrString(ctx, "strict");
1761    if (s->strict == NULL)
1762        goto bail;
1763    s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1764    if (s->object_hook == NULL)
1765        goto bail;
1766    s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1767    if (s->pairs_hook == NULL)
1768        goto bail;
1769    s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1770    if (s->parse_float == NULL)
1771        goto bail;
1772    s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1773    if (s->parse_int == NULL)
1774        goto bail;
1775    s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1776    if (s->parse_constant == NULL)
1777        goto bail;
1778
1779    return 0;
1780
1781bail:
1782    Py_CLEAR(s->encoding);
1783    Py_CLEAR(s->strict);
1784    Py_CLEAR(s->object_hook);
1785    Py_CLEAR(s->pairs_hook);
1786    Py_CLEAR(s->parse_float);
1787    Py_CLEAR(s->parse_int);
1788    Py_CLEAR(s->parse_constant);
1789    return -1;
1790}
1791
1792PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1793
1794static
1795PyTypeObject PyScannerType = {
1796    PyObject_HEAD_INIT(NULL)
1797    0,                    /* tp_internal */
1798    "_json.Scanner",       /* tp_name */
1799    sizeof(PyScannerObject), /* tp_basicsize */
1800    0,                    /* tp_itemsize */
1801    scanner_dealloc, /* tp_dealloc */
1802    0,                    /* tp_print */
1803    0,                    /* tp_getattr */
1804    0,                    /* tp_setattr */
1805    0,                    /* tp_compare */
1806    0,                    /* tp_repr */
1807    0,                    /* tp_as_number */
1808    0,                    /* tp_as_sequence */
1809    0,                    /* tp_as_mapping */
1810    0,                    /* tp_hash */
1811    scanner_call,         /* tp_call */
1812    0,                    /* tp_str */
1813    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
1814    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
1815    0,                    /* tp_as_buffer */
1816    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
1817    scanner_doc,          /* tp_doc */
1818    scanner_traverse,                    /* tp_traverse */
1819    scanner_clear,                    /* tp_clear */
1820    0,                    /* tp_richcompare */
1821    0,                    /* tp_weaklistoffset */
1822    0,                    /* tp_iter */
1823    0,                    /* tp_iternext */
1824    0,                    /* tp_methods */
1825    scanner_members,                    /* tp_members */
1826    0,                    /* tp_getset */
1827    0,                    /* tp_base */
1828    0,                    /* tp_dict */
1829    0,                    /* tp_descr_get */
1830    0,                    /* tp_descr_set */
1831    0,                    /* tp_dictoffset */
1832    scanner_init,                    /* tp_init */
1833    0,/* PyType_GenericAlloc, */        /* tp_alloc */
1834    scanner_new,          /* tp_new */
1835    0,/* PyObject_GC_Del, */              /* tp_free */
1836};
1837
1838static PyObject *
1839encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1840{
1841    PyEncoderObject *s;
1842    s = (PyEncoderObject *)type->tp_alloc(type, 0);
1843    if (s != NULL) {
1844        s->markers = NULL;
1845        s->defaultfn = NULL;
1846        s->encoder = NULL;
1847        s->indent = NULL;
1848        s->key_separator = NULL;
1849        s->item_separator = NULL;
1850        s->sort_keys = NULL;
1851        s->skipkeys = NULL;
1852    }
1853    return (PyObject *)s;
1854}
1855
1856static int
1857encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1858{
1859    /* initialize Encoder object */
1860    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1861
1862    PyEncoderObject *s;
1863    PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1864    PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan_obj;
1865    int allow_nan;
1866
1867    assert(PyEncoder_Check(self));
1868    s = (PyEncoderObject *)self;
1869
1870    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1871        &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1872        &sort_keys, &skipkeys, &allow_nan_obj))
1873        return -1;
1874
1875    allow_nan = PyObject_IsTrue(allow_nan_obj);
1876    if (allow_nan < 0)
1877        return -1;
1878
1879    if (markers != Py_None && !PyDict_Check(markers)) {
1880        PyErr_Format(PyExc_TypeError,
1881                     "make_encoder() argument 1 must be dict or None, "
1882                     "not %.200s", Py_TYPE(markers)->tp_name);
1883        return -1;
1884    }
1885
1886    s->markers = markers;
1887    s->defaultfn = defaultfn;
1888    s->encoder = encoder;
1889    s->indent = indent;
1890    s->key_separator = key_separator;
1891    s->item_separator = item_separator;
1892    s->sort_keys = sort_keys;
1893    s->skipkeys = skipkeys;
1894    s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1895    s->allow_nan = allow_nan;
1896
1897    Py_INCREF(s->markers);
1898    Py_INCREF(s->defaultfn);
1899    Py_INCREF(s->encoder);
1900    Py_INCREF(s->indent);
1901    Py_INCREF(s->key_separator);
1902    Py_INCREF(s->item_separator);
1903    Py_INCREF(s->sort_keys);
1904    Py_INCREF(s->skipkeys);
1905    return 0;
1906}
1907
1908static PyObject *
1909encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1910{
1911    /* Python callable interface to encode_listencode_obj */
1912    static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1913    PyObject *obj;
1914    PyObject *rval;
1915    Py_ssize_t indent_level;
1916    PyEncoderObject *s;
1917    assert(PyEncoder_Check(self));
1918    s = (PyEncoderObject *)self;
1919    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1920        &obj, _convertPyInt_AsSsize_t, &indent_level))
1921        return NULL;
1922    rval = PyList_New(0);
1923    if (rval == NULL)
1924        return NULL;
1925    if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1926        Py_DECREF(rval);
1927        return NULL;
1928    }
1929    return rval;
1930}
1931
1932static PyObject *
1933_encoded_const(PyObject *obj)
1934{
1935    /* Return the JSON string representation of None, True, False */
1936    if (obj == Py_None) {
1937        static PyObject *s_null = NULL;
1938        if (s_null == NULL) {
1939            s_null = PyString_InternFromString("null");
1940        }
1941        Py_INCREF(s_null);
1942        return s_null;
1943    }
1944    else if (obj == Py_True) {
1945        static PyObject *s_true = NULL;
1946        if (s_true == NULL) {
1947            s_true = PyString_InternFromString("true");
1948        }
1949        Py_INCREF(s_true);
1950        return s_true;
1951    }
1952    else if (obj == Py_False) {
1953        static PyObject *s_false = NULL;
1954        if (s_false == NULL) {
1955            s_false = PyString_InternFromString("false");
1956        }
1957        Py_INCREF(s_false);
1958        return s_false;
1959    }
1960    else {
1961        PyErr_SetString(PyExc_ValueError, "not a const");
1962        return NULL;
1963    }
1964}
1965
1966static PyObject *
1967encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1968{
1969    /* Return the JSON representation of a PyFloat */
1970    double i = PyFloat_AS_DOUBLE(obj);
1971    if (!Py_IS_FINITE(i)) {
1972        if (!s->allow_nan) {
1973            PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1974            return NULL;
1975        }
1976        if (i > 0) {
1977            return PyString_FromString("Infinity");
1978        }
1979        else if (i < 0) {
1980            return PyString_FromString("-Infinity");
1981        }
1982        else {
1983            return PyString_FromString("NaN");
1984        }
1985    }
1986    /* Make sure to use the base float class repr method */
1987    return PyFloat_Type.tp_repr(obj);
1988}
1989
1990static PyObject *
1991encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1992{
1993    /* Return the JSON representation of a string */
1994    if (s->fast_encode)
1995        return py_encode_basestring_ascii(NULL, obj);
1996    else
1997        return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1998}
1999
2000static int
2001_steal_list_append(PyObject *lst, PyObject *stolen)
2002{
2003    /* Append stolen and then decrement its reference count */
2004    int rval = PyList_Append(lst, stolen);
2005    Py_DECREF(stolen);
2006    return rval;
2007}
2008
2009static int
2010encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
2011{
2012    /* Encode Python object obj to a JSON term, rval is a PyList */
2013    PyObject *newobj;
2014    int rv;
2015
2016    if (obj == Py_None || obj == Py_True || obj == Py_False) {
2017        PyObject *cstr = _encoded_const(obj);
2018        if (cstr == NULL)
2019            return -1;
2020        return _steal_list_append(rval, cstr);
2021    }
2022    else if (PyString_Check(obj) || PyUnicode_Check(obj))
2023    {
2024        PyObject *encoded = encoder_encode_string(s, obj);
2025        if (encoded == NULL)
2026            return -1;
2027        return _steal_list_append(rval, encoded);
2028    }
2029    else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2030        PyObject *encoded = PyObject_Str(obj);
2031        if (encoded == NULL)
2032            return -1;
2033        return _steal_list_append(rval, encoded);
2034    }
2035    else if (PyFloat_Check(obj)) {
2036        PyObject *encoded = encoder_encode_float(s, obj);
2037        if (encoded == NULL)
2038            return -1;
2039        return _steal_list_append(rval, encoded);
2040    }
2041    else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2042        if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2043            return -1;
2044        rv = encoder_listencode_list(s, rval, obj, indent_level);
2045        Py_LeaveRecursiveCall();
2046        return rv;
2047    }
2048    else if (PyDict_Check(obj)) {
2049        if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2050            return -1;
2051        rv = encoder_listencode_dict(s, rval, obj, indent_level);
2052        Py_LeaveRecursiveCall();
2053        return rv;
2054    }
2055    else {
2056        PyObject *ident = NULL;
2057        if (s->markers != Py_None) {
2058            int has_key;
2059            ident = PyLong_FromVoidPtr(obj);
2060            if (ident == NULL)
2061                return -1;
2062            has_key = PyDict_Contains(s->markers, ident);
2063            if (has_key) {
2064                if (has_key != -1)
2065                    PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2066                Py_DECREF(ident);
2067                return -1;
2068            }
2069            if (PyDict_SetItem(s->markers, ident, obj)) {
2070                Py_DECREF(ident);
2071                return -1;
2072            }
2073        }
2074        newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2075        if (newobj == NULL) {
2076            Py_XDECREF(ident);
2077            return -1;
2078        }
2079
2080        if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2081            return -1;
2082        rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2083        Py_LeaveRecursiveCall();
2084
2085        Py_DECREF(newobj);
2086        if (rv) {
2087            Py_XDECREF(ident);
2088            return -1;
2089        }
2090        if (ident != NULL) {
2091            if (PyDict_DelItem(s->markers, ident)) {
2092                Py_XDECREF(ident);
2093                return -1;
2094            }
2095            Py_XDECREF(ident);
2096        }
2097        return rv;
2098    }
2099}
2100
2101static int
2102encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2103{
2104    /* Encode Python dict dct a JSON term, rval is a PyList */
2105    static PyObject *open_dict = NULL;
2106    static PyObject *close_dict = NULL;
2107    static PyObject *empty_dict = NULL;
2108    PyObject *kstr = NULL;
2109    PyObject *ident = NULL;
2110    PyObject *key = NULL;
2111    PyObject *value = NULL;
2112    PyObject *it = NULL;
2113    int skipkeys;
2114    Py_ssize_t idx;
2115
2116    if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2117        open_dict = PyString_InternFromString("{");
2118        close_dict = PyString_InternFromString("}");
2119        empty_dict = PyString_InternFromString("{}");
2120        if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2121            return -1;
2122    }
2123    if (Py_SIZE(dct) == 0)
2124        return PyList_Append(rval, empty_dict);
2125
2126    if (s->markers != Py_None) {
2127        int has_key;
2128        ident = PyLong_FromVoidPtr(dct);
2129        if (ident == NULL)
2130            goto bail;
2131        has_key = PyDict_Contains(s->markers, ident);
2132        if (has_key) {
2133            if (has_key != -1)
2134                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2135            goto bail;
2136        }
2137        if (PyDict_SetItem(s->markers, ident, dct)) {
2138            goto bail;
2139        }
2140    }
2141
2142    if (PyList_Append(rval, open_dict))
2143        goto bail;
2144
2145    if (s->indent != Py_None) {
2146        /* TODO: DOES NOT RUN */
2147        indent_level += 1;
2148        /*
2149            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2150            separator = _item_separator + newline_indent
2151            buf += newline_indent
2152        */
2153    }
2154
2155    /* TODO: C speedup not implemented for sort_keys */
2156
2157    it = PyObject_GetIter(dct);
2158    if (it == NULL)
2159        goto bail;
2160    skipkeys = PyObject_IsTrue(s->skipkeys);
2161    if (skipkeys < 0)
2162        goto bail;
2163    idx = 0;
2164    while ((key = PyIter_Next(it)) != NULL) {
2165        PyObject *encoded;
2166
2167        if (PyString_Check(key) || PyUnicode_Check(key)) {
2168            Py_INCREF(key);
2169            kstr = key;
2170        }
2171        else if (PyFloat_Check(key)) {
2172            kstr = encoder_encode_float(s, key);
2173            if (kstr == NULL)
2174                goto bail;
2175        }
2176        else if (PyInt_Check(key) || PyLong_Check(key)) {
2177            kstr = PyObject_Str(key);
2178            if (kstr == NULL)
2179                goto bail;
2180        }
2181        else if (key == Py_True || key == Py_False || key == Py_None) {
2182            kstr = _encoded_const(key);
2183            if (kstr == NULL)
2184                goto bail;
2185        }
2186        else if (skipkeys) {
2187            Py_DECREF(key);
2188            continue;
2189        }
2190        else {
2191            /* TODO: include repr of key */
2192            PyErr_SetString(PyExc_TypeError, "keys must be a string");
2193            goto bail;
2194        }
2195
2196        if (idx) {
2197            if (PyList_Append(rval, s->item_separator))
2198                goto bail;
2199        }
2200
2201        value = PyObject_GetItem(dct, key);
2202        if (value == NULL)
2203            goto bail;
2204
2205        encoded = encoder_encode_string(s, kstr);
2206        Py_CLEAR(kstr);
2207        if (encoded == NULL)
2208            goto bail;
2209        if (PyList_Append(rval, encoded)) {
2210            Py_DECREF(encoded);
2211            goto bail;
2212        }
2213        Py_DECREF(encoded);
2214        if (PyList_Append(rval, s->key_separator))
2215            goto bail;
2216        if (encoder_listencode_obj(s, rval, value, indent_level))
2217            goto bail;
2218        idx += 1;
2219        Py_CLEAR(value);
2220        Py_DECREF(key);
2221    }
2222    if (PyErr_Occurred())
2223        goto bail;
2224    Py_CLEAR(it);
2225
2226    if (ident != NULL) {
2227        if (PyDict_DelItem(s->markers, ident))
2228            goto bail;
2229        Py_CLEAR(ident);
2230    }
2231    if (s->indent != Py_None) {
2232        /* TODO: DOES NOT RUN */
2233        /*
2234            indent_level -= 1;
2235
2236            yield '\n' + (' ' * (_indent * _current_indent_level))
2237        */
2238    }
2239    if (PyList_Append(rval, close_dict))
2240        goto bail;
2241    return 0;
2242
2243bail:
2244    Py_XDECREF(it);
2245    Py_XDECREF(key);
2246    Py_XDECREF(value);
2247    Py_XDECREF(kstr);
2248    Py_XDECREF(ident);
2249    return -1;
2250}
2251
2252
2253static int
2254encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2255{
2256    /* Encode Python list seq to a JSON term, rval is a PyList */
2257    static PyObject *open_array = NULL;
2258    static PyObject *close_array = NULL;
2259    static PyObject *empty_array = NULL;
2260    PyObject *ident = NULL;
2261    PyObject *s_fast = NULL;
2262    Py_ssize_t i;
2263
2264    if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2265        open_array = PyString_InternFromString("[");
2266        close_array = PyString_InternFromString("]");
2267        empty_array = PyString_InternFromString("[]");
2268        if (open_array == NULL || close_array == NULL || empty_array == NULL)
2269            return -1;
2270    }
2271    ident = NULL;
2272    s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2273    if (s_fast == NULL)
2274        return -1;
2275    if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
2276        Py_DECREF(s_fast);
2277        return PyList_Append(rval, empty_array);
2278    }
2279
2280    if (s->markers != Py_None) {
2281        int has_key;
2282        ident = PyLong_FromVoidPtr(seq);
2283        if (ident == NULL)
2284            goto bail;
2285        has_key = PyDict_Contains(s->markers, ident);
2286        if (has_key) {
2287            if (has_key != -1)
2288                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2289            goto bail;
2290        }
2291        if (PyDict_SetItem(s->markers, ident, seq)) {
2292            goto bail;
2293        }
2294    }
2295
2296    if (PyList_Append(rval, open_array))
2297        goto bail;
2298    if (s->indent != Py_None) {
2299        /* TODO: DOES NOT RUN */
2300        indent_level += 1;
2301        /*
2302            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2303            separator = _item_separator + newline_indent
2304            buf += newline_indent
2305        */
2306    }
2307    for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2308        PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
2309        if (i) {
2310            if (PyList_Append(rval, s->item_separator))
2311                goto bail;
2312        }
2313        if (encoder_listencode_obj(s, rval, obj, indent_level))
2314            goto bail;
2315    }
2316    if (ident != NULL) {
2317        if (PyDict_DelItem(s->markers, ident))
2318            goto bail;
2319        Py_CLEAR(ident);
2320    }
2321    if (s->indent != Py_None) {
2322        /* TODO: DOES NOT RUN */
2323        /*
2324            indent_level -= 1;
2325
2326            yield '\n' + (' ' * (_indent * _current_indent_level))
2327        */
2328    }
2329    if (PyList_Append(rval, close_array))
2330        goto bail;
2331    Py_DECREF(s_fast);
2332    return 0;
2333
2334bail:
2335    Py_XDECREF(ident);
2336    Py_DECREF(s_fast);
2337    return -1;
2338}
2339
2340static void
2341encoder_dealloc(PyObject *self)
2342{
2343    /* Deallocate Encoder */
2344    encoder_clear(self);
2345    Py_TYPE(self)->tp_free(self);
2346}
2347
2348static int
2349encoder_traverse(PyObject *self, visitproc visit, void *arg)
2350{
2351    PyEncoderObject *s;
2352    assert(PyEncoder_Check(self));
2353    s = (PyEncoderObject *)self;
2354    Py_VISIT(s->markers);
2355    Py_VISIT(s->defaultfn);
2356    Py_VISIT(s->encoder);
2357    Py_VISIT(s->indent);
2358    Py_VISIT(s->key_separator);
2359    Py_VISIT(s->item_separator);
2360    Py_VISIT(s->sort_keys);
2361    Py_VISIT(s->skipkeys);
2362    return 0;
2363}
2364
2365static int
2366encoder_clear(PyObject *self)
2367{
2368    /* Deallocate Encoder */
2369    PyEncoderObject *s;
2370    assert(PyEncoder_Check(self));
2371    s = (PyEncoderObject *)self;
2372    Py_CLEAR(s->markers);
2373    Py_CLEAR(s->defaultfn);
2374    Py_CLEAR(s->encoder);
2375    Py_CLEAR(s->indent);
2376    Py_CLEAR(s->key_separator);
2377    Py_CLEAR(s->item_separator);
2378    Py_CLEAR(s->sort_keys);
2379    Py_CLEAR(s->skipkeys);
2380    return 0;
2381}
2382
2383PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2384
2385static
2386PyTypeObject PyEncoderType = {
2387    PyObject_HEAD_INIT(NULL)
2388    0,                    /* tp_internal */
2389    "_json.Encoder",       /* tp_name */
2390    sizeof(PyEncoderObject), /* tp_basicsize */
2391    0,                    /* tp_itemsize */
2392    encoder_dealloc, /* tp_dealloc */
2393    0,                    /* tp_print */
2394    0,                    /* tp_getattr */
2395    0,                    /* tp_setattr */
2396    0,                    /* tp_compare */
2397    0,                    /* tp_repr */
2398    0,                    /* tp_as_number */
2399    0,                    /* tp_as_sequence */
2400    0,                    /* tp_as_mapping */
2401    0,                    /* tp_hash */
2402    encoder_call,         /* tp_call */
2403    0,                    /* tp_str */
2404    0,                    /* tp_getattro */
2405    0,                    /* tp_setattro */
2406    0,                    /* tp_as_buffer */
2407    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
2408    encoder_doc,          /* tp_doc */
2409    encoder_traverse,     /* tp_traverse */
2410    encoder_clear,        /* tp_clear */
2411    0,                    /* tp_richcompare */
2412    0,                    /* tp_weaklistoffset */
2413    0,                    /* tp_iter */
2414    0,                    /* tp_iternext */
2415    0,                    /* tp_methods */
2416    encoder_members,      /* tp_members */
2417    0,                    /* tp_getset */
2418    0,                    /* tp_base */
2419    0,                    /* tp_dict */
2420    0,                    /* tp_descr_get */
2421    0,                    /* tp_descr_set */
2422    0,                    /* tp_dictoffset */
2423    encoder_init,         /* tp_init */
2424    0,                    /* tp_alloc */
2425    encoder_new,          /* tp_new */
2426    0,                    /* tp_free */
2427};
2428
2429static PyMethodDef speedups_methods[] = {
2430    {"encode_basestring_ascii",
2431        (PyCFunction)py_encode_basestring_ascii,
2432        METH_O,
2433        pydoc_encode_basestring_ascii},
2434    {"scanstring",
2435        (PyCFunction)py_scanstring,
2436        METH_VARARGS,
2437        pydoc_scanstring},
2438    {NULL, NULL, 0, NULL}
2439};
2440
2441PyDoc_STRVAR(module_doc,
2442"json speedups\n");
2443
2444void
2445init_json(void)
2446{
2447    PyObject *m;
2448    PyScannerType.tp_new = PyType_GenericNew;
2449    if (PyType_Ready(&PyScannerType) < 0)
2450        return;
2451    PyEncoderType.tp_new = PyType_GenericNew;
2452    if (PyType_Ready(&PyEncoderType) < 0)
2453        return;
2454    m = Py_InitModule3("_json", speedups_methods, module_doc);
2455    if (m == NULL)
2456        return;
2457    Py_INCREF((PyObject*)&PyScannerType);
2458    PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2459    Py_INCREF((PyObject*)&PyEncoderType);
2460    PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2461}
2462