1#include "Python.h"
2#include "structmember.h"
/* Compatibility shims: each definition below is only supplied when the
   headers included above have not already provided it. */
#if !defined(Py_TYPE) && PY_VERSION_HEX < 0x02060000
/* Accessor for an object's type pointer */
#define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
#endif

#if !defined(PY_SSIZE_T_MIN) && PY_VERSION_HEX < 0x02050000
/* No Py_ssize_t available: fall back to int and the PyInt long API */
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif

#if !defined(Py_IS_FINITE)
/* Finite means neither infinite nor NaN */
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED tags a parameter as deliberately unused; it expands to nothing
   on compilers that do not define __GNUC__. */
#if defined(__GNUC__)
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
22
23#define DEFAULT_ENCODING "utf-8"
24
25#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34    PyObject_HEAD
35    PyObject *encoding;
36    PyObject *strict;
37    PyObject *object_hook;
38    PyObject *pairs_hook;
39    PyObject *parse_float;
40    PyObject *parse_int;
41    PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45    {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46    {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47    {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
48    {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
49    {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50    {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51    {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52    {NULL}
53};
54
55typedef struct _PyEncoderObject {
56    PyObject_HEAD
57    PyObject *markers;
58    PyObject *defaultfn;
59    PyObject *encoder;
60    PyObject *indent;
61    PyObject *key_separator;
62    PyObject *item_separator;
63    PyObject *sort_keys;
64    PyObject *skipkeys;
65    int fast_encode;
66    int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78    {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98static int
99scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100static void
101scanner_dealloc(PyObject *self);
102static int
103scanner_clear(PyObject *self);
104static PyObject *
105encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106static int
107encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
113encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114static int
115encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116static int
117encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118static PyObject *
119_encoded_const(PyObject *obj);
120static void
121raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124static int
125_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126static PyObject *
127_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128static PyObject *
129encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* S_CHAR(c): true when c is printable ASCII (' ' .. '~') other than the
   backslash and the double quote, i.e. it can be copied into a JSON
   string without escaping. The argument is parenthesised so expression
   arguments parse as a unit; it is still evaluated more than once, so do
   not pass an expression with side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes needed for one "\uXXXX" escape */
#define MIN_EXPANSION 6
/* Worst-case output bytes for a single input character: wide builds may
   need a surrogate pair, i.e. two "\uXXXX" escapes. */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
140
141static int
142_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143{
144    /* PyObject to Py_ssize_t converter */
145    *size_ptr = PyInt_AsSsize_t(o);
146    if (*size_ptr == -1 && PyErr_Occurred())
147        return 0;
148    return 1;
149}
150
151static PyObject *
152_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153{
154    /* Py_ssize_t to PyObject converter */
155    return PyInt_FromSsize_t(*size_ptr);
156}
157
158static Py_ssize_t
159ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160{
161    /* Escape unicode code point c to ASCII escape sequences
162    in char *output. output must have at least 12 bytes unused to
163    accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
164    output[chars++] = '\\';
165    switch (c) {
166        case '\\': output[chars++] = (char)c; break;
167        case '"': output[chars++] = (char)c; break;
168        case '\b': output[chars++] = 'b'; break;
169        case '\f': output[chars++] = 'f'; break;
170        case '\n': output[chars++] = 'n'; break;
171        case '\r': output[chars++] = 'r'; break;
172        case '\t': output[chars++] = 't'; break;
173        default:
174#ifdef Py_UNICODE_WIDE
175            if (c >= 0x10000) {
176                /* UTF-16 surrogate pair */
177                Py_UNICODE v = c - 0x10000;
178                c = 0xd800 | ((v >> 10) & 0x3ff);
179                output[chars++] = 'u';
180                output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181                output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
182                output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
183                output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
184                c = 0xdc00 | (v & 0x3ff);
185                output[chars++] = '\\';
186            }
187#endif
188            output[chars++] = 'u';
189            output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190            output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
191            output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
192            output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
193    }
194    return chars;
195}
196
197static PyObject *
198ascii_escape_unicode(PyObject *pystr)
199{
200    /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
201    Py_ssize_t i;
202    Py_ssize_t input_chars;
203    Py_ssize_t output_size;
204    Py_ssize_t max_output_size;
205    Py_ssize_t chars;
206    PyObject *rval;
207    char *output;
208    Py_UNICODE *input_unicode;
209
210    input_chars = PyUnicode_GET_SIZE(pystr);
211    input_unicode = PyUnicode_AS_UNICODE(pystr);
212
213    /* One char input can be up to 6 chars output, estimate 4 of these */
214    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
215    max_output_size = 2 + (input_chars * MAX_EXPANSION);
216    rval = PyString_FromStringAndSize(NULL, output_size);
217    if (rval == NULL) {
218        return NULL;
219    }
220    output = PyString_AS_STRING(rval);
221    chars = 0;
222    output[chars++] = '"';
223    for (i = 0; i < input_chars; i++) {
224        Py_UNICODE c = input_unicode[i];
225        if (S_CHAR(c)) {
226            output[chars++] = (char)c;
227        }
228        else {
229            chars = ascii_escape_char(c, output, chars);
230        }
231        if (output_size - chars < (1 + MAX_EXPANSION)) {
232            /* There's more than four, so let's resize by a lot */
233            Py_ssize_t new_output_size = output_size * 2;
234            /* This is an upper bound */
235            if (new_output_size > max_output_size) {
236                new_output_size = max_output_size;
237            }
238            /* Make sure that the output size changed before resizing */
239            if (new_output_size != output_size) {
240                output_size = new_output_size;
241                if (_PyString_Resize(&rval, output_size) == -1) {
242                    return NULL;
243                }
244                output = PyString_AS_STRING(rval);
245            }
246        }
247    }
248    output[chars++] = '"';
249    if (_PyString_Resize(&rval, chars) == -1) {
250        return NULL;
251    }
252    return rval;
253}
254
255static PyObject *
256ascii_escape_str(PyObject *pystr)
257{
258    /* Take a PyString pystr and return a new ASCII-only escaped PyString */
259    Py_ssize_t i;
260    Py_ssize_t input_chars;
261    Py_ssize_t output_size;
262    Py_ssize_t chars;
263    PyObject *rval;
264    char *output;
265    char *input_str;
266
267    input_chars = PyString_GET_SIZE(pystr);
268    input_str = PyString_AS_STRING(pystr);
269
270    /* Fast path for a string that's already ASCII */
271    for (i = 0; i < input_chars; i++) {
272        Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273        if (!S_CHAR(c)) {
274            /* If we have to escape something, scan the string for unicode */
275            Py_ssize_t j;
276            for (j = i; j < input_chars; j++) {
277                c = (Py_UNICODE)(unsigned char)input_str[j];
278                if (c > 0x7f) {
279                    /* We hit a non-ASCII character, bail to unicode mode */
280                    PyObject *uni;
281                    uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282                    if (uni == NULL) {
283                        return NULL;
284                    }
285                    rval = ascii_escape_unicode(uni);
286                    Py_DECREF(uni);
287                    return rval;
288                }
289            }
290            break;
291        }
292    }
293
294    if (i == input_chars) {
295        /* Input is already ASCII */
296        output_size = 2 + input_chars;
297    }
298    else {
299        /* One char input can be up to 6 chars output, estimate 4 of these */
300        output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
301    }
302    rval = PyString_FromStringAndSize(NULL, output_size);
303    if (rval == NULL) {
304        return NULL;
305    }
306    output = PyString_AS_STRING(rval);
307    output[0] = '"';
308
309    /* We know that everything up to i is ASCII already */
310    chars = i + 1;
311    memcpy(&output[1], input_str, i);
312
313    for (; i < input_chars; i++) {
314        Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
315        if (S_CHAR(c)) {
316            output[chars++] = (char)c;
317        }
318        else {
319            chars = ascii_escape_char(c, output, chars);
320        }
321        /* An ASCII char can't possibly expand to a surrogate! */
322        if (output_size - chars < (1 + MIN_EXPANSION)) {
323            /* There's more than four, so let's resize by a lot */
324            output_size *= 2;
325            if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326                output_size = 2 + (input_chars * MIN_EXPANSION);
327            }
328            if (_PyString_Resize(&rval, output_size) == -1) {
329                return NULL;
330            }
331            output = PyString_AS_STRING(rval);
332        }
333    }
334    output[chars++] = '"';
335    if (_PyString_Resize(&rval, chars) == -1) {
336        return NULL;
337    }
338    return rval;
339}
340
341static void
342raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
343{
344    /* Use the Python function json.decoder.errmsg to raise a nice
345    looking ValueError exception */
346    static PyObject *errmsg_fn = NULL;
347    PyObject *pymsg;
348    if (errmsg_fn == NULL) {
349        PyObject *decoder = PyImport_ImportModule("json.decoder");
350        if (decoder == NULL)
351            return;
352        errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
353        Py_DECREF(decoder);
354        if (errmsg_fn == NULL)
355            return;
356    }
357    pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
358    if (pymsg) {
359        PyErr_SetObject(PyExc_ValueError, pymsg);
360        Py_DECREF(pymsg);
361    }
362}
363
364static PyObject *
365join_list_unicode(PyObject *lst)
366{
367    /* return u''.join(lst) */
368    static PyObject *joinfn = NULL;
369    if (joinfn == NULL) {
370        PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371        if (ustr == NULL)
372            return NULL;
373
374        joinfn = PyObject_GetAttrString(ustr, "join");
375        Py_DECREF(ustr);
376        if (joinfn == NULL)
377            return NULL;
378    }
379    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
380}
381
382static PyObject *
383_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
384    /* return (rval, idx) tuple, stealing reference to rval */
385    PyObject *tpl;
386    PyObject *pyidx;
387    /*
388    steal a reference to rval, returns (rval, idx)
389    */
390    if (rval == NULL) {
391        return NULL;
392    }
393    pyidx = PyInt_FromSsize_t(idx);
394    if (pyidx == NULL) {
395        Py_DECREF(rval);
396        return NULL;
397    }
398    tpl = PyTuple_New(2);
399    if (tpl == NULL) {
400        Py_DECREF(pyidx);
401        Py_DECREF(rval);
402        return NULL;
403    }
404    PyTuple_SET_ITEM(tpl, 0, rval);
405    PyTuple_SET_ITEM(tpl, 1, pyidx);
406    return tpl;
407}
408
409static PyObject *
410scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
411{
412    /* Read the JSON string from PyString pystr.
413    end is the index of the first character after the quote.
414    encoding is the encoding of pystr (must be an ASCII superset)
415    if strict is zero then literal control characters are allowed
416    *next_end_ptr is a return-by-reference index of the character
417        after the end quote
418
419    Return value is a new PyString (if ASCII-only) or PyUnicode
420    */
421    PyObject *rval;
422    Py_ssize_t len = PyString_GET_SIZE(pystr);
423    Py_ssize_t begin = end - 1;
424    Py_ssize_t next;
425    char *buf = PyString_AS_STRING(pystr);
426    PyObject *chunks = PyList_New(0);
427    if (chunks == NULL) {
428        goto bail;
429    }
430    if (end < 0 || len <= end) {
431        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
432        goto bail;
433    }
434    while (1) {
435        /* Find the end of the string or the next escape */
436        Py_UNICODE c = 0;
437        PyObject *chunk = NULL;
438        for (next = end; next < len; next++) {
439            c = (unsigned char)buf[next];
440            if (c == '"' || c == '\\') {
441                break;
442            }
443            else if (strict && c <= 0x1f) {
444                raise_errmsg("Invalid control character at", pystr, next);
445                goto bail;
446            }
447        }
448        if (!(c == '"' || c == '\\')) {
449            raise_errmsg("Unterminated string starting at", pystr, begin);
450            goto bail;
451        }
452        /* Pick up this chunk if it's not zero length */
453        if (next != end) {
454            PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
455            if (strchunk == NULL) {
456                goto bail;
457            }
458            chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
459            Py_DECREF(strchunk);
460            if (chunk == NULL) {
461                goto bail;
462            }
463            if (PyList_Append(chunks, chunk)) {
464                Py_DECREF(chunk);
465                goto bail;
466            }
467            Py_DECREF(chunk);
468        }
469        next++;
470        if (c == '"') {
471            end = next;
472            break;
473        }
474        if (next == len) {
475            raise_errmsg("Unterminated string starting at", pystr, begin);
476            goto bail;
477        }
478        c = buf[next];
479        if (c != 'u') {
480            /* Non-unicode backslash escapes */
481            end = next + 1;
482            switch (c) {
483                case '"': break;
484                case '\\': break;
485                case '/': break;
486                case 'b': c = '\b'; break;
487                case 'f': c = '\f'; break;
488                case 'n': c = '\n'; break;
489                case 'r': c = '\r'; break;
490                case 't': c = '\t'; break;
491                default: c = 0;
492            }
493            if (c == 0) {
494                raise_errmsg("Invalid \\escape", pystr, end - 2);
495                goto bail;
496            }
497        }
498        else {
499            c = 0;
500            next++;
501            end = next + 4;
502            if (end >= len) {
503                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
504                goto bail;
505            }
506            /* Decode 4 hex digits */
507            for (; next < end; next++) {
508                Py_UNICODE digit = buf[next];
509                c <<= 4;
510                switch (digit) {
511                    case '0': case '1': case '2': case '3': case '4':
512                    case '5': case '6': case '7': case '8': case '9':
513                        c |= (digit - '0'); break;
514                    case 'a': case 'b': case 'c': case 'd': case 'e':
515                    case 'f':
516                        c |= (digit - 'a' + 10); break;
517                    case 'A': case 'B': case 'C': case 'D': case 'E':
518                    case 'F':
519                        c |= (digit - 'A' + 10); break;
520                    default:
521                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
522                        goto bail;
523                }
524            }
525#ifdef Py_UNICODE_WIDE
526            /* Surrogate pair */
527            if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
528                buf[next++] == '\\' &&
529                buf[next++] == 'u') {
530                Py_UNICODE c2 = 0;
531                end += 6;
532                /* Decode 4 hex digits */
533                for (; next < end; next++) {
534                    Py_UNICODE digit = buf[next];
535                    c2 <<= 4;
536                    switch (digit) {
537                        case '0': case '1': case '2': case '3': case '4':
538                        case '5': case '6': case '7': case '8': case '9':
539                            c2 |= (digit - '0'); break;
540                        case 'a': case 'b': case 'c': case 'd': case 'e':
541                        case 'f':
542                            c2 |= (digit - 'a' + 10); break;
543                        case 'A': case 'B': case 'C': case 'D': case 'E':
544                        case 'F':
545                            c2 |= (digit - 'A' + 10); break;
546                        default:
547                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
548                            goto bail;
549                    }
550                }
551                if ((c2 & 0xfc00) == 0xdc00)
552                    c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
553                else
554                    end -= 6;
555            }
556#endif
557        }
558        chunk = PyUnicode_FromUnicode(&c, 1);
559        if (chunk == NULL) {
560            goto bail;
561        }
562        if (PyList_Append(chunks, chunk)) {
563            Py_DECREF(chunk);
564            goto bail;
565        }
566        Py_DECREF(chunk);
567    }
568
569    rval = join_list_unicode(chunks);
570    if (rval == NULL) {
571        goto bail;
572    }
573    Py_CLEAR(chunks);
574    *next_end_ptr = end;
575    return rval;
576bail:
577    *next_end_ptr = -1;
578    Py_XDECREF(chunks);
579    return NULL;
580}
581
582
583static PyObject *
584scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
585{
586    /* Read the JSON string from PyUnicode pystr.
587    end is the index of the first character after the quote.
588    if strict is zero then literal control characters are allowed
589    *next_end_ptr is a return-by-reference index of the character
590        after the end quote
591
592    Return value is a new PyUnicode
593    */
594    PyObject *rval;
595    Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
596    Py_ssize_t begin = end - 1;
597    Py_ssize_t next;
598    const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
599    PyObject *chunks = PyList_New(0);
600    if (chunks == NULL) {
601        goto bail;
602    }
603    if (end < 0 || len <= end) {
604        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
605        goto bail;
606    }
607    while (1) {
608        /* Find the end of the string or the next escape */
609        Py_UNICODE c = 0;
610        PyObject *chunk = NULL;
611        for (next = end; next < len; next++) {
612            c = buf[next];
613            if (c == '"' || c == '\\') {
614                break;
615            }
616            else if (strict && c <= 0x1f) {
617                raise_errmsg("Invalid control character at", pystr, next);
618                goto bail;
619            }
620        }
621        if (!(c == '"' || c == '\\')) {
622            raise_errmsg("Unterminated string starting at", pystr, begin);
623            goto bail;
624        }
625        /* Pick up this chunk if it's not zero length */
626        if (next != end) {
627            chunk = PyUnicode_FromUnicode(&buf[end], next - end);
628            if (chunk == NULL) {
629                goto bail;
630            }
631            if (PyList_Append(chunks, chunk)) {
632                Py_DECREF(chunk);
633                goto bail;
634            }
635            Py_DECREF(chunk);
636        }
637        next++;
638        if (c == '"') {
639            end = next;
640            break;
641        }
642        if (next == len) {
643            raise_errmsg("Unterminated string starting at", pystr, begin);
644            goto bail;
645        }
646        c = buf[next];
647        if (c != 'u') {
648            /* Non-unicode backslash escapes */
649            end = next + 1;
650            switch (c) {
651                case '"': break;
652                case '\\': break;
653                case '/': break;
654                case 'b': c = '\b'; break;
655                case 'f': c = '\f'; break;
656                case 'n': c = '\n'; break;
657                case 'r': c = '\r'; break;
658                case 't': c = '\t'; break;
659                default: c = 0;
660            }
661            if (c == 0) {
662                raise_errmsg("Invalid \\escape", pystr, end - 2);
663                goto bail;
664            }
665        }
666        else {
667            c = 0;
668            next++;
669            end = next + 4;
670            if (end >= len) {
671                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
672                goto bail;
673            }
674            /* Decode 4 hex digits */
675            for (; next < end; next++) {
676                Py_UNICODE digit = buf[next];
677                c <<= 4;
678                switch (digit) {
679                    case '0': case '1': case '2': case '3': case '4':
680                    case '5': case '6': case '7': case '8': case '9':
681                        c |= (digit - '0'); break;
682                    case 'a': case 'b': case 'c': case 'd': case 'e':
683                    case 'f':
684                        c |= (digit - 'a' + 10); break;
685                    case 'A': case 'B': case 'C': case 'D': case 'E':
686                    case 'F':
687                        c |= (digit - 'A' + 10); break;
688                    default:
689                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
690                        goto bail;
691                }
692            }
693#ifdef Py_UNICODE_WIDE
694            /* Surrogate pair */
695            if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
696                buf[next++] == '\\' && buf[next++] == 'u') {
697                Py_UNICODE c2 = 0;
698                end += 6;
699                /* Decode 4 hex digits */
700                for (; next < end; next++) {
701                    Py_UNICODE digit = buf[next];
702                    c2 <<= 4;
703                    switch (digit) {
704                        case '0': case '1': case '2': case '3': case '4':
705                        case '5': case '6': case '7': case '8': case '9':
706                            c2 |= (digit - '0'); break;
707                        case 'a': case 'b': case 'c': case 'd': case 'e':
708                        case 'f':
709                            c2 |= (digit - 'a' + 10); break;
710                        case 'A': case 'B': case 'C': case 'D': case 'E':
711                        case 'F':
712                            c2 |= (digit - 'A' + 10); break;
713                        default:
714                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
715                            goto bail;
716                    }
717                }
718                if ((c2 & 0xfc00) == 0xdc00)
719                    c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
720                else
721                    end -= 6;
722            }
723#endif
724        }
725        chunk = PyUnicode_FromUnicode(&c, 1);
726        if (chunk == NULL) {
727            goto bail;
728        }
729        if (PyList_Append(chunks, chunk)) {
730            Py_DECREF(chunk);
731            goto bail;
732        }
733        Py_DECREF(chunk);
734    }
735
736    rval = join_list_unicode(chunks);
737    if (rval == NULL) {
738        goto bail;
739    }
740    Py_DECREF(chunks);
741    *next_end_ptr = end;
742    return rval;
743bail:
744    *next_end_ptr = -1;
745    Py_XDECREF(chunks);
746    return NULL;
747}
748
749PyDoc_STRVAR(pydoc_scanstring,
750    "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
751    "\n"
752    "Scan the string s for a JSON string. End is the index of the\n"
753    "character in s after the quote that started the JSON string.\n"
754    "Unescapes all valid JSON string escape sequences and raises ValueError\n"
755    "on attempt to decode an invalid string. If strict is False then literal\n"
756    "control characters are allowed in the string.\n"
757    "\n"
758    "Returns a tuple of the decoded string and the index of the character in s\n"
759    "after the end quote."
760);
761
762static PyObject *
763py_scanstring(PyObject* self UNUSED, PyObject *args)
764{
765    PyObject *pystr;
766    PyObject *rval;
767    Py_ssize_t end;
768    Py_ssize_t next_end = -1;
769    char *encoding = NULL;
770    int strict = 1;
771    if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
772        return NULL;
773    }
774    if (encoding == NULL) {
775        encoding = DEFAULT_ENCODING;
776    }
777    if (PyString_Check(pystr)) {
778        rval = scanstring_str(pystr, end, encoding, strict, &next_end);
779    }
780    else if (PyUnicode_Check(pystr)) {
781        rval = scanstring_unicode(pystr, end, strict, &next_end);
782    }
783    else {
784        PyErr_Format(PyExc_TypeError,
785                     "first argument must be a string, not %.80s",
786                     Py_TYPE(pystr)->tp_name);
787        return NULL;
788    }
789    return _build_rval_index_tuple(rval, next_end);
790}
791
792PyDoc_STRVAR(pydoc_encode_basestring_ascii,
793    "encode_basestring_ascii(basestring) -> str\n"
794    "\n"
795    "Return an ASCII-only JSON representation of a Python string"
796);
797
798static PyObject *
799py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
800{
801    /* Return an ASCII-only JSON representation of a Python string */
802    /* METH_O */
803    if (PyString_Check(pystr)) {
804        return ascii_escape_str(pystr);
805    }
806    else if (PyUnicode_Check(pystr)) {
807        return ascii_escape_unicode(pystr);
808    }
809    else {
810        PyErr_Format(PyExc_TypeError,
811                     "first argument must be a string, not %.80s",
812                     Py_TYPE(pystr)->tp_name);
813        return NULL;
814    }
815}
816
817static void
818scanner_dealloc(PyObject *self)
819{
820    /* Deallocate scanner object */
821    scanner_clear(self);
822    Py_TYPE(self)->tp_free(self);
823}
824
825static int
826scanner_traverse(PyObject *self, visitproc visit, void *arg)
827{
828    PyScannerObject *s;
829    assert(PyScanner_Check(self));
830    s = (PyScannerObject *)self;
831    Py_VISIT(s->encoding);
832    Py_VISIT(s->strict);
833    Py_VISIT(s->object_hook);
834    Py_VISIT(s->pairs_hook);
835    Py_VISIT(s->parse_float);
836    Py_VISIT(s->parse_int);
837    Py_VISIT(s->parse_constant);
838    return 0;
839}
840
841static int
842scanner_clear(PyObject *self)
843{
844    PyScannerObject *s;
845    assert(PyScanner_Check(self));
846    s = (PyScannerObject *)self;
847    Py_CLEAR(s->encoding);
848    Py_CLEAR(s->strict);
849    Py_CLEAR(s->object_hook);
850    Py_CLEAR(s->pairs_hook);
851    Py_CLEAR(s->parse_float);
852    Py_CLEAR(s->parse_int);
853    Py_CLEAR(s->parse_constant);
854    return 0;
855}
856
857static PyObject *
858_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
859    /* Read a JSON object from PyString pystr.
860    idx is the index of the first character after the opening curly brace.
861    *next_idx_ptr is a return-by-reference index to the first character after
862        the closing curly brace.
863
864    Returns a new PyObject (usually a dict, but object_hook can change that)
865    */
866    char *str = PyString_AS_STRING(pystr);
867    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
868    PyObject *rval;
869    PyObject *pairs;
870    PyObject *item;
871    PyObject *key = NULL;
872    PyObject *val = NULL;
873    char *encoding = PyString_AS_STRING(s->encoding);
874    int strict = PyObject_IsTrue(s->strict);
875    Py_ssize_t next_idx;
876
877    pairs = PyList_New(0);
878    if (pairs == NULL)
879        return NULL;
880
881    /* skip whitespace after { */
882    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
883
884    /* only loop if the object is non-empty */
885    if (idx <= end_idx && str[idx] != '}') {
886        while (idx <= end_idx) {
887            /* read key */
888            if (str[idx] != '"') {
889                raise_errmsg("Expecting property name", pystr, idx);
890                goto bail;
891            }
892            key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
893            if (key == NULL)
894                goto bail;
895            idx = next_idx;
896
897            /* skip whitespace between key and : delimiter, read :, skip whitespace */
898            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
899            if (idx > end_idx || str[idx] != ':') {
900                raise_errmsg("Expecting : delimiter", pystr, idx);
901                goto bail;
902            }
903            idx++;
904            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
905
906            /* read any JSON data type */
907            val = scan_once_str(s, pystr, idx, &next_idx);
908            if (val == NULL)
909                goto bail;
910
911            item = PyTuple_Pack(2, key, val);
912            if (item == NULL)
913                goto bail;
914            Py_CLEAR(key);
915            Py_CLEAR(val);
916            if (PyList_Append(pairs, item) == -1) {
917                Py_DECREF(item);
918                goto bail;
919            }
920            Py_DECREF(item);
921            idx = next_idx;
922
923            /* skip whitespace before } or , */
924            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
925
926            /* bail if the object is closed or we didn't get the , delimiter */
927            if (idx > end_idx) break;
928            if (str[idx] == '}') {
929                break;
930            }
931            else if (str[idx] != ',') {
932                raise_errmsg("Expecting , delimiter", pystr, idx);
933                goto bail;
934            }
935            idx++;
936
937            /* skip whitespace after , delimiter */
938            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
939        }
940    }
941    /* verify that idx < end_idx, str[idx] should be '}' */
942    if (idx > end_idx || str[idx] != '}') {
943        raise_errmsg("Expecting object", pystr, end_idx);
944        goto bail;
945    }
946
947    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
948    if (s->pairs_hook != Py_None) {
949        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
950        if (val == NULL)
951            goto bail;
952        Py_DECREF(pairs);
953        *next_idx_ptr = idx + 1;
954        return val;
955    }
956
957    rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
958                                         pairs, NULL);
959    if (rval == NULL)
960        goto bail;
961    Py_CLEAR(pairs);
962
963    /* if object_hook is not None: rval = object_hook(rval) */
964    if (s->object_hook != Py_None) {
965        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
966        if (val == NULL)
967            goto bail;
968        Py_DECREF(rval);
969        rval = val;
970        val = NULL;
971    }
972    *next_idx_ptr = idx + 1;
973    return rval;
974bail:
975    Py_XDECREF(key);
976    Py_XDECREF(val);
977    Py_XDECREF(pairs);
978    return NULL;
979}
980
981static PyObject *
982_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
983    /* Read a JSON object from PyUnicode pystr.
984    idx is the index of the first character after the opening curly brace.
985    *next_idx_ptr is a return-by-reference index to the first character after
986        the closing curly brace.
987
988    Returns a new PyObject (usually a dict, but object_hook can change that)
989    */
990    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
991    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
992    PyObject *rval;
993    PyObject *pairs;
994    PyObject *item;
995    PyObject *key = NULL;
996    PyObject *val = NULL;
997    int strict = PyObject_IsTrue(s->strict);
998    Py_ssize_t next_idx;
999
1000    pairs = PyList_New(0);
1001    if (pairs == NULL)
1002        return NULL;
1003
1004    /* skip whitespace after { */
1005    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1006
1007    /* only loop if the object is non-empty */
1008    if (idx <= end_idx && str[idx] != '}') {
1009        while (idx <= end_idx) {
1010            /* read key */
1011            if (str[idx] != '"') {
1012                raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
1013                goto bail;
1014            }
1015            key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1016            if (key == NULL)
1017                goto bail;
1018            idx = next_idx;
1019
1020            /* skip whitespace between key and : delimiter, read :, skip whitespace */
1021            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1022            if (idx > end_idx || str[idx] != ':') {
1023                raise_errmsg("Expecting ':' delimiter", pystr, idx);
1024                goto bail;
1025            }
1026            idx++;
1027            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1028
1029            /* read any JSON term */
1030            val = scan_once_unicode(s, pystr, idx, &next_idx);
1031            if (val == NULL)
1032                goto bail;
1033
1034            item = PyTuple_Pack(2, key, val);
1035            if (item == NULL)
1036                goto bail;
1037            Py_CLEAR(key);
1038            Py_CLEAR(val);
1039            if (PyList_Append(pairs, item) == -1) {
1040                Py_DECREF(item);
1041                goto bail;
1042            }
1043            Py_DECREF(item);
1044            idx = next_idx;
1045
1046            /* skip whitespace before } or , */
1047            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1048
1049            /* bail if the object is closed or we didn't get the , delimiter */
1050            if (idx > end_idx) break;
1051            if (str[idx] == '}') {
1052                break;
1053            }
1054            else if (str[idx] != ',') {
1055                raise_errmsg("Expecting ',' delimiter", pystr, idx);
1056                goto bail;
1057            }
1058            idx++;
1059
1060            /* skip whitespace after , delimiter */
1061            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1062        }
1063    }
1064
1065    /* verify that idx < end_idx, str[idx] should be '}' */
1066    if (idx > end_idx || str[idx] != '}') {
1067        raise_errmsg("Expecting object", pystr, end_idx);
1068        goto bail;
1069    }
1070
1071    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1072    if (s->pairs_hook != Py_None) {
1073        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1074        if (val == NULL)
1075            goto bail;
1076        Py_DECREF(pairs);
1077        *next_idx_ptr = idx + 1;
1078        return val;
1079    }
1080
1081    rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1082                                         pairs, NULL);
1083    if (rval == NULL)
1084        goto bail;
1085    Py_CLEAR(pairs);
1086
1087    /* if object_hook is not None: rval = object_hook(rval) */
1088    if (s->object_hook != Py_None) {
1089        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1090        if (val == NULL)
1091            goto bail;
1092        Py_DECREF(rval);
1093        rval = val;
1094        val = NULL;
1095    }
1096    *next_idx_ptr = idx + 1;
1097    return rval;
1098bail:
1099    Py_XDECREF(key);
1100    Py_XDECREF(val);
1101    Py_XDECREF(pairs);
1102    return NULL;
1103}
1104
1105static PyObject *
1106_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1107    /* Read a JSON array from PyString pystr.
1108    idx is the index of the first character after the opening brace.
1109    *next_idx_ptr is a return-by-reference index to the first character after
1110        the closing brace.
1111
1112    Returns a new PyList
1113    */
1114    char *str = PyString_AS_STRING(pystr);
1115    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1116    PyObject *val = NULL;
1117    PyObject *rval = PyList_New(0);
1118    Py_ssize_t next_idx;
1119    if (rval == NULL)
1120        return NULL;
1121
1122    /* skip whitespace after [ */
1123    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1124
1125    /* only loop if the array is non-empty */
1126    if (idx <= end_idx && str[idx] != ']') {
1127        while (idx <= end_idx) {
1128
1129            /* read any JSON term and de-tuplefy the (rval, idx) */
1130            val = scan_once_str(s, pystr, idx, &next_idx);
1131            if (val == NULL)
1132                goto bail;
1133
1134            if (PyList_Append(rval, val) == -1)
1135                goto bail;
1136
1137            Py_CLEAR(val);
1138            idx = next_idx;
1139
1140            /* skip whitespace between term and , */
1141            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1142
1143            /* bail if the array is closed or we didn't get the , delimiter */
1144            if (idx > end_idx) break;
1145            if (str[idx] == ']') {
1146                break;
1147            }
1148            else if (str[idx] != ',') {
1149                raise_errmsg("Expecting , delimiter", pystr, idx);
1150                goto bail;
1151            }
1152            idx++;
1153
1154            /* skip whitespace after , */
1155            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1156        }
1157    }
1158
1159    /* verify that idx < end_idx, str[idx] should be ']' */
1160    if (idx > end_idx || str[idx] != ']') {
1161        raise_errmsg("Expecting object", pystr, end_idx);
1162        goto bail;
1163    }
1164    *next_idx_ptr = idx + 1;
1165    return rval;
1166bail:
1167    Py_XDECREF(val);
1168    Py_DECREF(rval);
1169    return NULL;
1170}
1171
1172static PyObject *
1173_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1174    /* Read a JSON array from PyString pystr.
1175    idx is the index of the first character after the opening brace.
1176    *next_idx_ptr is a return-by-reference index to the first character after
1177        the closing brace.
1178
1179    Returns a new PyList
1180    */
1181    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1182    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1183    PyObject *val = NULL;
1184    PyObject *rval = PyList_New(0);
1185    Py_ssize_t next_idx;
1186    if (rval == NULL)
1187        return NULL;
1188
1189    /* skip whitespace after [ */
1190    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1191
1192    /* only loop if the array is non-empty */
1193    if (idx <= end_idx && str[idx] != ']') {
1194        while (idx <= end_idx) {
1195
1196            /* read any JSON term  */
1197            val = scan_once_unicode(s, pystr, idx, &next_idx);
1198            if (val == NULL)
1199                goto bail;
1200
1201            if (PyList_Append(rval, val) == -1)
1202                goto bail;
1203
1204            Py_CLEAR(val);
1205            idx = next_idx;
1206
1207            /* skip whitespace between term and , */
1208            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1209
1210            /* bail if the array is closed or we didn't get the , delimiter */
1211            if (idx > end_idx) break;
1212            if (str[idx] == ']') {
1213                break;
1214            }
1215            else if (str[idx] != ',') {
1216                raise_errmsg("Expecting ',' delimiter", pystr, idx);
1217                goto bail;
1218            }
1219            idx++;
1220
1221            /* skip whitespace after , */
1222            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1223        }
1224    }
1225
1226    /* verify that idx < end_idx, str[idx] should be ']' */
1227    if (idx > end_idx || str[idx] != ']') {
1228        raise_errmsg("Expecting object", pystr, end_idx);
1229        goto bail;
1230    }
1231    *next_idx_ptr = idx + 1;
1232    return rval;
1233bail:
1234    Py_XDECREF(val);
1235    Py_DECREF(rval);
1236    return NULL;
1237}
1238
1239static PyObject *
1240_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1241    /* Read a JSON constant from PyString pystr.
1242    constant is the constant string that was found
1243        ("NaN", "Infinity", "-Infinity").
1244    idx is the index of the first character of the constant
1245    *next_idx_ptr is a return-by-reference index to the first character after
1246        the constant.
1247
1248    Returns the result of parse_constant
1249    */
1250    PyObject *cstr;
1251    PyObject *rval;
1252    /* constant is "NaN", "Infinity", or "-Infinity" */
1253    cstr = PyString_InternFromString(constant);
1254    if (cstr == NULL)
1255        return NULL;
1256
1257    /* rval = parse_constant(constant) */
1258    rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1259    idx += PyString_GET_SIZE(cstr);
1260    Py_DECREF(cstr);
1261    *next_idx_ptr = idx;
1262    return rval;
1263}
1264
1265static PyObject *
1266_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1267    /* Read a JSON number from PyString pystr.
1268    idx is the index of the first character of the number
1269    *next_idx_ptr is a return-by-reference index to the first character after
1270        the number.
1271
1272    Returns a new PyObject representation of that number:
1273        PyInt, PyLong, or PyFloat.
1274        May return other types if parse_int or parse_float are set
1275    */
1276    char *str = PyString_AS_STRING(pystr);
1277    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1278    Py_ssize_t idx = start;
1279    int is_float = 0;
1280    PyObject *rval;
1281    PyObject *numstr;
1282
1283    /* read a sign if it's there, make sure it's not the end of the string */
1284    if (str[idx] == '-') {
1285        idx++;
1286        if (idx > end_idx) {
1287            PyErr_SetNone(PyExc_StopIteration);
1288            return NULL;
1289        }
1290    }
1291
1292    /* read as many integer digits as we find as long as it doesn't start with 0 */
1293    if (str[idx] >= '1' && str[idx] <= '9') {
1294        idx++;
1295        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1296    }
1297    /* if it starts with 0 we only expect one integer digit */
1298    else if (str[idx] == '0') {
1299        idx++;
1300    }
1301    /* no integer digits, error */
1302    else {
1303        PyErr_SetNone(PyExc_StopIteration);
1304        return NULL;
1305    }
1306
1307    /* if the next char is '.' followed by a digit then read all float digits */
1308    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1309        is_float = 1;
1310        idx += 2;
1311        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1312    }
1313
1314    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1315    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1316
1317        /* save the index of the 'e' or 'E' just in case we need to backtrack */
1318        Py_ssize_t e_start = idx;
1319        idx++;
1320
1321        /* read an exponent sign if present */
1322        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1323
1324        /* read all digits */
1325        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1326
1327        /* if we got a digit, then parse as float. if not, backtrack */
1328        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1329            is_float = 1;
1330        }
1331        else {
1332            idx = e_start;
1333        }
1334    }
1335
1336    /* copy the section we determined to be a number */
1337    numstr = PyString_FromStringAndSize(&str[start], idx - start);
1338    if (numstr == NULL)
1339        return NULL;
1340    if (is_float) {
1341        /* parse as a float using a fast path if available, otherwise call user defined method */
1342        if (s->parse_float != (PyObject *)&PyFloat_Type) {
1343            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1344        }
1345        else {
1346            double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1347                                             NULL, NULL);
1348            if (d == -1.0 && PyErr_Occurred())
1349                return NULL;
1350            rval = PyFloat_FromDouble(d);
1351        }
1352    }
1353    else {
1354        /* parse as an int using a fast path if available, otherwise call user defined method */
1355        if (s->parse_int != (PyObject *)&PyInt_Type) {
1356            rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1357        }
1358        else {
1359            rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1360        }
1361    }
1362    Py_DECREF(numstr);
1363    *next_idx_ptr = idx;
1364    return rval;
1365}
1366
1367static PyObject *
1368_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1369    /* Read a JSON number from PyUnicode pystr.
1370    idx is the index of the first character of the number
1371    *next_idx_ptr is a return-by-reference index to the first character after
1372        the number.
1373
1374    Returns a new PyObject representation of that number:
1375        PyInt, PyLong, or PyFloat.
1376        May return other types if parse_int or parse_float are set
1377    */
1378    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1379    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1380    Py_ssize_t idx = start;
1381    int is_float = 0;
1382    PyObject *rval;
1383    PyObject *numstr;
1384
1385    /* read a sign if it's there, make sure it's not the end of the string */
1386    if (str[idx] == '-') {
1387        idx++;
1388        if (idx > end_idx) {
1389            PyErr_SetNone(PyExc_StopIteration);
1390            return NULL;
1391        }
1392    }
1393
1394    /* read as many integer digits as we find as long as it doesn't start with 0 */
1395    if (str[idx] >= '1' && str[idx] <= '9') {
1396        idx++;
1397        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1398    }
1399    /* if it starts with 0 we only expect one integer digit */
1400    else if (str[idx] == '0') {
1401        idx++;
1402    }
1403    /* no integer digits, error */
1404    else {
1405        PyErr_SetNone(PyExc_StopIteration);
1406        return NULL;
1407    }
1408
1409    /* if the next char is '.' followed by a digit then read all float digits */
1410    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1411        is_float = 1;
1412        idx += 2;
1413        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1414    }
1415
1416    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1417    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1418        Py_ssize_t e_start = idx;
1419        idx++;
1420
1421        /* read an exponent sign if present */
1422        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1423
1424        /* read all digits */
1425        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1426
1427        /* if we got a digit, then parse as float. if not, backtrack */
1428        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1429            is_float = 1;
1430        }
1431        else {
1432            idx = e_start;
1433        }
1434    }
1435
1436    /* copy the section we determined to be a number */
1437    numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1438    if (numstr == NULL)
1439        return NULL;
1440    if (is_float) {
1441        /* parse as a float using a fast path if available, otherwise call user defined method */
1442        if (s->parse_float != (PyObject *)&PyFloat_Type) {
1443            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1444        }
1445        else {
1446            rval = PyFloat_FromString(numstr, NULL);
1447        }
1448    }
1449    else {
1450        /* no fast path for unicode -> int, just call */
1451        rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1452    }
1453    Py_DECREF(numstr);
1454    *next_idx_ptr = idx;
1455    return rval;
1456}
1457
1458static PyObject *
1459scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1460{
1461    /* Read one JSON term (of any kind) from PyString pystr.
1462    idx is the index of the first character of the term
1463    *next_idx_ptr is a return-by-reference index to the first character after
1464        the number.
1465
1466    Returns a new PyObject representation of the term.
1467    */
1468    PyObject *res;
1469    char *str = PyString_AS_STRING(pystr);
1470    Py_ssize_t length = PyString_GET_SIZE(pystr);
1471    if (idx < 0) {
1472        PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1473        return NULL;
1474    }
1475    if (idx >= length) {
1476        PyErr_SetNone(PyExc_StopIteration);
1477        return NULL;
1478    }
1479    switch (str[idx]) {
1480        case '"':
1481            /* string */
1482            return scanstring_str(pystr, idx + 1,
1483                PyString_AS_STRING(s->encoding),
1484                PyObject_IsTrue(s->strict),
1485                next_idx_ptr);
1486        case '{':
1487            /* object */
1488            if (Py_EnterRecursiveCall(" while decoding a JSON object "
1489                                      "from a byte string"))
1490                return NULL;
1491            res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1492            Py_LeaveRecursiveCall();
1493            return res;
1494        case '[':
1495            /* array */
1496            if (Py_EnterRecursiveCall(" while decoding a JSON array "
1497                                      "from a byte string"))
1498                return NULL;
1499            res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1500            Py_LeaveRecursiveCall();
1501            return res;
1502        case 'n':
1503            /* null */
1504            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1505                Py_INCREF(Py_None);
1506                *next_idx_ptr = idx + 4;
1507                return Py_None;
1508            }
1509            break;
1510        case 't':
1511            /* true */
1512            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1513                Py_INCREF(Py_True);
1514                *next_idx_ptr = idx + 4;
1515                return Py_True;
1516            }
1517            break;
1518        case 'f':
1519            /* false */
1520            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1521                Py_INCREF(Py_False);
1522                *next_idx_ptr = idx + 5;
1523                return Py_False;
1524            }
1525            break;
1526        case 'N':
1527            /* NaN */
1528            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1529                return _parse_constant(s, "NaN", idx, next_idx_ptr);
1530            }
1531            break;
1532        case 'I':
1533            /* Infinity */
1534            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1535                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1536            }
1537            break;
1538        case '-':
1539            /* -Infinity */
1540            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1541                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1542            }
1543            break;
1544    }
1545    /* Didn't find a string, object, array, or named constant. Look for a number. */
1546    return _match_number_str(s, pystr, idx, next_idx_ptr);
1547}
1548
1549static PyObject *
1550scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1551{
1552    /* Read one JSON term (of any kind) from PyUnicode pystr.
1553    idx is the index of the first character of the term
1554    *next_idx_ptr is a return-by-reference index to the first character after
1555        the number.
1556
1557    Returns a new PyObject representation of the term.
1558    */
1559    PyObject *res;
1560    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1561    Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1562    if (idx < 0) {
1563        PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1564        return NULL;
1565    }
1566    if (idx >= length) {
1567        PyErr_SetNone(PyExc_StopIteration);
1568        return NULL;
1569    }
1570    switch (str[idx]) {
1571        case '"':
1572            /* string */
1573            return scanstring_unicode(pystr, idx + 1,
1574                PyObject_IsTrue(s->strict),
1575                next_idx_ptr);
1576        case '{':
1577            /* object */
1578            if (Py_EnterRecursiveCall(" while decoding a JSON object "
1579                                      "from a unicode string"))
1580                return NULL;
1581            res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1582            Py_LeaveRecursiveCall();
1583            return res;
1584        case '[':
1585            /* array */
1586            if (Py_EnterRecursiveCall(" while decoding a JSON array "
1587                                      "from a unicode string"))
1588                return NULL;
1589            res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1590            Py_LeaveRecursiveCall();
1591            return res;
1592        case 'n':
1593            /* null */
1594            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1595                Py_INCREF(Py_None);
1596                *next_idx_ptr = idx + 4;
1597                return Py_None;
1598            }
1599            break;
1600        case 't':
1601            /* true */
1602            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1603                Py_INCREF(Py_True);
1604                *next_idx_ptr = idx + 4;
1605                return Py_True;
1606            }
1607            break;
1608        case 'f':
1609            /* false */
1610            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1611                Py_INCREF(Py_False);
1612                *next_idx_ptr = idx + 5;
1613                return Py_False;
1614            }
1615            break;
1616        case 'N':
1617            /* NaN */
1618            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1619                return _parse_constant(s, "NaN", idx, next_idx_ptr);
1620            }
1621            break;
1622        case 'I':
1623            /* Infinity */
1624            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1625                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1626            }
1627            break;
1628        case '-':
1629            /* -Infinity */
1630            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1631                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1632            }
1633            break;
1634    }
1635    /* Didn't find a string, object, array, or named constant. Look for a number. */
1636    return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1637}
1638
1639static PyObject *
1640scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1641{
1642    /* Python callable interface to scan_once_{str,unicode} */
1643    PyObject *pystr;
1644    PyObject *rval;
1645    Py_ssize_t idx;
1646    Py_ssize_t next_idx = -1;
1647    static char *kwlist[] = {"string", "idx", NULL};
1648    PyScannerObject *s;
1649    assert(PyScanner_Check(self));
1650    s = (PyScannerObject *)self;
1651    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1652        return NULL;
1653
1654    if (PyString_Check(pystr)) {
1655        rval = scan_once_str(s, pystr, idx, &next_idx);
1656    }
1657    else if (PyUnicode_Check(pystr)) {
1658        rval = scan_once_unicode(s, pystr, idx, &next_idx);
1659    }
1660    else {
1661        PyErr_Format(PyExc_TypeError,
1662                 "first argument must be a string, not %.80s",
1663                 Py_TYPE(pystr)->tp_name);
1664        return NULL;
1665    }
1666    return _build_rval_index_tuple(rval, next_idx);
1667}
1668
1669static PyObject *
1670scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1671{
1672    PyScannerObject *s;
1673    s = (PyScannerObject *)type->tp_alloc(type, 0);
1674    if (s != NULL) {
1675        s->encoding = NULL;
1676        s->strict = NULL;
1677        s->object_hook = NULL;
1678        s->pairs_hook = NULL;
1679        s->parse_float = NULL;
1680        s->parse_int = NULL;
1681        s->parse_constant = NULL;
1682    }
1683    return (PyObject *)s;
1684}
1685
1686static int
1687scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1688{
1689    /* Initialize Scanner object */
1690    PyObject *ctx;
1691    static char *kwlist[] = {"context", NULL};
1692    PyScannerObject *s;
1693
1694    assert(PyScanner_Check(self));
1695    s = (PyScannerObject *)self;
1696
1697    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1698        return -1;
1699
1700    /* PyString_AS_STRING is used on encoding */
1701    s->encoding = PyObject_GetAttrString(ctx, "encoding");
1702    if (s->encoding == NULL)
1703        goto bail;
1704    if (s->encoding == Py_None) {
1705        Py_DECREF(Py_None);
1706        s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1707    }
1708    else if (PyUnicode_Check(s->encoding)) {
1709        PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1710        Py_DECREF(s->encoding);
1711        s->encoding = tmp;
1712    }
1713    if (s->encoding == NULL)
1714        goto bail;
1715    if (!PyString_Check(s->encoding)) {
1716	PyErr_Format(PyExc_TypeError,
1717		     "encoding must be a string, not %.80s",
1718		     Py_TYPE(s->encoding)->tp_name);
1719	goto bail;
1720    }
1721
1722
1723    /* All of these will fail "gracefully" so we don't need to verify them */
1724    s->strict = PyObject_GetAttrString(ctx, "strict");
1725    if (s->strict == NULL)
1726        goto bail;
1727    s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1728    if (s->object_hook == NULL)
1729        goto bail;
1730    s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1731    if (s->pairs_hook == NULL)
1732        goto bail;
1733    s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1734    if (s->parse_float == NULL)
1735        goto bail;
1736    s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1737    if (s->parse_int == NULL)
1738        goto bail;
1739    s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1740    if (s->parse_constant == NULL)
1741        goto bail;
1742
1743    return 0;
1744
1745bail:
1746    Py_CLEAR(s->encoding);
1747    Py_CLEAR(s->strict);
1748    Py_CLEAR(s->object_hook);
1749    Py_CLEAR(s->pairs_hook);
1750    Py_CLEAR(s->parse_float);
1751    Py_CLEAR(s->parse_int);
1752    Py_CLEAR(s->parse_constant);
1753    return -1;
1754}
1755
1756PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1757
1758static
1759PyTypeObject PyScannerType = {
1760    PyObject_HEAD_INIT(NULL)
1761    0,                    /* tp_internal */
1762    "_json.Scanner",       /* tp_name */
1763    sizeof(PyScannerObject), /* tp_basicsize */
1764    0,                    /* tp_itemsize */
1765    scanner_dealloc, /* tp_dealloc */
1766    0,                    /* tp_print */
1767    0,                    /* tp_getattr */
1768    0,                    /* tp_setattr */
1769    0,                    /* tp_compare */
1770    0,                    /* tp_repr */
1771    0,                    /* tp_as_number */
1772    0,                    /* tp_as_sequence */
1773    0,                    /* tp_as_mapping */
1774    0,                    /* tp_hash */
1775    scanner_call,         /* tp_call */
1776    0,                    /* tp_str */
1777    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
1778    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
1779    0,                    /* tp_as_buffer */
1780    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
1781    scanner_doc,          /* tp_doc */
1782    scanner_traverse,                    /* tp_traverse */
1783    scanner_clear,                    /* tp_clear */
1784    0,                    /* tp_richcompare */
1785    0,                    /* tp_weaklistoffset */
1786    0,                    /* tp_iter */
1787    0,                    /* tp_iternext */
1788    0,                    /* tp_methods */
1789    scanner_members,                    /* tp_members */
1790    0,                    /* tp_getset */
1791    0,                    /* tp_base */
1792    0,                    /* tp_dict */
1793    0,                    /* tp_descr_get */
1794    0,                    /* tp_descr_set */
1795    0,                    /* tp_dictoffset */
1796    scanner_init,                    /* tp_init */
1797    0,/* PyType_GenericAlloc, */        /* tp_alloc */
1798    scanner_new,          /* tp_new */
1799    0,/* PyObject_GC_Del, */              /* tp_free */
1800};
1801
1802static PyObject *
1803encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1804{
1805    PyEncoderObject *s;
1806    s = (PyEncoderObject *)type->tp_alloc(type, 0);
1807    if (s != NULL) {
1808        s->markers = NULL;
1809        s->defaultfn = NULL;
1810        s->encoder = NULL;
1811        s->indent = NULL;
1812        s->key_separator = NULL;
1813        s->item_separator = NULL;
1814        s->sort_keys = NULL;
1815        s->skipkeys = NULL;
1816    }
1817    return (PyObject *)s;
1818}
1819
1820static int
1821encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1822{
1823    /* initialize Encoder object */
1824    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1825
1826    PyEncoderObject *s;
1827    PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1828    PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
1829
1830    assert(PyEncoder_Check(self));
1831    s = (PyEncoderObject *)self;
1832
1833    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1834        &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1835        &sort_keys, &skipkeys, &allow_nan))
1836        return -1;
1837
1838    s->markers = markers;
1839    s->defaultfn = defaultfn;
1840    s->encoder = encoder;
1841    s->indent = indent;
1842    s->key_separator = key_separator;
1843    s->item_separator = item_separator;
1844    s->sort_keys = sort_keys;
1845    s->skipkeys = skipkeys;
1846    s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1847    s->allow_nan = PyObject_IsTrue(allow_nan);
1848
1849    Py_INCREF(s->markers);
1850    Py_INCREF(s->defaultfn);
1851    Py_INCREF(s->encoder);
1852    Py_INCREF(s->indent);
1853    Py_INCREF(s->key_separator);
1854    Py_INCREF(s->item_separator);
1855    Py_INCREF(s->sort_keys);
1856    Py_INCREF(s->skipkeys);
1857    return 0;
1858}
1859
1860static PyObject *
1861encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1862{
1863    /* Python callable interface to encode_listencode_obj */
1864    static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1865    PyObject *obj;
1866    PyObject *rval;
1867    Py_ssize_t indent_level;
1868    PyEncoderObject *s;
1869    assert(PyEncoder_Check(self));
1870    s = (PyEncoderObject *)self;
1871    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1872        &obj, _convertPyInt_AsSsize_t, &indent_level))
1873        return NULL;
1874    rval = PyList_New(0);
1875    if (rval == NULL)
1876        return NULL;
1877    if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1878        Py_DECREF(rval);
1879        return NULL;
1880    }
1881    return rval;
1882}
1883
1884static PyObject *
1885_encoded_const(PyObject *obj)
1886{
1887    /* Return the JSON string representation of None, True, False */
1888    if (obj == Py_None) {
1889        static PyObject *s_null = NULL;
1890        if (s_null == NULL) {
1891            s_null = PyString_InternFromString("null");
1892        }
1893        Py_INCREF(s_null);
1894        return s_null;
1895    }
1896    else if (obj == Py_True) {
1897        static PyObject *s_true = NULL;
1898        if (s_true == NULL) {
1899            s_true = PyString_InternFromString("true");
1900        }
1901        Py_INCREF(s_true);
1902        return s_true;
1903    }
1904    else if (obj == Py_False) {
1905        static PyObject *s_false = NULL;
1906        if (s_false == NULL) {
1907            s_false = PyString_InternFromString("false");
1908        }
1909        Py_INCREF(s_false);
1910        return s_false;
1911    }
1912    else {
1913        PyErr_SetString(PyExc_ValueError, "not a const");
1914        return NULL;
1915    }
1916}
1917
1918static PyObject *
1919encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1920{
1921    /* Return the JSON representation of a PyFloat */
1922    double i = PyFloat_AS_DOUBLE(obj);
1923    if (!Py_IS_FINITE(i)) {
1924        if (!s->allow_nan) {
1925            PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1926            return NULL;
1927        }
1928        if (i > 0) {
1929            return PyString_FromString("Infinity");
1930        }
1931        else if (i < 0) {
1932            return PyString_FromString("-Infinity");
1933        }
1934        else {
1935            return PyString_FromString("NaN");
1936        }
1937    }
1938    /* Use a better float format here? */
1939    return PyObject_Repr(obj);
1940}
1941
1942static PyObject *
1943encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1944{
1945    /* Return the JSON representation of a string */
1946    if (s->fast_encode)
1947        return py_encode_basestring_ascii(NULL, obj);
1948    else
1949        return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1950}
1951
1952static int
1953_steal_list_append(PyObject *lst, PyObject *stolen)
1954{
1955    /* Append stolen and then decrement its reference count */
1956    int rval = PyList_Append(lst, stolen);
1957    Py_DECREF(stolen);
1958    return rval;
1959}
1960
1961static int
1962encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1963{
1964    /* Encode Python object obj to a JSON term, rval is a PyList */
1965    PyObject *newobj;
1966    int rv;
1967
1968    if (obj == Py_None || obj == Py_True || obj == Py_False) {
1969        PyObject *cstr = _encoded_const(obj);
1970        if (cstr == NULL)
1971            return -1;
1972        return _steal_list_append(rval, cstr);
1973    }
1974    else if (PyString_Check(obj) || PyUnicode_Check(obj))
1975    {
1976        PyObject *encoded = encoder_encode_string(s, obj);
1977        if (encoded == NULL)
1978            return -1;
1979        return _steal_list_append(rval, encoded);
1980    }
1981    else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1982        PyObject *encoded = PyObject_Str(obj);
1983        if (encoded == NULL)
1984            return -1;
1985        return _steal_list_append(rval, encoded);
1986    }
1987    else if (PyFloat_Check(obj)) {
1988        PyObject *encoded = encoder_encode_float(s, obj);
1989        if (encoded == NULL)
1990            return -1;
1991        return _steal_list_append(rval, encoded);
1992    }
1993    else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1994        if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1995            return -1;
1996        rv = encoder_listencode_list(s, rval, obj, indent_level);
1997        Py_LeaveRecursiveCall();
1998        return rv;
1999    }
2000    else if (PyDict_Check(obj)) {
2001        if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2002            return -1;
2003        rv = encoder_listencode_dict(s, rval, obj, indent_level);
2004        Py_LeaveRecursiveCall();
2005        return rv;
2006    }
2007    else {
2008        PyObject *ident = NULL;
2009        if (s->markers != Py_None) {
2010            int has_key;
2011            ident = PyLong_FromVoidPtr(obj);
2012            if (ident == NULL)
2013                return -1;
2014            has_key = PyDict_Contains(s->markers, ident);
2015            if (has_key) {
2016                if (has_key != -1)
2017                    PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2018                Py_DECREF(ident);
2019                return -1;
2020            }
2021            if (PyDict_SetItem(s->markers, ident, obj)) {
2022                Py_DECREF(ident);
2023                return -1;
2024            }
2025        }
2026        newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2027        if (newobj == NULL) {
2028            Py_XDECREF(ident);
2029            return -1;
2030        }
2031
2032        if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2033            return -1;
2034        rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2035        Py_LeaveRecursiveCall();
2036
2037        Py_DECREF(newobj);
2038        if (rv) {
2039            Py_XDECREF(ident);
2040            return -1;
2041        }
2042        if (ident != NULL) {
2043            if (PyDict_DelItem(s->markers, ident)) {
2044                Py_XDECREF(ident);
2045                return -1;
2046            }
2047            Py_XDECREF(ident);
2048        }
2049        return rv;
2050    }
2051}
2052
2053static int
2054encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2055{
2056    /* Encode Python dict dct a JSON term, rval is a PyList */
2057    static PyObject *open_dict = NULL;
2058    static PyObject *close_dict = NULL;
2059    static PyObject *empty_dict = NULL;
2060    PyObject *kstr = NULL;
2061    PyObject *ident = NULL;
2062    PyObject *key = NULL;
2063    PyObject *value = NULL;
2064    PyObject *it = NULL;
2065    int skipkeys;
2066    Py_ssize_t idx;
2067
2068    if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2069        open_dict = PyString_InternFromString("{");
2070        close_dict = PyString_InternFromString("}");
2071        empty_dict = PyString_InternFromString("{}");
2072        if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2073            return -1;
2074    }
2075    if (Py_SIZE(dct) == 0)
2076        return PyList_Append(rval, empty_dict);
2077
2078    if (s->markers != Py_None) {
2079        int has_key;
2080        ident = PyLong_FromVoidPtr(dct);
2081        if (ident == NULL)
2082            goto bail;
2083        has_key = PyDict_Contains(s->markers, ident);
2084        if (has_key) {
2085            if (has_key != -1)
2086                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2087            goto bail;
2088        }
2089        if (PyDict_SetItem(s->markers, ident, dct)) {
2090            goto bail;
2091        }
2092    }
2093
2094    if (PyList_Append(rval, open_dict))
2095        goto bail;
2096
2097    if (s->indent != Py_None) {
2098        /* TODO: DOES NOT RUN */
2099        indent_level += 1;
2100        /*
2101            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2102            separator = _item_separator + newline_indent
2103            buf += newline_indent
2104        */
2105    }
2106
2107    /* TODO: C speedup not implemented for sort_keys */
2108
2109    it = PyObject_GetIter(dct);
2110    if (it == NULL)
2111        goto bail;
2112    skipkeys = PyObject_IsTrue(s->skipkeys);
2113    idx = 0;
2114    while ((key = PyIter_Next(it)) != NULL) {
2115        PyObject *encoded;
2116
2117        if (PyString_Check(key) || PyUnicode_Check(key)) {
2118            Py_INCREF(key);
2119            kstr = key;
2120        }
2121        else if (PyFloat_Check(key)) {
2122            kstr = encoder_encode_float(s, key);
2123            if (kstr == NULL)
2124                goto bail;
2125        }
2126        else if (PyInt_Check(key) || PyLong_Check(key)) {
2127            kstr = PyObject_Str(key);
2128            if (kstr == NULL)
2129                goto bail;
2130        }
2131        else if (key == Py_True || key == Py_False || key == Py_None) {
2132            kstr = _encoded_const(key);
2133            if (kstr == NULL)
2134                goto bail;
2135        }
2136        else if (skipkeys) {
2137            Py_DECREF(key);
2138            continue;
2139        }
2140        else {
2141            /* TODO: include repr of key */
2142            PyErr_SetString(PyExc_TypeError, "keys must be a string");
2143            goto bail;
2144        }
2145
2146        if (idx) {
2147            if (PyList_Append(rval, s->item_separator))
2148                goto bail;
2149        }
2150
2151        value = PyObject_GetItem(dct, key);
2152        if (value == NULL)
2153            goto bail;
2154
2155        encoded = encoder_encode_string(s, kstr);
2156        Py_CLEAR(kstr);
2157        if (encoded == NULL)
2158            goto bail;
2159        if (PyList_Append(rval, encoded)) {
2160            Py_DECREF(encoded);
2161            goto bail;
2162        }
2163        Py_DECREF(encoded);
2164        if (PyList_Append(rval, s->key_separator))
2165            goto bail;
2166        if (encoder_listencode_obj(s, rval, value, indent_level))
2167            goto bail;
2168        idx += 1;
2169        Py_CLEAR(value);
2170        Py_DECREF(key);
2171    }
2172    if (PyErr_Occurred())
2173        goto bail;
2174    Py_CLEAR(it);
2175
2176    if (ident != NULL) {
2177        if (PyDict_DelItem(s->markers, ident))
2178            goto bail;
2179        Py_CLEAR(ident);
2180    }
2181    if (s->indent != Py_None) {
2182        /* TODO: DOES NOT RUN */
2183        /*
2184            indent_level -= 1;
2185
2186            yield '\n' + (' ' * (_indent * _current_indent_level))
2187        */
2188    }
2189    if (PyList_Append(rval, close_dict))
2190        goto bail;
2191    return 0;
2192
2193bail:
2194    Py_XDECREF(it);
2195    Py_XDECREF(key);
2196    Py_XDECREF(value);
2197    Py_XDECREF(kstr);
2198    Py_XDECREF(ident);
2199    return -1;
2200}
2201
2202
2203static int
2204encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2205{
2206    /* Encode Python list seq to a JSON term, rval is a PyList */
2207    static PyObject *open_array = NULL;
2208    static PyObject *close_array = NULL;
2209    static PyObject *empty_array = NULL;
2210    PyObject *ident = NULL;
2211    PyObject *s_fast = NULL;
2212    Py_ssize_t i;
2213
2214    if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2215        open_array = PyString_InternFromString("[");
2216        close_array = PyString_InternFromString("]");
2217        empty_array = PyString_InternFromString("[]");
2218        if (open_array == NULL || close_array == NULL || empty_array == NULL)
2219            return -1;
2220    }
2221    ident = NULL;
2222    s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2223    if (s_fast == NULL)
2224        return -1;
2225    if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
2226        Py_DECREF(s_fast);
2227        return PyList_Append(rval, empty_array);
2228    }
2229
2230    if (s->markers != Py_None) {
2231        int has_key;
2232        ident = PyLong_FromVoidPtr(seq);
2233        if (ident == NULL)
2234            goto bail;
2235        has_key = PyDict_Contains(s->markers, ident);
2236        if (has_key) {
2237            if (has_key != -1)
2238                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2239            goto bail;
2240        }
2241        if (PyDict_SetItem(s->markers, ident, seq)) {
2242            goto bail;
2243        }
2244    }
2245
2246    if (PyList_Append(rval, open_array))
2247        goto bail;
2248    if (s->indent != Py_None) {
2249        /* TODO: DOES NOT RUN */
2250        indent_level += 1;
2251        /*
2252            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2253            separator = _item_separator + newline_indent
2254            buf += newline_indent
2255        */
2256    }
2257    for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2258        PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
2259        if (i) {
2260            if (PyList_Append(rval, s->item_separator))
2261                goto bail;
2262        }
2263        if (encoder_listencode_obj(s, rval, obj, indent_level))
2264            goto bail;
2265    }
2266    if (ident != NULL) {
2267        if (PyDict_DelItem(s->markers, ident))
2268            goto bail;
2269        Py_CLEAR(ident);
2270    }
2271    if (s->indent != Py_None) {
2272        /* TODO: DOES NOT RUN */
2273        /*
2274            indent_level -= 1;
2275
2276            yield '\n' + (' ' * (_indent * _current_indent_level))
2277        */
2278    }
2279    if (PyList_Append(rval, close_array))
2280        goto bail;
2281    Py_DECREF(s_fast);
2282    return 0;
2283
2284bail:
2285    Py_XDECREF(ident);
2286    Py_DECREF(s_fast);
2287    return -1;
2288}
2289
2290static void
2291encoder_dealloc(PyObject *self)
2292{
2293    /* Deallocate Encoder */
2294    encoder_clear(self);
2295    Py_TYPE(self)->tp_free(self);
2296}
2297
2298static int
2299encoder_traverse(PyObject *self, visitproc visit, void *arg)
2300{
2301    PyEncoderObject *s;
2302    assert(PyEncoder_Check(self));
2303    s = (PyEncoderObject *)self;
2304    Py_VISIT(s->markers);
2305    Py_VISIT(s->defaultfn);
2306    Py_VISIT(s->encoder);
2307    Py_VISIT(s->indent);
2308    Py_VISIT(s->key_separator);
2309    Py_VISIT(s->item_separator);
2310    Py_VISIT(s->sort_keys);
2311    Py_VISIT(s->skipkeys);
2312    return 0;
2313}
2314
2315static int
2316encoder_clear(PyObject *self)
2317{
2318    /* Deallocate Encoder */
2319    PyEncoderObject *s;
2320    assert(PyEncoder_Check(self));
2321    s = (PyEncoderObject *)self;
2322    Py_CLEAR(s->markers);
2323    Py_CLEAR(s->defaultfn);
2324    Py_CLEAR(s->encoder);
2325    Py_CLEAR(s->indent);
2326    Py_CLEAR(s->key_separator);
2327    Py_CLEAR(s->item_separator);
2328    Py_CLEAR(s->sort_keys);
2329    Py_CLEAR(s->skipkeys);
2330    return 0;
2331}
2332
2333PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2334
2335static
2336PyTypeObject PyEncoderType = {
2337    PyObject_HEAD_INIT(NULL)
2338    0,                    /* tp_internal */
2339    "_json.Encoder",       /* tp_name */
2340    sizeof(PyEncoderObject), /* tp_basicsize */
2341    0,                    /* tp_itemsize */
2342    encoder_dealloc, /* tp_dealloc */
2343    0,                    /* tp_print */
2344    0,                    /* tp_getattr */
2345    0,                    /* tp_setattr */
2346    0,                    /* tp_compare */
2347    0,                    /* tp_repr */
2348    0,                    /* tp_as_number */
2349    0,                    /* tp_as_sequence */
2350    0,                    /* tp_as_mapping */
2351    0,                    /* tp_hash */
2352    encoder_call,         /* tp_call */
2353    0,                    /* tp_str */
2354    0,                    /* tp_getattro */
2355    0,                    /* tp_setattro */
2356    0,                    /* tp_as_buffer */
2357    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
2358    encoder_doc,          /* tp_doc */
2359    encoder_traverse,     /* tp_traverse */
2360    encoder_clear,        /* tp_clear */
2361    0,                    /* tp_richcompare */
2362    0,                    /* tp_weaklistoffset */
2363    0,                    /* tp_iter */
2364    0,                    /* tp_iternext */
2365    0,                    /* tp_methods */
2366    encoder_members,      /* tp_members */
2367    0,                    /* tp_getset */
2368    0,                    /* tp_base */
2369    0,                    /* tp_dict */
2370    0,                    /* tp_descr_get */
2371    0,                    /* tp_descr_set */
2372    0,                    /* tp_dictoffset */
2373    encoder_init,         /* tp_init */
2374    0,                    /* tp_alloc */
2375    encoder_new,          /* tp_new */
2376    0,                    /* tp_free */
2377};
2378
2379static PyMethodDef speedups_methods[] = {
2380    {"encode_basestring_ascii",
2381        (PyCFunction)py_encode_basestring_ascii,
2382        METH_O,
2383        pydoc_encode_basestring_ascii},
2384    {"scanstring",
2385        (PyCFunction)py_scanstring,
2386        METH_VARARGS,
2387        pydoc_scanstring},
2388    {NULL, NULL, 0, NULL}
2389};
2390
2391PyDoc_STRVAR(module_doc,
2392"json speedups\n");
2393
2394void
2395init_json(void)
2396{
2397    PyObject *m;
2398    PyScannerType.tp_new = PyType_GenericNew;
2399    if (PyType_Ready(&PyScannerType) < 0)
2400        return;
2401    PyEncoderType.tp_new = PyType_GenericNew;
2402    if (PyType_Ready(&PyEncoderType) < 0)
2403        return;
2404    m = Py_InitModule3("_json", speedups_methods, module_doc);
2405    Py_INCREF((PyObject*)&PyScannerType);
2406    PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2407    Py_INCREF((PyObject*)&PyEncoderType);
2408    PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2409}
2410