_speedups.c revision 2a99a7e74a7f215066514fe81d2bfa6639d9eddd
1#include "Python.h"
2#include "structmember.h"
3#if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double)
4#define PyOS_string_to_double json_PyOS_string_to_double
5static double
6json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
7static double
8json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) {
9    double x;
10    assert(endptr == NULL);
11    assert(overflow_exception == NULL);
12    PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
13    x = PyOS_ascii_atof(s);
14    PyFPE_END_PROTECT(x)
15    return x;
16}
17#endif
/* Python < 2.6: supply the Py_TYPE / Py_SIZE accessors when missing. */
#if PY_VERSION_HEX < 0x02060000
#  ifndef Py_TYPE
#    define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
#  endif
#  ifndef Py_SIZE
#    define Py_SIZE(ob)     (((PyVarObject*)(ob))->ob_size)
#  endif
#endif

/* Python < 2.5: there is no Py_ssize_t, so map it and its helpers to int. */
#if PY_VERSION_HEX < 0x02050000
#  ifndef PY_SSIZE_T_MIN
typedef int Py_ssize_t;
#    define PY_SSIZE_T_MAX INT_MAX
#    define PY_SSIZE_T_MIN INT_MIN
#    define PyInt_FromSsize_t PyInt_FromLong
#    define PyInt_AsSsize_t PyInt_AsLong
#  endif
#endif

/* Finite means neither infinite nor NaN. */
#if !defined(Py_IS_FINITE)
#  define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif
34
35#ifdef __GNUC__
36#define UNUSED __attribute__((__unused__))
37#else
38#define UNUSED
39#endif
40
41#define DEFAULT_ENCODING "utf-8"
42
43#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
44#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
45#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
46#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
47
48static PyTypeObject PyScannerType;
49static PyTypeObject PyEncoderType;
50
51typedef struct _PyScannerObject {
52    PyObject_HEAD
53    PyObject *encoding;
54    PyObject *strict;
55    PyObject *object_hook;
56    PyObject *pairs_hook;
57    PyObject *parse_float;
58    PyObject *parse_int;
59    PyObject *parse_constant;
60    PyObject *memo;
61} PyScannerObject;
62
63static PyMemberDef scanner_members[] = {
64    {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
65    {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
66    {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
67    {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
68    {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
69    {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
70    {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
71    {NULL}
72};
73
74typedef struct _PyEncoderObject {
75    PyObject_HEAD
76    PyObject *markers;
77    PyObject *defaultfn;
78    PyObject *encoder;
79    PyObject *indent;
80    PyObject *key_separator;
81    PyObject *item_separator;
82    PyObject *sort_keys;
83    PyObject *skipkeys;
84    PyObject *key_memo;
85    PyObject *Decimal;
86    int fast_encode;
87    int allow_nan;
88    int use_decimal;
89    int namedtuple_as_object;
90    int tuple_as_array;
91    int bigint_as_string;
92    PyObject *item_sort_key;
93} PyEncoderObject;
94
95static PyMemberDef encoder_members[] = {
96    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
97    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
98    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
99    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
100    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
101    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
102    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
103    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
104    {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
105    {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"},
106    {NULL}
107};
108
109static PyObject *
110maybe_quote_bigint(PyObject *encoded, PyObject *obj);
111
112static Py_ssize_t
113ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
114static PyObject *
115ascii_escape_unicode(PyObject *pystr);
116static PyObject *
117ascii_escape_str(PyObject *pystr);
118static PyObject *
119py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
120void init_speedups(void);
121static PyObject *
122scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
123static PyObject *
124scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
125static PyObject *
126_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
127static PyObject *
128scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
129static int
130scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
131static void
132scanner_dealloc(PyObject *self);
133static int
134scanner_clear(PyObject *self);
135static PyObject *
136encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
137static int
138encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
139static void
140encoder_dealloc(PyObject *self);
141static int
142encoder_clear(PyObject *self);
143static int
144encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
145static int
146encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
147static int
148encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
149static PyObject *
150_encoded_const(PyObject *obj);
151static void
152raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
153static PyObject *
154encoder_encode_string(PyEncoderObject *s, PyObject *obj);
155static int
156_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
157static PyObject *
158_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
159static PyObject *
160encoder_encode_float(PyEncoderObject *s, PyObject *obj);
161static int
162_is_namedtuple(PyObject *obj);
163
/* S_CHAR(c): true when c can be copied into JSON output unescaped, i.e.
   printable ASCII (' ' through '~') other than backslash and double quote.
   The argument is fully parenthesized so expression arguments parse
   correctly, but it is still evaluated more than once: do not pass an
   expression with side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, newline and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes needed for one escaped input character: "\uXXXX" is 6.
   On wide-unicode builds a character may need a surrogate pair, which
   doubles that. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
173
174static PyObject *
175maybe_quote_bigint(PyObject *encoded, PyObject *obj)
176{
177    static PyObject *big_long = NULL;
178    static PyObject *small_long = NULL;
179    if (big_long == NULL) {
180        big_long = PyLong_FromLongLong(1LL << 53);
181        if (big_long == NULL) {
182            Py_DECREF(encoded);
183            return NULL;
184        }
185    }
186    if (small_long == NULL) {
187        small_long = PyLong_FromLongLong(-1LL << 53);
188        if (small_long == NULL) {
189            Py_DECREF(encoded);
190            return NULL;
191        }
192    }
193    if (PyObject_RichCompareBool(obj, big_long, Py_GE) ||
194        PyObject_RichCompareBool(obj, small_long, Py_LE)) {
195        PyObject* quoted = PyString_FromFormat("\"%s\"",
196                                               PyString_AsString(encoded));
197        Py_DECREF(encoded);
198        encoded = quoted;
199    }
200    return encoded;
201}
202
203static int
204_is_namedtuple(PyObject *obj)
205{
206    int rval = 0;
207    PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict");
208    if (_asdict == NULL) {
209        PyErr_Clear();
210        return 0;
211    }
212    rval = PyCallable_Check(_asdict);
213    Py_DECREF(_asdict);
214    return rval;
215}
216
217static int
218_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
219{
220    /* PyObject to Py_ssize_t converter */
221    *size_ptr = PyInt_AsSsize_t(o);
222    if (*size_ptr == -1 && PyErr_Occurred())
223        return 0;
224    return 1;
225}
226
227static PyObject *
228_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
229{
230    /* Py_ssize_t to PyObject converter */
231    return PyInt_FromSsize_t(*size_ptr);
232}
233
234static Py_ssize_t
235ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
236{
237    /* Escape unicode code point c to ASCII escape sequences
238    in char *output. output must have at least 12 bytes unused to
239    accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
240    output[chars++] = '\\';
241    switch (c) {
242        case '\\': output[chars++] = (char)c; break;
243        case '"': output[chars++] = (char)c; break;
244        case '\b': output[chars++] = 'b'; break;
245        case '\f': output[chars++] = 'f'; break;
246        case '\n': output[chars++] = 'n'; break;
247        case '\r': output[chars++] = 'r'; break;
248        case '\t': output[chars++] = 't'; break;
249        default:
250#ifdef Py_UNICODE_WIDE
251            if (c >= 0x10000) {
252                /* UTF-16 surrogate pair */
253                Py_UNICODE v = c - 0x10000;
254                c = 0xd800 | ((v >> 10) & 0x3ff);
255                output[chars++] = 'u';
256                output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
257                output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
258                output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
259                output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
260                c = 0xdc00 | (v & 0x3ff);
261                output[chars++] = '\\';
262            }
263#endif
264            output[chars++] = 'u';
265            output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
266            output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
267            output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
268            output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
269    }
270    return chars;
271}
272
273static PyObject *
274ascii_escape_unicode(PyObject *pystr)
275{
276    /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
277    Py_ssize_t i;
278    Py_ssize_t input_chars;
279    Py_ssize_t output_size;
280    Py_ssize_t max_output_size;
281    Py_ssize_t chars;
282    PyObject *rval;
283    char *output;
284    Py_UNICODE *input_unicode;
285
286    input_chars = PyUnicode_GET_SIZE(pystr);
287    input_unicode = PyUnicode_AS_UNICODE(pystr);
288
289    /* One char input can be up to 6 chars output, estimate 4 of these */
290    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
291    max_output_size = 2 + (input_chars * MAX_EXPANSION);
292    rval = PyString_FromStringAndSize(NULL, output_size);
293    if (rval == NULL) {
294        return NULL;
295    }
296    output = PyString_AS_STRING(rval);
297    chars = 0;
298    output[chars++] = '"';
299    for (i = 0; i < input_chars; i++) {
300        Py_UNICODE c = input_unicode[i];
301        if (S_CHAR(c)) {
302            output[chars++] = (char)c;
303        }
304        else {
305            chars = ascii_escape_char(c, output, chars);
306        }
307        if (output_size - chars < (1 + MAX_EXPANSION)) {
308            /* There's more than four, so let's resize by a lot */
309            Py_ssize_t new_output_size = output_size * 2;
310            /* This is an upper bound */
311            if (new_output_size > max_output_size) {
312                new_output_size = max_output_size;
313            }
314            /* Make sure that the output size changed before resizing */
315            if (new_output_size != output_size) {
316                output_size = new_output_size;
317                if (_PyString_Resize(&rval, output_size) == -1) {
318                    return NULL;
319                }
320                output = PyString_AS_STRING(rval);
321            }
322        }
323    }
324    output[chars++] = '"';
325    if (_PyString_Resize(&rval, chars) == -1) {
326        return NULL;
327    }
328    return rval;
329}
330
331static PyObject *
332ascii_escape_str(PyObject *pystr)
333{
334    /* Take a PyString pystr and return a new ASCII-only escaped PyString */
335    Py_ssize_t i;
336    Py_ssize_t input_chars;
337    Py_ssize_t output_size;
338    Py_ssize_t chars;
339    PyObject *rval;
340    char *output;
341    char *input_str;
342
343    input_chars = PyString_GET_SIZE(pystr);
344    input_str = PyString_AS_STRING(pystr);
345
346    /* Fast path for a string that's already ASCII */
347    for (i = 0; i < input_chars; i++) {
348        Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
349        if (!S_CHAR(c)) {
350            /* If we have to escape something, scan the string for unicode */
351            Py_ssize_t j;
352            for (j = i; j < input_chars; j++) {
353                c = (Py_UNICODE)(unsigned char)input_str[j];
354                if (c > 0x7f) {
355                    /* We hit a non-ASCII character, bail to unicode mode */
356                    PyObject *uni;
357                    uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
358                    if (uni == NULL) {
359                        return NULL;
360                    }
361                    rval = ascii_escape_unicode(uni);
362                    Py_DECREF(uni);
363                    return rval;
364                }
365            }
366            break;
367        }
368    }
369
370    if (i == input_chars) {
371        /* Input is already ASCII */
372        output_size = 2 + input_chars;
373    }
374    else {
375        /* One char input can be up to 6 chars output, estimate 4 of these */
376        output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
377    }
378    rval = PyString_FromStringAndSize(NULL, output_size);
379    if (rval == NULL) {
380        return NULL;
381    }
382    output = PyString_AS_STRING(rval);
383    output[0] = '"';
384
385    /* We know that everything up to i is ASCII already */
386    chars = i + 1;
387    memcpy(&output[1], input_str, i);
388
389    for (; i < input_chars; i++) {
390        Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
391        if (S_CHAR(c)) {
392            output[chars++] = (char)c;
393        }
394        else {
395            chars = ascii_escape_char(c, output, chars);
396        }
397        /* An ASCII char can't possibly expand to a surrogate! */
398        if (output_size - chars < (1 + MIN_EXPANSION)) {
399            /* There's more than four, so let's resize by a lot */
400            output_size *= 2;
401            if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
402                output_size = 2 + (input_chars * MIN_EXPANSION);
403            }
404            if (_PyString_Resize(&rval, output_size) == -1) {
405                return NULL;
406            }
407            output = PyString_AS_STRING(rval);
408        }
409    }
410    output[chars++] = '"';
411    if (_PyString_Resize(&rval, chars) == -1) {
412        return NULL;
413    }
414    return rval;
415}
416
417static void
418raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
419{
420    /* Use the Python function simplejson.decoder.errmsg to raise a nice
421    looking ValueError exception */
422    static PyObject *JSONDecodeError = NULL;
423    PyObject *exc;
424    if (JSONDecodeError == NULL) {
425        PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
426        if (decoder == NULL)
427            return;
428        JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
429        Py_DECREF(decoder);
430        if (JSONDecodeError == NULL)
431            return;
432    }
433    exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
434    if (exc) {
435        PyErr_SetObject(JSONDecodeError, exc);
436        Py_DECREF(exc);
437    }
438}
439
440static PyObject *
441join_list_unicode(PyObject *lst)
442{
443    /* return u''.join(lst) */
444    static PyObject *joinfn = NULL;
445    if (joinfn == NULL) {
446        PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
447        if (ustr == NULL)
448            return NULL;
449
450        joinfn = PyObject_GetAttrString(ustr, "join");
451        Py_DECREF(ustr);
452        if (joinfn == NULL)
453            return NULL;
454    }
455    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
456}
457
458static PyObject *
459join_list_string(PyObject *lst)
460{
461    /* return ''.join(lst) */
462    static PyObject *joinfn = NULL;
463    if (joinfn == NULL) {
464        PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
465        if (ustr == NULL)
466            return NULL;
467
468        joinfn = PyObject_GetAttrString(ustr, "join");
469        Py_DECREF(ustr);
470        if (joinfn == NULL)
471            return NULL;
472    }
473    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
474}
475
476static PyObject *
477_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
478    /* return (rval, idx) tuple, stealing reference to rval */
479    PyObject *tpl;
480    PyObject *pyidx;
481    /*
482    steal a reference to rval, returns (rval, idx)
483    */
484    if (rval == NULL) {
485        return NULL;
486    }
487    pyidx = PyInt_FromSsize_t(idx);
488    if (pyidx == NULL) {
489        Py_DECREF(rval);
490        return NULL;
491    }
492    tpl = PyTuple_New(2);
493    if (tpl == NULL) {
494        Py_DECREF(pyidx);
495        Py_DECREF(rval);
496        return NULL;
497    }
498    PyTuple_SET_ITEM(tpl, 0, rval);
499    PyTuple_SET_ITEM(tpl, 1, pyidx);
500    return tpl;
501}
502
/* Flush the pending chunk, if any, onto the chunks list.
   Expects locals named chunk and chunks and a label named bail in the
   enclosing function.  The list is created on first use; any failure
   jumps to bail.  After a successful append chunk is cleared.
   Used as a bare statement with no trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL && (chunks = PyList_New(0)) == NULL) { \
            goto bail; \
        } \
        if (PyList_Append(chunks, chunk) != 0) { \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
516
517static PyObject *
518scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
519{
520    /* Read the JSON string from PyString pystr.
521    end is the index of the first character after the quote.
522    encoding is the encoding of pystr (must be an ASCII superset)
523    if strict is zero then literal control characters are allowed
524    *next_end_ptr is a return-by-reference index of the character
525        after the end quote
526
527    Return value is a new PyString (if ASCII-only) or PyUnicode
528    */
529    PyObject *rval;
530    Py_ssize_t len = PyString_GET_SIZE(pystr);
531    Py_ssize_t begin = end - 1;
532    Py_ssize_t next = begin;
533    int has_unicode = 0;
534    char *buf = PyString_AS_STRING(pystr);
535    PyObject *chunks = NULL;
536    PyObject *chunk = NULL;
537
538    if (len == end) {
539        raise_errmsg("Unterminated string starting at", pystr, begin);
540    }
541    else if (end < 0 || len < end) {
542        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
543        goto bail;
544    }
545    while (1) {
546        /* Find the end of the string or the next escape */
547        Py_UNICODE c = 0;
548        for (next = end; next < len; next++) {
549            c = (unsigned char)buf[next];
550            if (c == '"' || c == '\\') {
551                break;
552            }
553            else if (strict && c <= 0x1f) {
554                raise_errmsg("Invalid control character at", pystr, next);
555                goto bail;
556            }
557            else if (c > 0x7f) {
558                has_unicode = 1;
559            }
560        }
561        if (!(c == '"' || c == '\\')) {
562            raise_errmsg("Unterminated string starting at", pystr, begin);
563            goto bail;
564        }
565        /* Pick up this chunk if it's not zero length */
566        if (next != end) {
567            PyObject *strchunk;
568            APPEND_OLD_CHUNK
569            strchunk = PyString_FromStringAndSize(&buf[end], next - end);
570            if (strchunk == NULL) {
571                goto bail;
572            }
573            if (has_unicode) {
574                chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
575                Py_DECREF(strchunk);
576                if (chunk == NULL) {
577                    goto bail;
578                }
579            }
580            else {
581                chunk = strchunk;
582            }
583        }
584        next++;
585        if (c == '"') {
586            end = next;
587            break;
588        }
589        if (next == len) {
590            raise_errmsg("Unterminated string starting at", pystr, begin);
591            goto bail;
592        }
593        c = buf[next];
594        if (c != 'u') {
595            /* Non-unicode backslash escapes */
596            end = next + 1;
597            switch (c) {
598                case '"': break;
599                case '\\': break;
600                case '/': break;
601                case 'b': c = '\b'; break;
602                case 'f': c = '\f'; break;
603                case 'n': c = '\n'; break;
604                case 'r': c = '\r'; break;
605                case 't': c = '\t'; break;
606                default: c = 0;
607            }
608            if (c == 0) {
609                raise_errmsg("Invalid \\escape", pystr, end - 2);
610                goto bail;
611            }
612        }
613        else {
614            c = 0;
615            next++;
616            end = next + 4;
617            if (end >= len) {
618                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
619                goto bail;
620            }
621            /* Decode 4 hex digits */
622            for (; next < end; next++) {
623                Py_UNICODE digit = buf[next];
624                c <<= 4;
625                switch (digit) {
626                    case '0': case '1': case '2': case '3': case '4':
627                    case '5': case '6': case '7': case '8': case '9':
628                        c |= (digit - '0'); break;
629                    case 'a': case 'b': case 'c': case 'd': case 'e':
630                    case 'f':
631                        c |= (digit - 'a' + 10); break;
632                    case 'A': case 'B': case 'C': case 'D': case 'E':
633                    case 'F':
634                        c |= (digit - 'A' + 10); break;
635                    default:
636                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
637                        goto bail;
638                }
639            }
640#ifdef Py_UNICODE_WIDE
641            /* Surrogate pair */
642            if ((c & 0xfc00) == 0xd800) {
643                Py_UNICODE c2 = 0;
644                if (end + 6 >= len) {
645                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
646                    goto bail;
647                }
648                if (buf[next++] != '\\' || buf[next++] != 'u') {
649                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
650                    goto bail;
651                }
652                end += 6;
653                /* Decode 4 hex digits */
654                for (; next < end; next++) {
655                    c2 <<= 4;
656                    Py_UNICODE digit = buf[next];
657                    switch (digit) {
658                        case '0': case '1': case '2': case '3': case '4':
659                        case '5': case '6': case '7': case '8': case '9':
660                            c2 |= (digit - '0'); break;
661                        case 'a': case 'b': case 'c': case 'd': case 'e':
662                        case 'f':
663                            c2 |= (digit - 'a' + 10); break;
664                        case 'A': case 'B': case 'C': case 'D': case 'E':
665                        case 'F':
666                            c2 |= (digit - 'A' + 10); break;
667                        default:
668                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
669                            goto bail;
670                    }
671                }
672                if ((c2 & 0xfc00) != 0xdc00) {
673                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
674                    goto bail;
675                }
676                c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
677            }
678            else if ((c & 0xfc00) == 0xdc00) {
679                raise_errmsg("Unpaired low surrogate", pystr, end - 5);
680                goto bail;
681            }
682#endif
683        }
684        if (c > 0x7f) {
685            has_unicode = 1;
686        }
687        APPEND_OLD_CHUNK
688        if (has_unicode) {
689            chunk = PyUnicode_FromUnicode(&c, 1);
690            if (chunk == NULL) {
691                goto bail;
692            }
693        }
694        else {
695            char c_char = Py_CHARMASK(c);
696            chunk = PyString_FromStringAndSize(&c_char, 1);
697            if (chunk == NULL) {
698                goto bail;
699            }
700        }
701    }
702
703    if (chunks == NULL) {
704        if (chunk != NULL)
705            rval = chunk;
706        else
707            rval = PyString_FromStringAndSize("", 0);
708    }
709    else {
710        APPEND_OLD_CHUNK
711        rval = join_list_string(chunks);
712        if (rval == NULL) {
713            goto bail;
714        }
715        Py_CLEAR(chunks);
716    }
717
718    *next_end_ptr = end;
719    return rval;
720bail:
721    *next_end_ptr = -1;
722    Py_XDECREF(chunk);
723    Py_XDECREF(chunks);
724    return NULL;
725}
726
727
728static PyObject *
729scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
730{
731    /* Read the JSON string from PyUnicode pystr.
732    end is the index of the first character after the quote.
733    if strict is zero then literal control characters are allowed
734    *next_end_ptr is a return-by-reference index of the character
735        after the end quote
736
737    Return value is a new PyUnicode
738    */
739    PyObject *rval;
740    Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
741    Py_ssize_t begin = end - 1;
742    Py_ssize_t next = begin;
743    const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
744    PyObject *chunks = NULL;
745    PyObject *chunk = NULL;
746
747    if (len == end) {
748        raise_errmsg("Unterminated string starting at", pystr, begin);
749    }
750    else if (end < 0 || len < end) {
751        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
752        goto bail;
753    }
754    while (1) {
755        /* Find the end of the string or the next escape */
756        Py_UNICODE c = 0;
757        for (next = end; next < len; next++) {
758            c = buf[next];
759            if (c == '"' || c == '\\') {
760                break;
761            }
762            else if (strict && c <= 0x1f) {
763                raise_errmsg("Invalid control character at", pystr, next);
764                goto bail;
765            }
766        }
767        if (!(c == '"' || c == '\\')) {
768            raise_errmsg("Unterminated string starting at", pystr, begin);
769            goto bail;
770        }
771        /* Pick up this chunk if it's not zero length */
772        if (next != end) {
773            APPEND_OLD_CHUNK
774            chunk = PyUnicode_FromUnicode(&buf[end], next - end);
775            if (chunk == NULL) {
776                goto bail;
777            }
778        }
779        next++;
780        if (c == '"') {
781            end = next;
782            break;
783        }
784        if (next == len) {
785            raise_errmsg("Unterminated string starting at", pystr, begin);
786            goto bail;
787        }
788        c = buf[next];
789        if (c != 'u') {
790            /* Non-unicode backslash escapes */
791            end = next + 1;
792            switch (c) {
793                case '"': break;
794                case '\\': break;
795                case '/': break;
796                case 'b': c = '\b'; break;
797                case 'f': c = '\f'; break;
798                case 'n': c = '\n'; break;
799                case 'r': c = '\r'; break;
800                case 't': c = '\t'; break;
801                default: c = 0;
802            }
803            if (c == 0) {
804                raise_errmsg("Invalid \\escape", pystr, end - 2);
805                goto bail;
806            }
807        }
808        else {
809            c = 0;
810            next++;
811            end = next + 4;
812            if (end >= len) {
813                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
814                goto bail;
815            }
816            /* Decode 4 hex digits */
817            for (; next < end; next++) {
818                Py_UNICODE digit = buf[next];
819                c <<= 4;
820                switch (digit) {
821                    case '0': case '1': case '2': case '3': case '4':
822                    case '5': case '6': case '7': case '8': case '9':
823                        c |= (digit - '0'); break;
824                    case 'a': case 'b': case 'c': case 'd': case 'e':
825                    case 'f':
826                        c |= (digit - 'a' + 10); break;
827                    case 'A': case 'B': case 'C': case 'D': case 'E':
828                    case 'F':
829                        c |= (digit - 'A' + 10); break;
830                    default:
831                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
832                        goto bail;
833                }
834            }
835#ifdef Py_UNICODE_WIDE
836            /* Surrogate pair */
837            if ((c & 0xfc00) == 0xd800) {
838                Py_UNICODE c2 = 0;
839                if (end + 6 >= len) {
840                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
841                    goto bail;
842                }
843                if (buf[next++] != '\\' || buf[next++] != 'u') {
844                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
845                    goto bail;
846                }
847                end += 6;
848                /* Decode 4 hex digits */
849                for (; next < end; next++) {
850                    c2 <<= 4;
851                    Py_UNICODE digit = buf[next];
852                    switch (digit) {
853                        case '0': case '1': case '2': case '3': case '4':
854                        case '5': case '6': case '7': case '8': case '9':
855                            c2 |= (digit - '0'); break;
856                        case 'a': case 'b': case 'c': case 'd': case 'e':
857                        case 'f':
858                            c2 |= (digit - 'a' + 10); break;
859                        case 'A': case 'B': case 'C': case 'D': case 'E':
860                        case 'F':
861                            c2 |= (digit - 'A' + 10); break;
862                        default:
863                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
864                            goto bail;
865                    }
866                }
867                if ((c2 & 0xfc00) != 0xdc00) {
868                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
869                    goto bail;
870                }
871                c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
872            }
873            else if ((c & 0xfc00) == 0xdc00) {
874                raise_errmsg("Unpaired low surrogate", pystr, end - 5);
875                goto bail;
876            }
877#endif
878        }
879        APPEND_OLD_CHUNK
880        chunk = PyUnicode_FromUnicode(&c, 1);
881        if (chunk == NULL) {
882            goto bail;
883        }
884    }
885
886    if (chunks == NULL) {
887        if (chunk != NULL)
888            rval = chunk;
889        else
890            rval = PyUnicode_FromUnicode(NULL, 0);
891    }
892    else {
893        APPEND_OLD_CHUNK
894        rval = join_list_unicode(chunks);
895        if (rval == NULL) {
896            goto bail;
897        }
898        Py_CLEAR(chunks);
899    }
900    *next_end_ptr = end;
901    return rval;
902bail:
903    *next_end_ptr = -1;
904    Py_XDECREF(chunk);
905    Py_XDECREF(chunks);
906    return NULL;
907}
908
909PyDoc_STRVAR(pydoc_scanstring,
910    "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
911    "\n"
912    "Scan the string s for a JSON string. End is the index of the\n"
913    "character in s after the quote that started the JSON string.\n"
914    "Unescapes all valid JSON string escape sequences and raises ValueError\n"
915    "on attempt to decode an invalid string. If strict is False then literal\n"
916    "control characters are allowed in the string.\n"
917    "\n"
918    "Returns a tuple of the decoded string and the index of the character in s\n"
919    "after the end quote."
920);
921
922static PyObject *
923py_scanstring(PyObject* self UNUSED, PyObject *args)
924{
925    PyObject *pystr;
926    PyObject *rval;
927    Py_ssize_t end;
928    Py_ssize_t next_end = -1;
929    char *encoding = NULL;
930    int strict = 1;
931    if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
932        return NULL;
933    }
934    if (encoding == NULL) {
935        encoding = DEFAULT_ENCODING;
936    }
937    if (PyString_Check(pystr)) {
938        rval = scanstring_str(pystr, end, encoding, strict, &next_end);
939    }
940    else if (PyUnicode_Check(pystr)) {
941        rval = scanstring_unicode(pystr, end, strict, &next_end);
942    }
943    else {
944        PyErr_Format(PyExc_TypeError,
945                     "first argument must be a string, not %.80s",
946                     Py_TYPE(pystr)->tp_name);
947        return NULL;
948    }
949    return _build_rval_index_tuple(rval, next_end);
950}
951
952PyDoc_STRVAR(pydoc_encode_basestring_ascii,
953    "encode_basestring_ascii(basestring) -> str\n"
954    "\n"
955    "Return an ASCII-only JSON representation of a Python string"
956);
957
958static PyObject *
959py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
960{
961    /* Return an ASCII-only JSON representation of a Python string */
962    /* METH_O */
963    if (PyString_Check(pystr)) {
964        return ascii_escape_str(pystr);
965    }
966    else if (PyUnicode_Check(pystr)) {
967        return ascii_escape_unicode(pystr);
968    }
969    else {
970        PyErr_Format(PyExc_TypeError,
971                     "first argument must be a string, not %.80s",
972                     Py_TYPE(pystr)->tp_name);
973        return NULL;
974    }
975}
976
977static void
978scanner_dealloc(PyObject *self)
979{
980    /* Deallocate scanner object */
981    scanner_clear(self);
982    Py_TYPE(self)->tp_free(self);
983}
984
985static int
986scanner_traverse(PyObject *self, visitproc visit, void *arg)
987{
988    PyScannerObject *s;
989    assert(PyScanner_Check(self));
990    s = (PyScannerObject *)self;
991    Py_VISIT(s->encoding);
992    Py_VISIT(s->strict);
993    Py_VISIT(s->object_hook);
994    Py_VISIT(s->pairs_hook);
995    Py_VISIT(s->parse_float);
996    Py_VISIT(s->parse_int);
997    Py_VISIT(s->parse_constant);
998    Py_VISIT(s->memo);
999    return 0;
1000}
1001
1002static int
1003scanner_clear(PyObject *self)
1004{
1005    PyScannerObject *s;
1006    assert(PyScanner_Check(self));
1007    s = (PyScannerObject *)self;
1008    Py_CLEAR(s->encoding);
1009    Py_CLEAR(s->strict);
1010    Py_CLEAR(s->object_hook);
1011    Py_CLEAR(s->pairs_hook);
1012    Py_CLEAR(s->parse_float);
1013    Py_CLEAR(s->parse_int);
1014    Py_CLEAR(s->parse_constant);
1015    Py_CLEAR(s->memo);
1016    return 0;
1017}
1018
1019static PyObject *
1020_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1021    /* Read a JSON object from PyString pystr.
1022    idx is the index of the first character after the opening curly brace.
1023    *next_idx_ptr is a return-by-reference index to the first character after
1024        the closing curly brace.
1025
1026    Returns a new PyObject (usually a dict, but object_hook or
1027    object_pairs_hook can change that)
1028    */
1029    char *str = PyString_AS_STRING(pystr);
1030    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1031    PyObject *rval = NULL;
1032    PyObject *pairs = NULL;
1033    PyObject *item;
1034    PyObject *key = NULL;
1035    PyObject *val = NULL;
1036    char *encoding = PyString_AS_STRING(s->encoding);
1037    int strict = PyObject_IsTrue(s->strict);
1038    int has_pairs_hook = (s->pairs_hook != Py_None);
1039    Py_ssize_t next_idx;
1040    if (has_pairs_hook) {
1041        pairs = PyList_New(0);
1042        if (pairs == NULL)
1043            return NULL;
1044    }
1045    else {
1046        rval = PyDict_New();
1047        if (rval == NULL)
1048            return NULL;
1049    }
1050
1051    /* skip whitespace after { */
1052    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1053
1054    /* only loop if the object is non-empty */
1055    if (idx <= end_idx && str[idx] != '}') {
1056        while (idx <= end_idx) {
1057            PyObject *memokey;
1058
1059            /* read key */
1060            if (str[idx] != '"') {
1061                raise_errmsg(
1062                    "Expecting property name enclosed in double quotes",
1063                    pystr, idx);
1064                goto bail;
1065            }
1066            key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
1067            if (key == NULL)
1068                goto bail;
1069            memokey = PyDict_GetItem(s->memo, key);
1070            if (memokey != NULL) {
1071                Py_INCREF(memokey);
1072                Py_DECREF(key);
1073                key = memokey;
1074            }
1075            else {
1076                if (PyDict_SetItem(s->memo, key, key) < 0)
1077                    goto bail;
1078            }
1079            idx = next_idx;
1080
1081            /* skip whitespace between key and : delimiter, read :, skip whitespace */
1082            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1083            if (idx > end_idx || str[idx] != ':') {
1084                raise_errmsg("Expecting ':' delimiter", pystr, idx);
1085                goto bail;
1086            }
1087            idx++;
1088            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1089
1090            /* read any JSON data type */
1091            val = scan_once_str(s, pystr, idx, &next_idx);
1092            if (val == NULL)
1093                goto bail;
1094
1095            if (has_pairs_hook) {
1096                item = PyTuple_Pack(2, key, val);
1097                if (item == NULL)
1098                    goto bail;
1099                Py_CLEAR(key);
1100                Py_CLEAR(val);
1101                if (PyList_Append(pairs, item) == -1) {
1102                    Py_DECREF(item);
1103                    goto bail;
1104                }
1105                Py_DECREF(item);
1106            }
1107            else {
1108                if (PyDict_SetItem(rval, key, val) < 0)
1109                    goto bail;
1110                Py_CLEAR(key);
1111                Py_CLEAR(val);
1112            }
1113            idx = next_idx;
1114
1115            /* skip whitespace before } or , */
1116            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1117
1118            /* bail if the object is closed or we didn't get the , delimiter */
1119            if (idx > end_idx) break;
1120            if (str[idx] == '}') {
1121                break;
1122            }
1123            else if (str[idx] != ',') {
1124                raise_errmsg("Expecting ',' delimiter", pystr, idx);
1125                goto bail;
1126            }
1127            idx++;
1128
1129            /* skip whitespace after , delimiter */
1130            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1131        }
1132    }
1133    /* verify that idx < end_idx, str[idx] should be '}' */
1134    if (idx > end_idx || str[idx] != '}') {
1135        raise_errmsg("Expecting object", pystr, end_idx);
1136        goto bail;
1137    }
1138
1139    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1140    if (s->pairs_hook != Py_None) {
1141        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1142        if (val == NULL)
1143            goto bail;
1144        Py_DECREF(pairs);
1145        *next_idx_ptr = idx + 1;
1146        return val;
1147    }
1148
1149    /* if object_hook is not None: rval = object_hook(rval) */
1150    if (s->object_hook != Py_None) {
1151        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1152        if (val == NULL)
1153            goto bail;
1154        Py_DECREF(rval);
1155        rval = val;
1156        val = NULL;
1157    }
1158    *next_idx_ptr = idx + 1;
1159    return rval;
1160bail:
1161    Py_XDECREF(rval);
1162    Py_XDECREF(key);
1163    Py_XDECREF(val);
1164    Py_XDECREF(pairs);
1165    return NULL;
1166}
1167
1168static PyObject *
1169_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1170    /* Read a JSON object from PyUnicode pystr.
1171    idx is the index of the first character after the opening curly brace.
1172    *next_idx_ptr is a return-by-reference index to the first character after
1173        the closing curly brace.
1174
1175    Returns a new PyObject (usually a dict, but object_hook can change that)
1176    */
1177    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1178    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1179    PyObject *rval = NULL;
1180    PyObject *pairs = NULL;
1181    PyObject *item;
1182    PyObject *key = NULL;
1183    PyObject *val = NULL;
1184    int strict = PyObject_IsTrue(s->strict);
1185    int has_pairs_hook = (s->pairs_hook != Py_None);
1186    Py_ssize_t next_idx;
1187
1188    if (has_pairs_hook) {
1189        pairs = PyList_New(0);
1190        if (pairs == NULL)
1191            return NULL;
1192    }
1193    else {
1194        rval = PyDict_New();
1195        if (rval == NULL)
1196            return NULL;
1197    }
1198
1199    /* skip whitespace after { */
1200    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1201
1202    /* only loop if the object is non-empty */
1203    if (idx <= end_idx && str[idx] != '}') {
1204        while (idx <= end_idx) {
1205            PyObject *memokey;
1206
1207            /* read key */
1208            if (str[idx] != '"') {
1209                raise_errmsg(
1210                    "Expecting property name enclosed in double quotes",
1211                    pystr, idx);
1212                goto bail;
1213            }
1214            key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1215            if (key == NULL)
1216                goto bail;
1217            memokey = PyDict_GetItem(s->memo, key);
1218            if (memokey != NULL) {
1219                Py_INCREF(memokey);
1220                Py_DECREF(key);
1221                key = memokey;
1222            }
1223            else {
1224                if (PyDict_SetItem(s->memo, key, key) < 0)
1225                    goto bail;
1226            }
1227            idx = next_idx;
1228
1229            /* skip whitespace between key and : delimiter, read :, skip
1230               whitespace */
1231            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1232            if (idx > end_idx || str[idx] != ':') {
1233                raise_errmsg("Expecting ':' delimiter", pystr, idx);
1234                goto bail;
1235            }
1236            idx++;
1237            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1238
1239            /* read any JSON term */
1240            val = scan_once_unicode(s, pystr, idx, &next_idx);
1241            if (val == NULL)
1242                goto bail;
1243
1244            if (has_pairs_hook) {
1245                item = PyTuple_Pack(2, key, val);
1246                if (item == NULL)
1247                    goto bail;
1248                Py_CLEAR(key);
1249                Py_CLEAR(val);
1250                if (PyList_Append(pairs, item) == -1) {
1251                    Py_DECREF(item);
1252                    goto bail;
1253                }
1254                Py_DECREF(item);
1255            }
1256            else {
1257                if (PyDict_SetItem(rval, key, val) < 0)
1258                    goto bail;
1259                Py_CLEAR(key);
1260                Py_CLEAR(val);
1261            }
1262            idx = next_idx;
1263
1264            /* skip whitespace before } or , */
1265            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1266
1267            /* bail if the object is closed or we didn't get the ,
1268               delimiter */
1269            if (idx > end_idx) break;
1270            if (str[idx] == '}') {
1271                break;
1272            }
1273            else if (str[idx] != ',') {
1274                raise_errmsg("Expecting ',' delimiter", pystr, idx);
1275                goto bail;
1276            }
1277            idx++;
1278
1279            /* skip whitespace after , delimiter */
1280            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1281        }
1282    }
1283
1284    /* verify that idx < end_idx, str[idx] should be '}' */
1285    if (idx > end_idx || str[idx] != '}') {
1286        raise_errmsg("Expecting object", pystr, end_idx);
1287        goto bail;
1288    }
1289
1290    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1291    if (s->pairs_hook != Py_None) {
1292        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1293        if (val == NULL)
1294            goto bail;
1295        Py_DECREF(pairs);
1296        *next_idx_ptr = idx + 1;
1297        return val;
1298    }
1299
1300    /* if object_hook is not None: rval = object_hook(rval) */
1301    if (s->object_hook != Py_None) {
1302        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1303        if (val == NULL)
1304            goto bail;
1305        Py_DECREF(rval);
1306        rval = val;
1307        val = NULL;
1308    }
1309    *next_idx_ptr = idx + 1;
1310    return rval;
1311bail:
1312    Py_XDECREF(rval);
1313    Py_XDECREF(key);
1314    Py_XDECREF(val);
1315    Py_XDECREF(pairs);
1316    return NULL;
1317}
1318
1319static PyObject *
1320_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1321    /* Read a JSON array from PyString pystr.
1322    idx is the index of the first character after the opening brace.
1323    *next_idx_ptr is a return-by-reference index to the first character after
1324        the closing brace.
1325
1326    Returns a new PyList
1327    */
1328    char *str = PyString_AS_STRING(pystr);
1329    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1330    PyObject *val = NULL;
1331    PyObject *rval = PyList_New(0);
1332    Py_ssize_t next_idx;
1333    if (rval == NULL)
1334        return NULL;
1335
1336    /* skip whitespace after [ */
1337    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1338
1339    /* only loop if the array is non-empty */
1340    if (idx <= end_idx && str[idx] != ']') {
1341        while (idx <= end_idx) {
1342
1343            /* read any JSON term and de-tuplefy the (rval, idx) */
1344            val = scan_once_str(s, pystr, idx, &next_idx);
1345            if (val == NULL) {
1346                if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1347                    PyErr_Clear();
1348                    raise_errmsg("Expecting object", pystr, idx);
1349                }
1350                goto bail;
1351            }
1352
1353            if (PyList_Append(rval, val) == -1)
1354                goto bail;
1355
1356            Py_CLEAR(val);
1357            idx = next_idx;
1358
1359            /* skip whitespace between term and , */
1360            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1361
1362            /* bail if the array is closed or we didn't get the , delimiter */
1363            if (idx > end_idx) break;
1364            if (str[idx] == ']') {
1365                break;
1366            }
1367            else if (str[idx] != ',') {
1368                raise_errmsg("Expecting ',' delimiter", pystr, idx);
1369                goto bail;
1370            }
1371            idx++;
1372
1373            /* skip whitespace after , */
1374            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1375        }
1376    }
1377
1378    /* verify that idx < end_idx, str[idx] should be ']' */
1379    if (idx > end_idx || str[idx] != ']') {
1380        raise_errmsg("Expecting object", pystr, end_idx);
1381        goto bail;
1382    }
1383    *next_idx_ptr = idx + 1;
1384    return rval;
1385bail:
1386    Py_XDECREF(val);
1387    Py_DECREF(rval);
1388    return NULL;
1389}
1390
1391static PyObject *
1392_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1393    /* Read a JSON array from PyString pystr.
1394    idx is the index of the first character after the opening brace.
1395    *next_idx_ptr is a return-by-reference index to the first character after
1396        the closing brace.
1397
1398    Returns a new PyList
1399    */
1400    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1401    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1402    PyObject *val = NULL;
1403    PyObject *rval = PyList_New(0);
1404    Py_ssize_t next_idx;
1405    if (rval == NULL)
1406        return NULL;
1407
1408    /* skip whitespace after [ */
1409    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1410
1411    /* only loop if the array is non-empty */
1412    if (idx <= end_idx && str[idx] != ']') {
1413        while (idx <= end_idx) {
1414
1415            /* read any JSON term  */
1416            val = scan_once_unicode(s, pystr, idx, &next_idx);
1417            if (val == NULL) {
1418                if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1419                    PyErr_Clear();
1420                    raise_errmsg("Expecting object", pystr, idx);
1421                }
1422                goto bail;
1423            }
1424
1425            if (PyList_Append(rval, val) == -1)
1426                goto bail;
1427
1428            Py_CLEAR(val);
1429            idx = next_idx;
1430
1431            /* skip whitespace between term and , */
1432            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1433
1434            /* bail if the array is closed or we didn't get the , delimiter */
1435            if (idx > end_idx) break;
1436            if (str[idx] == ']') {
1437                break;
1438            }
1439            else if (str[idx] != ',') {
1440                raise_errmsg("Expecting ',' delimiter", pystr, idx);
1441                goto bail;
1442            }
1443            idx++;
1444
1445            /* skip whitespace after , */
1446            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1447        }
1448    }
1449
1450    /* verify that idx < end_idx, str[idx] should be ']' */
1451    if (idx > end_idx || str[idx] != ']') {
1452        raise_errmsg("Expecting object", pystr, end_idx);
1453        goto bail;
1454    }
1455    *next_idx_ptr = idx + 1;
1456    return rval;
1457bail:
1458    Py_XDECREF(val);
1459    Py_DECREF(rval);
1460    return NULL;
1461}
1462
1463static PyObject *
1464_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1465    /* Read a JSON constant from PyString pystr.
1466    constant is the constant string that was found
1467        ("NaN", "Infinity", "-Infinity").
1468    idx is the index of the first character of the constant
1469    *next_idx_ptr is a return-by-reference index to the first character after
1470        the constant.
1471
1472    Returns the result of parse_constant
1473    */
1474    PyObject *cstr;
1475    PyObject *rval;
1476    /* constant is "NaN", "Infinity", or "-Infinity" */
1477    cstr = PyString_InternFromString(constant);
1478    if (cstr == NULL)
1479        return NULL;
1480
1481    /* rval = parse_constant(constant) */
1482    rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1483    idx += PyString_GET_SIZE(cstr);
1484    Py_DECREF(cstr);
1485    *next_idx_ptr = idx;
1486    return rval;
1487}
1488
1489static PyObject *
1490_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1491    /* Read a JSON number from PyString pystr.
1492    idx is the index of the first character of the number
1493    *next_idx_ptr is a return-by-reference index to the first character after
1494        the number.
1495
1496    Returns a new PyObject representation of that number:
1497        PyInt, PyLong, or PyFloat.
1498        May return other types if parse_int or parse_float are set
1499    */
1500    char *str = PyString_AS_STRING(pystr);
1501    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1502    Py_ssize_t idx = start;
1503    int is_float = 0;
1504    PyObject *rval;
1505    PyObject *numstr;
1506
1507    /* read a sign if it's there, make sure it's not the end of the string */
1508    if (str[idx] == '-') {
1509        idx++;
1510        if (idx > end_idx) {
1511            PyErr_SetNone(PyExc_StopIteration);
1512            return NULL;
1513        }
1514    }
1515
1516    /* read as many integer digits as we find as long as it doesn't start with 0 */
1517    if (str[idx] >= '1' && str[idx] <= '9') {
1518        idx++;
1519        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1520    }
1521    /* if it starts with 0 we only expect one integer digit */
1522    else if (str[idx] == '0') {
1523        idx++;
1524    }
1525    /* no integer digits, error */
1526    else {
1527        PyErr_SetNone(PyExc_StopIteration);
1528        return NULL;
1529    }
1530
1531    /* if the next char is '.' followed by a digit then read all float digits */
1532    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1533        is_float = 1;
1534        idx += 2;
1535        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1536    }
1537
1538    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1539    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1540
1541        /* save the index of the 'e' or 'E' just in case we need to backtrack */
1542        Py_ssize_t e_start = idx;
1543        idx++;
1544
1545        /* read an exponent sign if present */
1546        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1547
1548        /* read all digits */
1549        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1550
1551        /* if we got a digit, then parse as float. if not, backtrack */
1552        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1553            is_float = 1;
1554        }
1555        else {
1556            idx = e_start;
1557        }
1558    }
1559
1560    /* copy the section we determined to be a number */
1561    numstr = PyString_FromStringAndSize(&str[start], idx - start);
1562    if (numstr == NULL)
1563        return NULL;
1564    if (is_float) {
1565        /* parse as a float using a fast path if available, otherwise call user defined method */
1566        if (s->parse_float != (PyObject *)&PyFloat_Type) {
1567            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1568        }
1569        else {
1570            /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
1571            double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1572                                             NULL, NULL);
1573            if (d == -1.0 && PyErr_Occurred())
1574                return NULL;
1575            rval = PyFloat_FromDouble(d);
1576        }
1577    }
1578    else {
1579        /* parse as an int using a fast path if available, otherwise call user defined method */
1580        if (s->parse_int != (PyObject *)&PyInt_Type) {
1581            rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1582        }
1583        else {
1584            rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1585        }
1586    }
1587    Py_DECREF(numstr);
1588    *next_idx_ptr = idx;
1589    return rval;
1590}
1591
1592static PyObject *
1593_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1594    /* Read a JSON number from PyUnicode pystr.
1595    idx is the index of the first character of the number
1596    *next_idx_ptr is a return-by-reference index to the first character after
1597        the number.
1598
1599    Returns a new PyObject representation of that number:
1600        PyInt, PyLong, or PyFloat.
1601        May return other types if parse_int or parse_float are set
1602    */
1603    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1604    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1605    Py_ssize_t idx = start;
1606    int is_float = 0;
1607    PyObject *rval;
1608    PyObject *numstr;
1609
1610    /* read a sign if it's there, make sure it's not the end of the string */
1611    if (str[idx] == '-') {
1612        idx++;
1613        if (idx > end_idx) {
1614            PyErr_SetNone(PyExc_StopIteration);
1615            return NULL;
1616        }
1617    }
1618
1619    /* read as many integer digits as we find as long as it doesn't start with 0 */
1620    if (str[idx] >= '1' && str[idx] <= '9') {
1621        idx++;
1622        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1623    }
1624    /* if it starts with 0 we only expect one integer digit */
1625    else if (str[idx] == '0') {
1626        idx++;
1627    }
1628    /* no integer digits, error */
1629    else {
1630        PyErr_SetNone(PyExc_StopIteration);
1631        return NULL;
1632    }
1633
1634    /* if the next char is '.' followed by a digit then read all float digits */
1635    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1636        is_float = 1;
1637        idx += 2;
1638        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1639    }
1640
1641    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1642    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1643        Py_ssize_t e_start = idx;
1644        idx++;
1645
1646        /* read an exponent sign if present */
1647        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1648
1649        /* read all digits */
1650        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1651
1652        /* if we got a digit, then parse as float. if not, backtrack */
1653        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1654            is_float = 1;
1655        }
1656        else {
1657            idx = e_start;
1658        }
1659    }
1660
1661    /* copy the section we determined to be a number */
1662    numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1663    if (numstr == NULL)
1664        return NULL;
1665    if (is_float) {
1666        /* parse as a float using a fast path if available, otherwise call user defined method */
1667        if (s->parse_float != (PyObject *)&PyFloat_Type) {
1668            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1669        }
1670        else {
1671            rval = PyFloat_FromString(numstr, NULL);
1672        }
1673    }
1674    else {
1675        /* no fast path for unicode -> int, just call */
1676        rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1677    }
1678    Py_DECREF(numstr);
1679    *next_idx_ptr = idx;
1680    return rval;
1681}
1682
1683static PyObject *
1684scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1685{
1686    /* Read one JSON term (of any kind) from PyString pystr.
1687    idx is the index of the first character of the term
1688    *next_idx_ptr is a return-by-reference index to the first character after
1689        the number.
1690
1691    Returns a new PyObject representation of the term.
1692    */
1693    char *str = PyString_AS_STRING(pystr);
1694    Py_ssize_t length = PyString_GET_SIZE(pystr);
1695    PyObject *rval = NULL;
1696    int fallthrough = 0;
1697    if (idx >= length) {
1698        PyErr_SetNone(PyExc_StopIteration);
1699        return NULL;
1700    }
1701    if (Py_EnterRecursiveCall(" while decoding a JSON document"))
1702        return NULL;
1703    switch (str[idx]) {
1704        case '"':
1705            /* string */
1706            rval = scanstring_str(pystr, idx + 1,
1707                PyString_AS_STRING(s->encoding),
1708                PyObject_IsTrue(s->strict),
1709                next_idx_ptr);
1710            break;
1711        case '{':
1712            /* object */
1713            rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1714            break;
1715        case '[':
1716            /* array */
1717            rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1718            break;
1719        case 'n':
1720            /* null */
1721            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1722                Py_INCREF(Py_None);
1723                *next_idx_ptr = idx + 4;
1724                rval = Py_None;
1725            }
1726            else
1727                fallthrough = 1;
1728            break;
1729        case 't':
1730            /* true */
1731            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1732                Py_INCREF(Py_True);
1733                *next_idx_ptr = idx + 4;
1734                rval = Py_True;
1735            }
1736            else
1737                fallthrough = 1;
1738            break;
1739        case 'f':
1740            /* false */
1741            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1742                Py_INCREF(Py_False);
1743                *next_idx_ptr = idx + 5;
1744                rval = Py_False;
1745            }
1746            else
1747                fallthrough = 1;
1748            break;
1749        case 'N':
1750            /* NaN */
1751            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1752                rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
1753            }
1754            else
1755                fallthrough = 1;
1756            break;
1757        case 'I':
1758            /* Infinity */
1759            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1760                rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
1761            }
1762            else
1763                fallthrough = 1;
1764            break;
1765        case '-':
1766            /* -Infinity */
1767            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1768                rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1769            }
1770            else
1771                fallthrough = 1;
1772            break;
1773        default:
1774            fallthrough = 1;
1775    }
1776    /* Didn't find a string, object, array, or named constant. Look for a number. */
1777    if (fallthrough)
1778        rval = _match_number_str(s, pystr, idx, next_idx_ptr);
1779    Py_LeaveRecursiveCall();
1780    return rval;
1781}
1782
1783static PyObject *
1784scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1785{
1786    /* Read one JSON term (of any kind) from PyUnicode pystr.
1787    idx is the index of the first character of the term
1788    *next_idx_ptr is a return-by-reference index to the first character after
1789        the number.
1790
1791    Returns a new PyObject representation of the term.
1792    */
1793    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1794    Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1795    PyObject *rval = NULL;
1796    int fallthrough = 0;
1797    if (idx >= length) {
1798        PyErr_SetNone(PyExc_StopIteration);
1799        return NULL;
1800    }
1801    if (Py_EnterRecursiveCall(" while decoding a JSON document"))
1802        return NULL;
1803    switch (str[idx]) {
1804        case '"':
1805            /* string */
1806            rval = scanstring_unicode(pystr, idx + 1,
1807                PyObject_IsTrue(s->strict),
1808                next_idx_ptr);
1809            break;
1810        case '{':
1811            /* object */
1812            rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1813            break;
1814        case '[':
1815            /* array */
1816            rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1817            break;
1818        case 'n':
1819            /* null */
1820            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1821                Py_INCREF(Py_None);
1822                *next_idx_ptr = idx + 4;
1823                rval = Py_None;
1824            }
1825            else
1826                fallthrough = 1;
1827            break;
1828        case 't':
1829            /* true */
1830            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1831                Py_INCREF(Py_True);
1832                *next_idx_ptr = idx + 4;
1833                rval = Py_True;
1834            }
1835            else
1836                fallthrough = 1;
1837            break;
1838        case 'f':
1839            /* false */
1840            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1841                Py_INCREF(Py_False);
1842                *next_idx_ptr = idx + 5;
1843                rval = Py_False;
1844            }
1845            else
1846                fallthrough = 1;
1847            break;
1848        case 'N':
1849            /* NaN */
1850            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1851                rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
1852            }
1853            else
1854                fallthrough = 1;
1855            break;
1856        case 'I':
1857            /* Infinity */
1858            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1859                rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
1860            }
1861            else
1862                fallthrough = 1;
1863            break;
1864        case '-':
1865            /* -Infinity */
1866            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1867                rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1868            }
1869            else
1870                fallthrough = 1;
1871            break;
1872        default:
1873            fallthrough = 1;
1874    }
1875    /* Didn't find a string, object, array, or named constant. Look for a number. */
1876    if (fallthrough)
1877        rval = _match_number_unicode(s, pystr, idx, next_idx_ptr);
1878    Py_LeaveRecursiveCall();
1879    return rval;
1880}
1881
1882static PyObject *
1883scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1884{
1885    /* Python callable interface to scan_once_{str,unicode} */
1886    PyObject *pystr;
1887    PyObject *rval;
1888    Py_ssize_t idx;
1889    Py_ssize_t next_idx = -1;
1890    static char *kwlist[] = {"string", "idx", NULL};
1891    PyScannerObject *s;
1892    assert(PyScanner_Check(self));
1893    s = (PyScannerObject *)self;
1894    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1895        return NULL;
1896
1897    if (PyString_Check(pystr)) {
1898        rval = scan_once_str(s, pystr, idx, &next_idx);
1899    }
1900    else if (PyUnicode_Check(pystr)) {
1901        rval = scan_once_unicode(s, pystr, idx, &next_idx);
1902    }
1903    else {
1904        PyErr_Format(PyExc_TypeError,
1905                 "first argument must be a string, not %.80s",
1906                 Py_TYPE(pystr)->tp_name);
1907        return NULL;
1908    }
1909    PyDict_Clear(s->memo);
1910    return _build_rval_index_tuple(rval, next_idx);
1911}
1912
1913static PyObject *
1914scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1915{
1916    PyScannerObject *s;
1917    s = (PyScannerObject *)type->tp_alloc(type, 0);
1918    if (s != NULL) {
1919        s->encoding = NULL;
1920        s->strict = NULL;
1921        s->object_hook = NULL;
1922        s->pairs_hook = NULL;
1923        s->parse_float = NULL;
1924        s->parse_int = NULL;
1925        s->parse_constant = NULL;
1926    }
1927    return (PyObject *)s;
1928}
1929
1930static int
1931scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1932{
1933    /* Initialize Scanner object */
1934    PyObject *ctx;
1935    static char *kwlist[] = {"context", NULL};
1936    PyScannerObject *s;
1937
1938    assert(PyScanner_Check(self));
1939    s = (PyScannerObject *)self;
1940
1941    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1942        return -1;
1943
1944    if (s->memo == NULL) {
1945        s->memo = PyDict_New();
1946        if (s->memo == NULL)
1947            goto bail;
1948    }
1949
1950    /* PyString_AS_STRING is used on encoding */
1951    s->encoding = PyObject_GetAttrString(ctx, "encoding");
1952    if (s->encoding == NULL)
1953        goto bail;
1954    if (s->encoding == Py_None) {
1955        Py_DECREF(Py_None);
1956        s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1957    }
1958    else if (PyUnicode_Check(s->encoding)) {
1959        PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1960        Py_DECREF(s->encoding);
1961        s->encoding = tmp;
1962    }
1963    if (s->encoding == NULL || !PyString_Check(s->encoding))
1964        goto bail;
1965
1966    /* All of these will fail "gracefully" so we don't need to verify them */
1967    s->strict = PyObject_GetAttrString(ctx, "strict");
1968    if (s->strict == NULL)
1969        goto bail;
1970    s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1971    if (s->object_hook == NULL)
1972        goto bail;
1973    s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1974    if (s->pairs_hook == NULL)
1975        goto bail;
1976    s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1977    if (s->parse_float == NULL)
1978        goto bail;
1979    s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1980    if (s->parse_int == NULL)
1981        goto bail;
1982    s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1983    if (s->parse_constant == NULL)
1984        goto bail;
1985
1986    return 0;
1987
1988bail:
1989    Py_CLEAR(s->encoding);
1990    Py_CLEAR(s->strict);
1991    Py_CLEAR(s->object_hook);
1992    Py_CLEAR(s->pairs_hook);
1993    Py_CLEAR(s->parse_float);
1994    Py_CLEAR(s->parse_int);
1995    Py_CLEAR(s->parse_constant);
1996    return -1;
1997}
1998
/* Docstring attached to the Scanner type through tp_doc below. */
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Static type object for simplejson._speedups.Scanner.
   Instances are callable (tp_call = scanner_call) and take part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC with tp_traverse / tp_clear).
   Slots written as 0 next to a commented-out function name are not set
   statically here. NOTE(review): presumably they are filled in during
   module initialisation; confirm against the module init code. */
static
PyTypeObject PyScannerType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* tp_internal */
    "simplejson._speedups.Scanner",       /* tp_name */
    sizeof(PyScannerObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    scanner_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    scanner_call,         /* tp_call */
    0,                    /* tp_str */
    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    scanner_doc,          /* tp_doc */
    scanner_traverse,                    /* tp_traverse */
    scanner_clear,                    /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    scanner_members,                    /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    scanner_init,                    /* tp_init */
    0,/* PyType_GenericAlloc, */        /* tp_alloc */
    scanner_new,          /* tp_new */
    0,/* PyObject_GC_Del, */              /* tp_free */
};
2044
2045static PyObject *
2046encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2047{
2048    PyEncoderObject *s;
2049    s = (PyEncoderObject *)type->tp_alloc(type, 0);
2050    if (s != NULL) {
2051        s->markers = NULL;
2052        s->defaultfn = NULL;
2053        s->encoder = NULL;
2054        s->indent = NULL;
2055        s->key_separator = NULL;
2056        s->item_separator = NULL;
2057        s->sort_keys = NULL;
2058        s->skipkeys = NULL;
2059        s->key_memo = NULL;
2060        s->item_sort_key = NULL;
2061        s->Decimal = NULL;
2062    }
2063    return (PyObject *)s;
2064}
2065
2066static int
2067encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
2068{
2069    /* initialize Encoder object */
2070    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "Decimal", NULL};
2071
2072    PyEncoderObject *s;
2073    PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
2074    PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo;
2075    PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array;
2076    PyObject *bigint_as_string, *item_sort_key, *Decimal;
2077
2078    assert(PyEncoder_Check(self));
2079    s = (PyEncoderObject *)self;
2080
2081    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOO:make_encoder", kwlist,
2082        &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
2083        &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal,
2084        &namedtuple_as_object, &tuple_as_array, &bigint_as_string,
2085        &item_sort_key, &Decimal))
2086        return -1;
2087
2088    s->markers = markers;
2089    s->defaultfn = defaultfn;
2090    s->encoder = encoder;
2091    s->indent = indent;
2092    s->key_separator = key_separator;
2093    s->item_separator = item_separator;
2094    s->sort_keys = sort_keys;
2095    s->skipkeys = skipkeys;
2096    s->key_memo = key_memo;
2097    s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
2098    s->allow_nan = PyObject_IsTrue(allow_nan);
2099    s->use_decimal = PyObject_IsTrue(use_decimal);
2100    s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object);
2101    s->tuple_as_array = PyObject_IsTrue(tuple_as_array);
2102    s->bigint_as_string = PyObject_IsTrue(bigint_as_string);
2103    s->item_sort_key = item_sort_key;
2104    s->Decimal = Decimal;
2105
2106    Py_INCREF(s->markers);
2107    Py_INCREF(s->defaultfn);
2108    Py_INCREF(s->encoder);
2109    Py_INCREF(s->indent);
2110    Py_INCREF(s->key_separator);
2111    Py_INCREF(s->item_separator);
2112    Py_INCREF(s->sort_keys);
2113    Py_INCREF(s->skipkeys);
2114    Py_INCREF(s->key_memo);
2115    Py_INCREF(s->item_sort_key);
2116    Py_INCREF(s->Decimal);
2117    return 0;
2118}
2119
2120static PyObject *
2121encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
2122{
2123    /* Python callable interface to encode_listencode_obj */
2124    static char *kwlist[] = {"obj", "_current_indent_level", NULL};
2125    PyObject *obj;
2126    PyObject *rval;
2127    Py_ssize_t indent_level;
2128    PyEncoderObject *s;
2129    assert(PyEncoder_Check(self));
2130    s = (PyEncoderObject *)self;
2131    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
2132        &obj, _convertPyInt_AsSsize_t, &indent_level))
2133        return NULL;
2134    rval = PyList_New(0);
2135    if (rval == NULL)
2136        return NULL;
2137    if (encoder_listencode_obj(s, rval, obj, indent_level)) {
2138        Py_DECREF(rval);
2139        return NULL;
2140    }
2141    return rval;
2142}
2143
2144static PyObject *
2145_encoded_const(PyObject *obj)
2146{
2147    /* Return the JSON string representation of None, True, False */
2148    if (obj == Py_None) {
2149        static PyObject *s_null = NULL;
2150        if (s_null == NULL) {
2151            s_null = PyString_InternFromString("null");
2152        }
2153        Py_INCREF(s_null);
2154        return s_null;
2155    }
2156    else if (obj == Py_True) {
2157        static PyObject *s_true = NULL;
2158        if (s_true == NULL) {
2159            s_true = PyString_InternFromString("true");
2160        }
2161        Py_INCREF(s_true);
2162        return s_true;
2163    }
2164    else if (obj == Py_False) {
2165        static PyObject *s_false = NULL;
2166        if (s_false == NULL) {
2167            s_false = PyString_InternFromString("false");
2168        }
2169        Py_INCREF(s_false);
2170        return s_false;
2171    }
2172    else {
2173        PyErr_SetString(PyExc_ValueError, "not a const");
2174        return NULL;
2175    }
2176}
2177
2178static PyObject *
2179encoder_encode_float(PyEncoderObject *s, PyObject *obj)
2180{
2181    /* Return the JSON representation of a PyFloat */
2182    double i = PyFloat_AS_DOUBLE(obj);
2183    if (!Py_IS_FINITE(i)) {
2184        if (!s->allow_nan) {
2185            PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
2186            return NULL;
2187        }
2188        if (i > 0) {
2189            return PyString_FromString("Infinity");
2190        }
2191        else if (i < 0) {
2192            return PyString_FromString("-Infinity");
2193        }
2194        else {
2195            return PyString_FromString("NaN");
2196        }
2197    }
2198    /* Use a better float format here? */
2199    return PyObject_Repr(obj);
2200}
2201
2202static PyObject *
2203encoder_encode_string(PyEncoderObject *s, PyObject *obj)
2204{
2205    /* Return the JSON representation of a string */
2206    if (s->fast_encode)
2207        return py_encode_basestring_ascii(NULL, obj);
2208    else
2209        return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
2210}
2211
2212static int
2213_steal_list_append(PyObject *lst, PyObject *stolen)
2214{
2215    /* Append stolen and then decrement its reference count */
2216    int rval = PyList_Append(lst, stolen);
2217    Py_DECREF(stolen);
2218    return rval;
2219}
2220
2221static int
2222encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
2223{
2224    /* Encode Python object obj to a JSON term, rval is a PyList */
2225    int rv = -1;
2226    if (Py_EnterRecursiveCall(" while encoding a JSON document"))
2227        return rv;
2228    do {
2229        if (obj == Py_None || obj == Py_True || obj == Py_False) {
2230            PyObject *cstr = _encoded_const(obj);
2231            if (cstr != NULL)
2232                rv = _steal_list_append(rval, cstr);
2233        }
2234        else if (PyString_Check(obj) || PyUnicode_Check(obj))
2235        {
2236            PyObject *encoded = encoder_encode_string(s, obj);
2237            if (encoded != NULL)
2238                rv = _steal_list_append(rval, encoded);
2239        }
2240        else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2241            PyObject *encoded = PyObject_Str(obj);
2242            if (encoded != NULL) {
2243                if (s->bigint_as_string) {
2244                    encoded = maybe_quote_bigint(encoded, obj);
2245                    if (encoded == NULL)
2246                        break;
2247                }
2248                rv = _steal_list_append(rval, encoded);
2249            }
2250        }
2251        else if (PyFloat_Check(obj)) {
2252            PyObject *encoded = encoder_encode_float(s, obj);
2253            if (encoded != NULL)
2254                rv = _steal_list_append(rval, encoded);
2255        }
2256        else if (s->namedtuple_as_object && _is_namedtuple(obj)) {
2257            PyObject *newobj = PyObject_CallMethod(obj, "_asdict", NULL);
2258            if (newobj != NULL) {
2259                rv = encoder_listencode_dict(s, rval, newobj, indent_level);
2260                Py_DECREF(newobj);
2261            }
2262        }
2263        else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) {
2264            rv = encoder_listencode_list(s, rval, obj, indent_level);
2265        }
2266        else if (PyDict_Check(obj)) {
2267            rv = encoder_listencode_dict(s, rval, obj, indent_level);
2268        }
2269        else if (s->use_decimal && PyObject_TypeCheck(obj, s->Decimal)) {
2270            PyObject *encoded = PyObject_Str(obj);
2271            if (encoded != NULL)
2272                rv = _steal_list_append(rval, encoded);
2273        }
2274        else {
2275            PyObject *ident = NULL;
2276            PyObject *newobj;
2277            if (s->markers != Py_None) {
2278                int has_key;
2279                ident = PyLong_FromVoidPtr(obj);
2280                if (ident == NULL)
2281                    break;
2282                has_key = PyDict_Contains(s->markers, ident);
2283                if (has_key) {
2284                    if (has_key != -1)
2285                        PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2286                    Py_DECREF(ident);
2287                    break;
2288                }
2289                if (PyDict_SetItem(s->markers, ident, obj)) {
2290                    Py_DECREF(ident);
2291                    break;
2292                }
2293            }
2294            newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2295            if (newobj == NULL) {
2296                Py_XDECREF(ident);
2297                break;
2298            }
2299            rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2300            Py_DECREF(newobj);
2301            if (rv) {
2302                Py_XDECREF(ident);
2303                rv = -1;
2304            }
2305            else if (ident != NULL) {
2306                if (PyDict_DelItem(s->markers, ident)) {
2307                    Py_XDECREF(ident);
2308                    rv = -1;
2309                }
2310                Py_XDECREF(ident);
2311            }
2312        }
2313    } while (0);
2314    Py_LeaveRecursiveCall();
2315    return rv;
2316}
2317
2318static int
2319encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2320{
2321    /* Encode Python dict dct a JSON term, rval is a PyList */
2322    static PyObject *open_dict = NULL;
2323    static PyObject *close_dict = NULL;
2324    static PyObject *empty_dict = NULL;
2325    static PyObject *iteritems = NULL;
2326    PyObject *kstr = NULL;
2327    PyObject *ident = NULL;
2328    PyObject *iter = NULL;
2329    PyObject *item = NULL;
2330    PyObject *items = NULL;
2331    PyObject *encoded = NULL;
2332    int skipkeys;
2333    Py_ssize_t idx;
2334
2335    if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) {
2336        open_dict = PyString_InternFromString("{");
2337        close_dict = PyString_InternFromString("}");
2338        empty_dict = PyString_InternFromString("{}");
2339        iteritems = PyString_InternFromString("iteritems");
2340        if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL)
2341            return -1;
2342    }
2343    if (PyDict_Size(dct) == 0)
2344        return PyList_Append(rval, empty_dict);
2345
2346    if (s->markers != Py_None) {
2347        int has_key;
2348        ident = PyLong_FromVoidPtr(dct);
2349        if (ident == NULL)
2350            goto bail;
2351        has_key = PyDict_Contains(s->markers, ident);
2352        if (has_key) {
2353            if (has_key != -1)
2354                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2355            goto bail;
2356        }
2357        if (PyDict_SetItem(s->markers, ident, dct)) {
2358            goto bail;
2359        }
2360    }
2361
2362    if (PyList_Append(rval, open_dict))
2363        goto bail;
2364
2365    if (s->indent != Py_None) {
2366        /* TODO: DOES NOT RUN */
2367        indent_level += 1;
2368        /*
2369            newline_indent = '\n' + (_indent * _current_indent_level)
2370            separator = _item_separator + newline_indent
2371            buf += newline_indent
2372        */
2373    }
2374
2375    if (PyCallable_Check(s->item_sort_key)) {
2376        if (PyDict_CheckExact(dct))
2377            items = PyDict_Items(dct);
2378        else
2379            items = PyMapping_Items(dct);
2380        PyObject_CallMethod(items, "sort", "OO", Py_None, s->item_sort_key);
2381    }
2382    else if (PyObject_IsTrue(s->sort_keys)) {
2383        /* First sort the keys then replace them with (key, value) tuples. */
2384        Py_ssize_t i, nitems;
2385        if (PyDict_CheckExact(dct))
2386            items = PyDict_Keys(dct);
2387        else
2388            items = PyMapping_Keys(dct);
2389        if (items == NULL)
2390            goto bail;
2391        if (!PyList_Check(items)) {
2392            PyErr_SetString(PyExc_ValueError, "keys must return list");
2393            goto bail;
2394        }
2395        if (PyList_Sort(items) < 0)
2396            goto bail;
2397        nitems = PyList_GET_SIZE(items);
2398        for (i = 0; i < nitems; i++) {
2399            PyObject *key, *value;
2400            key = PyList_GET_ITEM(items, i);
2401            value = PyDict_GetItem(dct, key);
2402            item = PyTuple_Pack(2, key, value);
2403            if (item == NULL)
2404                goto bail;
2405            PyList_SET_ITEM(items, i, item);
2406            Py_DECREF(key);
2407        }
2408    }
2409    else {
2410        if (PyDict_CheckExact(dct))
2411            items = PyDict_Items(dct);
2412        else
2413            items = PyMapping_Items(dct);
2414    }
2415    if (items == NULL)
2416        goto bail;
2417    iter = PyObject_GetIter(items);
2418    Py_DECREF(items);
2419    if (iter == NULL)
2420        goto bail;
2421
2422    skipkeys = PyObject_IsTrue(s->skipkeys);
2423    idx = 0;
2424    while ((item = PyIter_Next(iter))) {
2425        PyObject *encoded, *key, *value;
2426        if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
2427            PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
2428            goto bail;
2429        }
2430        key = PyTuple_GET_ITEM(item, 0);
2431        if (key == NULL)
2432            goto bail;
2433        value = PyTuple_GET_ITEM(item, 1);
2434        if (value == NULL)
2435            goto bail;
2436
2437        encoded = PyDict_GetItem(s->key_memo, key);
2438        if (encoded != NULL) {
2439            Py_INCREF(encoded);
2440        }
2441        else if (PyString_Check(key) || PyUnicode_Check(key)) {
2442            Py_INCREF(key);
2443            kstr = key;
2444        }
2445        else if (PyFloat_Check(key)) {
2446            kstr = encoder_encode_float(s, key);
2447            if (kstr == NULL)
2448                goto bail;
2449        }
2450        else if (key == Py_True || key == Py_False || key == Py_None) {
2451            /* This must come before the PyInt_Check because
2452               True and False are also 1 and 0.*/
2453            kstr = _encoded_const(key);
2454            if (kstr == NULL)
2455                goto bail;
2456        }
2457        else if (PyInt_Check(key) || PyLong_Check(key)) {
2458            kstr = PyObject_Str(key);
2459            if (kstr == NULL)
2460                goto bail;
2461        }
2462        else if (skipkeys) {
2463            Py_DECREF(item);
2464            continue;
2465        }
2466        else {
2467            /* TODO: include repr of key */
2468            PyErr_SetString(PyExc_TypeError, "keys must be a string");
2469            goto bail;
2470        }
2471
2472        if (idx) {
2473            if (PyList_Append(rval, s->item_separator))
2474                goto bail;
2475        }
2476
2477        if (encoded == NULL) {
2478            encoded = encoder_encode_string(s, kstr);
2479            Py_CLEAR(kstr);
2480            if (encoded == NULL)
2481                goto bail;
2482            if (PyDict_SetItem(s->key_memo, key, encoded))
2483                goto bail;
2484        }
2485        if (PyList_Append(rval, encoded)) {
2486            goto bail;
2487        }
2488        Py_CLEAR(encoded);
2489        if (PyList_Append(rval, s->key_separator))
2490            goto bail;
2491        if (encoder_listencode_obj(s, rval, value, indent_level))
2492            goto bail;
2493        Py_CLEAR(item);
2494        idx += 1;
2495    }
2496    Py_CLEAR(iter);
2497    if (PyErr_Occurred())
2498        goto bail;
2499    if (ident != NULL) {
2500        if (PyDict_DelItem(s->markers, ident))
2501            goto bail;
2502        Py_CLEAR(ident);
2503    }
2504    if (s->indent != Py_None) {
2505        /* TODO: DOES NOT RUN */
2506        indent_level -= 1;
2507        /*
2508            yield '\n' + (_indent * _current_indent_level)
2509        */
2510    }
2511    if (PyList_Append(rval, close_dict))
2512        goto bail;
2513    return 0;
2514
2515bail:
2516    Py_XDECREF(encoded);
2517    Py_XDECREF(items);
2518    Py_XDECREF(iter);
2519    Py_XDECREF(kstr);
2520    Py_XDECREF(ident);
2521    return -1;
2522}
2523
2524
2525static int
2526encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2527{
2528    /* Encode Python list seq to a JSON term, rval is a PyList */
2529    static PyObject *open_array = NULL;
2530    static PyObject *close_array = NULL;
2531    static PyObject *empty_array = NULL;
2532    PyObject *ident = NULL;
2533    PyObject *iter = NULL;
2534    PyObject *obj = NULL;
2535    int is_true;
2536    int i = 0;
2537
2538    if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2539        open_array = PyString_InternFromString("[");
2540        close_array = PyString_InternFromString("]");
2541        empty_array = PyString_InternFromString("[]");
2542        if (open_array == NULL || close_array == NULL || empty_array == NULL)
2543            return -1;
2544    }
2545    ident = NULL;
2546    is_true = PyObject_IsTrue(seq);
2547    if (is_true == -1)
2548        return -1;
2549    else if (is_true == 0)
2550        return PyList_Append(rval, empty_array);
2551
2552    if (s->markers != Py_None) {
2553        int has_key;
2554        ident = PyLong_FromVoidPtr(seq);
2555        if (ident == NULL)
2556            goto bail;
2557        has_key = PyDict_Contains(s->markers, ident);
2558        if (has_key) {
2559            if (has_key != -1)
2560                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2561            goto bail;
2562        }
2563        if (PyDict_SetItem(s->markers, ident, seq)) {
2564            goto bail;
2565        }
2566    }
2567
2568    iter = PyObject_GetIter(seq);
2569    if (iter == NULL)
2570        goto bail;
2571
2572    if (PyList_Append(rval, open_array))
2573        goto bail;
2574    if (s->indent != Py_None) {
2575        /* TODO: DOES NOT RUN */
2576        indent_level += 1;
2577        /*
2578            newline_indent = '\n' + (_indent * _current_indent_level)
2579            separator = _item_separator + newline_indent
2580            buf += newline_indent
2581        */
2582    }
2583    while ((obj = PyIter_Next(iter))) {
2584        if (i) {
2585            if (PyList_Append(rval, s->item_separator))
2586                goto bail;
2587        }
2588        if (encoder_listencode_obj(s, rval, obj, indent_level))
2589            goto bail;
2590        i++;
2591        Py_CLEAR(obj);
2592    }
2593    Py_CLEAR(iter);
2594    if (PyErr_Occurred())
2595        goto bail;
2596    if (ident != NULL) {
2597        if (PyDict_DelItem(s->markers, ident))
2598            goto bail;
2599        Py_CLEAR(ident);
2600    }
2601    if (s->indent != Py_None) {
2602        /* TODO: DOES NOT RUN */
2603        indent_level -= 1;
2604        /*
2605            yield '\n' + (_indent * _current_indent_level)
2606        */
2607    }
2608    if (PyList_Append(rval, close_array))
2609        goto bail;
2610    return 0;
2611
2612bail:
2613    Py_XDECREF(obj);
2614    Py_XDECREF(iter);
2615    Py_XDECREF(ident);
2616    return -1;
2617}
2618
2619static void
2620encoder_dealloc(PyObject *self)
2621{
2622    /* Deallocate Encoder */
2623    encoder_clear(self);
2624    Py_TYPE(self)->tp_free(self);
2625}
2626
2627static int
2628encoder_traverse(PyObject *self, visitproc visit, void *arg)
2629{
2630    PyEncoderObject *s;
2631    assert(PyEncoder_Check(self));
2632    s = (PyEncoderObject *)self;
2633    Py_VISIT(s->markers);
2634    Py_VISIT(s->defaultfn);
2635    Py_VISIT(s->encoder);
2636    Py_VISIT(s->indent);
2637    Py_VISIT(s->key_separator);
2638    Py_VISIT(s->item_separator);
2639    Py_VISIT(s->sort_keys);
2640    Py_VISIT(s->skipkeys);
2641    Py_VISIT(s->key_memo);
2642    Py_VISIT(s->item_sort_key);
2643    return 0;
2644}
2645
2646static int
2647encoder_clear(PyObject *self)
2648{
2649    /* Deallocate Encoder */
2650    PyEncoderObject *s;
2651    assert(PyEncoder_Check(self));
2652    s = (PyEncoderObject *)self;
2653    Py_CLEAR(s->markers);
2654    Py_CLEAR(s->defaultfn);
2655    Py_CLEAR(s->encoder);
2656    Py_CLEAR(s->indent);
2657    Py_CLEAR(s->key_separator);
2658    Py_CLEAR(s->item_separator);
2659    Py_CLEAR(s->sort_keys);
2660    Py_CLEAR(s->skipkeys);
2661    Py_CLEAR(s->key_memo);
2662    Py_CLEAR(s->item_sort_key);
2663    Py_CLEAR(s->Decimal);
2664    return 0;
2665}
2666
2667PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2668
2669static
2670PyTypeObject PyEncoderType = {
2671    PyObject_HEAD_INIT(NULL)
2672    0,                    /* tp_internal */
2673    "simplejson._speedups.Encoder",       /* tp_name */
2674    sizeof(PyEncoderObject), /* tp_basicsize */
2675    0,                    /* tp_itemsize */
2676    encoder_dealloc, /* tp_dealloc */
2677    0,                    /* tp_print */
2678    0,                    /* tp_getattr */
2679    0,                    /* tp_setattr */
2680    0,                    /* tp_compare */
2681    0,                    /* tp_repr */
2682    0,                    /* tp_as_number */
2683    0,                    /* tp_as_sequence */
2684    0,                    /* tp_as_mapping */
2685    0,                    /* tp_hash */
2686    encoder_call,         /* tp_call */
2687    0,                    /* tp_str */
2688    0,                    /* tp_getattro */
2689    0,                    /* tp_setattro */
2690    0,                    /* tp_as_buffer */
2691    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
2692    encoder_doc,          /* tp_doc */
2693    encoder_traverse,     /* tp_traverse */
2694    encoder_clear,        /* tp_clear */
2695    0,                    /* tp_richcompare */
2696    0,                    /* tp_weaklistoffset */
2697    0,                    /* tp_iter */
2698    0,                    /* tp_iternext */
2699    0,                    /* tp_methods */
2700    encoder_members,      /* tp_members */
2701    0,                    /* tp_getset */
2702    0,                    /* tp_base */
2703    0,                    /* tp_dict */
2704    0,                    /* tp_descr_get */
2705    0,                    /* tp_descr_set */
2706    0,                    /* tp_dictoffset */
2707    encoder_init,         /* tp_init */
2708    0,                    /* tp_alloc */
2709    encoder_new,          /* tp_new */
2710    0,                    /* tp_free */
2711};
2712
2713static PyMethodDef speedups_methods[] = {
2714    {"encode_basestring_ascii",
2715        (PyCFunction)py_encode_basestring_ascii,
2716        METH_O,
2717        pydoc_encode_basestring_ascii},
2718    {"scanstring",
2719        (PyCFunction)py_scanstring,
2720        METH_VARARGS,
2721        pydoc_scanstring},
2722    {NULL, NULL, 0, NULL}
2723};
2724
2725PyDoc_STRVAR(module_doc,
2726"simplejson speedups\n");
2727
2728void
2729init_speedups(void)
2730{
2731    PyObject *m;
2732    PyScannerType.tp_new = PyType_GenericNew;
2733    if (PyType_Ready(&PyScannerType) < 0)
2734        return;
2735    PyEncoderType.tp_new = PyType_GenericNew;
2736    if (PyType_Ready(&PyEncoderType) < 0)
2737        return;
2738
2739
2740    m = Py_InitModule3("_speedups", speedups_methods, module_doc);
2741    Py_INCREF((PyObject*)&PyScannerType);
2742    PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2743    Py_INCREF((PyObject*)&PyEncoderType);
2744    PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2745}
2746