# encoder.py revision 2a99a7e74a7f215066514fe81d2bfa6639d9eddd
"""Implementation of JSONEncoder
"""
import re
from decimal import Decimal
5
6def _import_speedups():
7    try:
8        from simplejson import _speedups
9        return _speedups.encode_basestring_ascii, _speedups.make_encoder
10    except ImportError:
11        return None, None
# Optional C implementations; both names are None when the extension
# module could not be imported.
c_encode_basestring_ascii, c_make_encoder = _import_speedups()

from simplejson.decoder import PosInf
15
# Characters that get escaped inside a JSON string: the C0 control range,
# backslash, double quote, and U+2028 / U+2029.
# Written as an escaped ``u''`` literal rather than ``ur''`` (which is not
# valid syntax on Python 3); the pattern text handed to ``re`` is identical.
ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
# Backslash, double quote, or anything outside the printable ASCII range
# (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Anything in the 0x80-0xff range.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Replacement text for each character that has a dedicated escape.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
    u'\u2028': '\\u2028',
    u'\u2029': '\\u2029',
}
# The remaining control characters below 0x20 have no short form and are
# written as \u00XX; setdefault keeps the short escapes defined above.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Callable used to render ordinary (non-NaN, non-infinite) floats.
FLOAT_REPR = repr
35
def encode_basestring(s):
    """Return a JSON representation of a Python string

    ``s`` may be a ``str`` or ``unicode`` object.  A ``str`` that contains
    anything matched by ``HAS_UTF8`` is decoded as UTF-8 first.  Every
    character matched by ``ESCAPE`` is replaced by its ``ESCAPE_DCT`` entry
    and the result is returned as a unicode string wrapped in double quotes.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        # Every character ESCAPE can match has an entry in ESCAPE_DCT.
        return ESCAPE_DCT[match.group(0)]

    return u'"' + ESCAPE.sub(replace, s) + u'"'
45
46
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    ``s`` may be a ``str`` or ``unicode`` object.  A ``str`` that contains
    anything matched by ``HAS_UTF8`` is decoded as UTF-8 first.  Characters
    matched by ``ESCAPE_ASCII`` are replaced either by their ``ESCAPE_DCT``
    entry or by ``\\uXXXX`` escapes, and the result is returned as a ``str``
    wrapped in double quotes.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                return '\\u%04x' % (n,)
            else:
                # Code points above U+FFFF are written as a surrogate pair.
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u%04x\\u%04x' % (s1, s2)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
70
71
# Use the C implementation when it is available (not None), otherwise the
# pure-Python one.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
74
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict, namedtuple  | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # ``separators`` or ``indent`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None,
            use_decimal=True, namedtuple_as_object=True,
            tuple_as_array=True, bigint_as_string=False,
            item_sort_key=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting. ``None`` (the default) selects the most compact
        representation without any newlines. For backwards compatibility with
        versions of simplejson earlier than 2.1.0, an integer is also accepted
        and is converted to a string with that many spaces.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        If use_decimal is true (the default), ``decimal.Decimal`` will
        be supported directly by the encoder. For the inverse, decode JSON
        with ``parse_float=decimal.Decimal``.

        If namedtuple_as_object is true (the default), objects with
        ``_asdict()`` methods will be encoded as JSON objects.

        If tuple_as_array is true (the default), tuple (and subclasses) will
        be encoded as JSON arrays.

        If bigint_as_string is true (not the default), ints 2**53 and higher
        or lower than -2**53 will be encoded as strings. This is to avoid the
        rounding that happens in Javascript otherwise.

        If specified, item_sort_key is a callable used to sort the items in
        each dictionary. This is useful if you want to sort items other than
        in alphabetical order by key.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.use_decimal = use_decimal
        self.namedtuple_as_object = namedtuple_as_object
        self.tuple_as_array = tuple_as_array
        self.bigint_as_string = bigint_as_string
        self.item_sort_key = item_sort_key
        # Backwards compatibility: an integer indent means that many spaces.
        if indent is not None and not isinstance(indent, basestring):
            indent = indent * ' '
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Each item goes on its own line when indenting, so the space
            # after the comma is dropped.
            self.item_separator = ','
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                # Byte strings are only transcoded here for a configured
                # encoding other than UTF-8.
                if _encoding is not None and _encoding != 'utf-8':
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag: when it is true, no indent is
        configured and the C encoder is available, the C encoder is used
        instead of the pure-Python generator.
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Wrap the string encoder only for a real, non-UTF-8 encoding.
        # ``encoding=None`` must not reach ``str.decode(None)``; encode()
        # applies the same guard on its fast path.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # Passed only to the C encoder; cleared again once the encoder
        # call below has returned.
        key_memo = {}
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                self.bigint_as_string, self.item_sort_key,
                Decimal)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                self.bigint_as_string, self.item_sort_key,
                Decimal=Decimal)
        try:
            return _iterencode(o, 0)
        finally:
            key_memo.clear()
314
315
class JSONEncoderForHTML(JSONEncoder):
    """An encoder that produces JSON safe to embed in HTML.

    To embed JSON content in, say, a script tag on a web page, the
    characters &, < and > should be escaped. They cannot be escaped
    with the usual entities (e.g. &amp;) because they are not expanded
    within <script> tags.
    """

    def encode(self, o):
        """Return the HTML-safe JSON text for ``o`` as a single string."""
        # Override JSONEncoder.encode because it has hacks for
        # performance that make things more complicated.
        chunks = self.iterencode(o, True)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Yield the chunks of the base encoder with ``&``, ``<`` and ``>``
        replaced by their ``\\uXXXX`` escapes.
        """
        chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
        for chunk in chunks:
            chunk = chunk.replace('&', '\\u0026')
            chunk = chunk.replace('<', '\\u003c')
            chunk = chunk.replace('>', '\\u003e')
            yield chunk
341
342
343def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
344        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
345        _use_decimal, _namedtuple_as_object, _tuple_as_array,
346        _bigint_as_string, _item_sort_key,
347        ## HACK: hand-optimized bytecode; turn globals into locals
348        False=False,
349        True=True,
350        ValueError=ValueError,
351        basestring=basestring,
352        Decimal=Decimal,
353        dict=dict,
354        float=float,
355        id=id,
356        int=int,
357        isinstance=isinstance,
358        list=list,
359        long=long,
360        str=str,
361        tuple=tuple,
362    ):
363    if _item_sort_key and not callable(_item_sort_key):
364        raise TypeError("item_sort_key must be None or callable")
365
366    def _iterencode_list(lst, _current_indent_level):
367        if not lst:
368            yield '[]'
369            return
370        if markers is not None:
371            markerid = id(lst)
372            if markerid in markers:
373                raise ValueError("Circular reference detected")
374            markers[markerid] = lst
375        buf = '['
376        if _indent is not None:
377            _current_indent_level += 1
378            newline_indent = '\n' + (_indent * _current_indent_level)
379            separator = _item_separator + newline_indent
380            buf += newline_indent
381        else:
382            newline_indent = None
383            separator = _item_separator
384        first = True
385        for value in lst:
386            if first:
387                first = False
388            else:
389                buf = separator
390            if isinstance(value, basestring):
391                yield buf + _encoder(value)
392            elif value is None:
393                yield buf + 'null'
394            elif value is True:
395                yield buf + 'true'
396            elif value is False:
397                yield buf + 'false'
398            elif isinstance(value, (int, long)):
399                yield ((buf + str(value))
400                       if (not _bigint_as_string or
401                           (-1 << 53) < value < (1 << 53))
402                           else (buf + '"' + str(value) + '"'))
403            elif isinstance(value, float):
404                yield buf + _floatstr(value)
405            elif _use_decimal and isinstance(value, Decimal):
406                yield buf + str(value)
407            else:
408                yield buf
409                if isinstance(value, list):
410                    chunks = _iterencode_list(value, _current_indent_level)
411                else:
412                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
413                    if _asdict and callable(_asdict):
414                        chunks = _iterencode_dict(_asdict(),
415                                                  _current_indent_level)
416                    elif _tuple_as_array and isinstance(value, tuple):
417                        chunks = _iterencode_list(value, _current_indent_level)
418                    elif isinstance(value, dict):
419                        chunks = _iterencode_dict(value, _current_indent_level)
420                    else:
421                        chunks = _iterencode(value, _current_indent_level)
422                for chunk in chunks:
423                    yield chunk
424        if newline_indent is not None:
425            _current_indent_level -= 1
426            yield '\n' + (_indent * _current_indent_level)
427        yield ']'
428        if markers is not None:
429            del markers[markerid]
430
431    def _iterencode_dict(dct, _current_indent_level):
432        if not dct:
433            yield '{}'
434            return
435        if markers is not None:
436            markerid = id(dct)
437            if markerid in markers:
438                raise ValueError("Circular reference detected")
439            markers[markerid] = dct
440        yield '{'
441        if _indent is not None:
442            _current_indent_level += 1
443            newline_indent = '\n' + (_indent * _current_indent_level)
444            item_separator = _item_separator + newline_indent
445            yield newline_indent
446        else:
447            newline_indent = None
448            item_separator = _item_separator
449        first = True
450        if _item_sort_key:
451            items = dct.items()
452            items.sort(key=_item_sort_key)
453        elif _sort_keys:
454            items = dct.items()
455            items.sort(key=lambda kv: kv[0])
456        else:
457            items = dct.iteritems()
458        for key, value in items:
459            if isinstance(key, basestring):
460                pass
461            # JavaScript is weakly typed for these, so it makes sense to
462            # also allow them.  Many encoders seem to do something like this.
463            elif isinstance(key, float):
464                key = _floatstr(key)
465            elif key is True:
466                key = 'true'
467            elif key is False:
468                key = 'false'
469            elif key is None:
470                key = 'null'
471            elif isinstance(key, (int, long)):
472                key = str(key)
473            elif _skipkeys:
474                continue
475            else:
476                raise TypeError("key " + repr(key) + " is not a string")
477            if first:
478                first = False
479            else:
480                yield item_separator
481            yield _encoder(key)
482            yield _key_separator
483            if isinstance(value, basestring):
484                yield _encoder(value)
485            elif value is None:
486                yield 'null'
487            elif value is True:
488                yield 'true'
489            elif value is False:
490                yield 'false'
491            elif isinstance(value, (int, long)):
492                yield (str(value)
493                       if (not _bigint_as_string or
494                           (-1 << 53) < value < (1 << 53))
495                           else ('"' + str(value) + '"'))
496            elif isinstance(value, float):
497                yield _floatstr(value)
498            elif _use_decimal and isinstance(value, Decimal):
499                yield str(value)
500            else:
501                if isinstance(value, list):
502                    chunks = _iterencode_list(value, _current_indent_level)
503                else:
504                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
505                    if _asdict and callable(_asdict):
506                        chunks = _iterencode_dict(_asdict(),
507                                                  _current_indent_level)
508                    elif _tuple_as_array and isinstance(value, tuple):
509                        chunks = _iterencode_list(value, _current_indent_level)
510                    elif isinstance(value, dict):
511                        chunks = _iterencode_dict(value, _current_indent_level)
512                    else:
513                        chunks = _iterencode(value, _current_indent_level)
514                for chunk in chunks:
515                    yield chunk
516        if newline_indent is not None:
517            _current_indent_level -= 1
518            yield '\n' + (_indent * _current_indent_level)
519        yield '}'
520        if markers is not None:
521            del markers[markerid]
522
523    def _iterencode(o, _current_indent_level):
524        if isinstance(o, basestring):
525            yield _encoder(o)
526        elif o is None:
527            yield 'null'
528        elif o is True:
529            yield 'true'
530        elif o is False:
531            yield 'false'
532        elif isinstance(o, (int, long)):
533            yield (str(o)
534                   if (not _bigint_as_string or
535                       (-1 << 53) < o < (1 << 53))
536                       else ('"' + str(o) + '"'))
537        elif isinstance(o, float):
538            yield _floatstr(o)
539        elif isinstance(o, list):
540            for chunk in _iterencode_list(o, _current_indent_level):
541                yield chunk
542        else:
543            _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
544            if _asdict and callable(_asdict):
545                for chunk in _iterencode_dict(_asdict(), _current_indent_level):
546                    yield chunk
547            elif (_tuple_as_array and isinstance(o, tuple)):
548                for chunk in _iterencode_list(o, _current_indent_level):
549                    yield chunk
550            elif isinstance(o, dict):
551                for chunk in _iterencode_dict(o, _current_indent_level):
552                    yield chunk
553            elif _use_decimal and isinstance(o, Decimal):
554                yield str(o)
555            else:
556                if markers is not None:
557                    markerid = id(o)
558                    if markerid in markers:
559                        raise ValueError("Circular reference detected")
560                    markers[markerid] = o
561                o = _default(o)
562                for chunk in _iterencode(o, _current_indent_level):
563                    yield chunk
564                if markers is not None:
565                    del markers[markerid]
566
567    return _iterencode
568