1"""Implementation of JSONDecoder
2"""
3import re
4import sys
5import struct
6
7from json import scanner
8try:
9    from _json import scanstring as c_scanstring
10except ImportError:
11    c_scanstring = None
12
# Names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder']

# Flags passed to the regular expressions compiled in this module
# (STRINGCHUNK and WHITESPACE).
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
16
def _floatconstants():
    """Return the IEEE 754 doubles ``(nan, inf, -inf)``.

    The two bit patterns are unpacked with an explicit big-endian format
    (``'>dd'``), so the result depends neither on ``sys.byteorder`` nor on
    the ``'hex'`` string codec.
    """
    # 0x7FF8000000000000 is a quiet NaN, 0x7FF0000000000000 is +infinity.
    nan, inf = struct.unpack(
        '>dd',
        b'\x7f\xf8\x00\x00\x00\x00\x00\x00'
        b'\x7f\xf0\x00\x00\x00\x00\x00\x00')
    return nan, inf, -inf

NaN, PosInf, NegInf = _floatconstants()
25
26
def linecol(doc, pos):
    """Translate the character offset ``pos`` within ``doc`` into a 1-based
    ``(line, column)`` tuple, using ``'\\n'`` as the line separator."""
    # rfind() gives -1 when no newline precedes pos, so on the first line
    # the column works out to pos + 1.
    last_newline = doc.rfind('\n', 0, pos)
    line = doc.count('\n', 0, pos) + 1
    return line, pos - last_newline
34
35
def errmsg(msg, doc, pos, end=None):
    """Return ``msg`` followed by the location of ``pos`` in ``doc``.

    The location is given as line, column and character offset.  When
    ``end`` is supplied, the location of ``end`` is appended as well so
    the message describes the range ``pos`` - ``end``.
    """
    # Note that this function is called from _json
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = ('{0}: line {1} column {2} - line {3} column {4} '
                    '(char {5} - {6})')
        return template.format(
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    return '{0}: line {1} column {2} (char {3})'.format(
        msg, start_line, start_col, pos)
49
50
# Maps the non-standard constant literals to their float values.  Its
# ``__getitem__`` is the default ``parse_constant`` of JSONDecoder.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matches a (non-greedy, possibly empty) run of characters in group 1
# followed, in group 2, by the character that ends the run: a double quote,
# a backslash, or a control character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character following a backslash mapped to
# the unicode text it stands for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring falls back to when called with encoding=None.
DEFAULT_ENCODING = "utf-8"
64
def _decode_hex4(digits):
    """Return the integer value of ``digits`` if it consists of exactly
    four hexadecimal digits, otherwise return ``None``.

    ``int(digits, 16)`` on its own is too permissive for a JSON ``\\uXXXX``
    escape: it also accepts a ``0x`` prefix, a sign and surrounding
    whitespace.
    """
    if len(digits) != 4:
        return None
    for c in digits:
        if c not in '0123456789abcdefABCDEF':
            return None
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to decode chunks of ``s`` that are not already
    ``unicode``; it defaults to ``DEFAULT_ENCODING``.  ``_b`` and ``_m``
    are default-argument bindings of the escape table and the chunk
    matcher and are not meant to be passed by callers.

    Raises ValueError (with the position in the message) for an
    unterminated string, a literal control character in strict mode, an
    unknown backslash escape, a ``\\uXXXX`` escape that is not four hex
    digits, and - on builds where ``sys.maxunicode > 65535`` - a high
    surrogate that is not followed by a ``\\uXXXX`` low surrogate.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used for "unterminated string" errors
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence: s[end] is the 'u', followed by the
            # four hex digits
            uni = _decode_hex4(s[end + 1:end + 5])
            if uni is None:
                msg = "Invalid \\uXXXX escape"
                raise ValueError(errmsg(msg, s, end))
            next_end = end + 5
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                uni2 = None
                if s[end + 5:end + 7] == '\\u':
                    uni2 = _decode_hex4(s[end + 7:end + 11])
                # The second escape must be a well-formed low surrogate;
                # anything else would be combined into a wrong code point.
                if uni2 is None or not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
141
142
# Use the C implementation from _json when it could be imported, otherwise
# fall back to the pure-Python scanner defined in this module.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, for quick ``in`` tests.
WHITESPACE_STR = ' \t\n\r'
148
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object.

    ``s_and_end`` is a ``(s, end)`` tuple: the document and the index at
    which to start scanning (presumably just past the opening ``{`` -
    confirm against the scanner that calls this).  ``encoding`` and
    ``strict`` are forwarded to ``scanstring`` for the keys; ``scan_once``
    is called as ``scan_once(s, idx)`` for each value and is expected to
    return ``(value, next_idx)`` or raise StopIteration.

    The key/value pairs are handed to ``object_pairs_hook`` as a list when
    that hook is given; otherwise they are turned into a ``dict`` which is
    passed through ``object_hook`` if that one is given.

    Returns ``(result, idx)`` where ``idx`` is the index just past the
    closing ``}``.  Raises ValueError on malformed input.
    """
    s, end = s_and_end
    items = []
    add_item = items.append
    # Slicing (rather than indexing) cannot raise IndexError; an empty
    # document simply falls through to the ValueError below.
    ch = s[end:end + 1]
    # The common case is that a key's opening quote comes right away
    if ch != '"':
        if ch in _ws:
            end = _w(s, end).end()
            ch = s[end:end + 1]
        if ch == '}':
            # Object without members
            if object_pairs_hook is not None:
                return object_pairs_hook(items), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if ch != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    # Step over the opening quote of the first key
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # Fast path: the separator is ":" directly after the key; only
        # fall back to the whitespace regex when it is not.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
        end += 1

        # Skip whitespace after the colon; a single blank is handled
        # without a regex call.  Running off the end is left for
        # scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        add_item((key, value))

        # Fetch the character after the value, skipping whitespace
        try:
            ch = s[end]
            if ch in _ws:
                end = _w(s, end + 1).end()
                ch = s[end]
        except IndexError:
            ch = ''
        end += 1

        if ch == '}':
            break
        if ch != ',':
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))

        # After a comma the next key's opening quote must follow,
        # optionally preceded by whitespace
        try:
            ch = s[end]
            if ch in _ws:
                end += 1
                ch = s[end]
                if ch in _ws:
                    end = _w(s, end + 1).end()
                    ch = s[end]
        except IndexError:
            ch = ''

        end += 1
        if ch != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end - 1))
    if object_pairs_hook is not None:
        return object_pairs_hook(items), end
    obj = dict(items)
    if object_hook is not None:
        obj = object_hook(obj)
    return obj, end
236
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array.

    ``s_and_end`` is a ``(s, end)`` tuple: the document and the index at
    which to start scanning (presumably just past the opening ``[`` -
    confirm against the scanner that calls this).  ``scan_once`` is called
    as ``scan_once(s, idx)`` for each element and is expected to return
    ``(value, next_idx)`` or raise StopIteration.

    Returns ``(values, idx)`` where ``values`` is a list and ``idx`` is the
    index just past the closing ``]``.  Raises ValueError when an element
    cannot be decoded or when an element is followed by something other
    than ``,`` or ``]``; the reported position is that of the offending
    character.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end has already been advanced past nextchar, so step back one
            # to report the character that is actually wrong (this matches
            # what JSONObject does for the same error).
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
        # Skip whitespace after the comma; a single blank is handled
        # without a regex call.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
272
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Unless customised, JSON values are decoded to these Python types:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    The literals ``NaN``, ``Infinity`` and ``-Infinity`` are accepted too
    and decoded to the matching ``float`` values, although they are not
    part of the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder.

        ``encoding`` is the encoding used to interpret ``str`` objects
        decoded by this instance (utf-8 by default); it has no effect on
        ``unicode`` input.  Only encodings that are a superset of ASCII
        currently work; text in any other encoding should be passed in as
        ``unicode``.

        ``object_hook``, if given, is called with the ``dict`` built for
        every decoded JSON object, and its return value is used in place
        of that ``dict``.  This allows custom deserializations (e.g. to
        support JSON-RPC class hinting).

        ``object_pairs_hook``, if given, is called with an ordered list of
        ``(key, value)`` pairs for every decoded JSON object, and its
        return value is used instead of the ``dict``.  Decoders that
        depend on the order in which pairs appear can be built this way
        (for example with collections.OrderedDict).  When both hooks are
        given, ``object_pairs_hook`` takes priority.

        ``parse_float``, if given, is called with the string of every JSON
        float to be decoded; the default is equivalent to float(num_str).
        Use it to plug in another datatype or parser (e.g.
        decimal.Decimal).

        ``parse_int``, if given, is called with the string of every JSON
        int to be decoded; the default is equivalent to int(num_str).  Use
        it to plug in another datatype or parser (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN.  It can be used to raise an exception
        when such invalid JSON numbers are encountered.

        ``strict`` (true by default) controls whether control characters
        are rejected inside strings; when it is false they are allowed.
        Control characters here are those with character codes in the
        0-31 range, including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and
        ``'\\0'``.

        """
        # Settings taken over as given
        self.encoding = encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Converters: a missing (or otherwise false) argument selects the
        # built-in default
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        # Module-level parsers exposed as attributes of the decoder
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Built last, once every attribute above has been set
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s`` (a ``str`` or ``unicode`` instance containing a JSON
        document) and return its Python representation.

        Whitespace before and after the document is ignored; anything else
        after the document raises ValueError ("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        # Only whitespace may follow the document
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at index ``idx`` of ``s`` (a
        ``str`` or ``unicode``) and return a 2-tuple of its Python
        representation and the index in ``s`` where the document ended.

        Unlike ``decode``, trailing data after the document is not an
        error, so this can be used on a string that may have extraneous
        data at the end.  Raises ValueError when no document can be
        decoded at ``idx``.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return obj, end
385