1"""Implementation of JSONDecoder
2"""
3import re
4
5from json import scanner
6try:
7    from _json import scanstring as c_scanstring
8except ImportError:
9    c_scanstring = None
10
# Public names of this module.
__all__ = ['JSONDecoder', 'JSONDecodeError']

# Flags passed to the regular expressions compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

# The non-finite float values; they are the targets of the _CONSTANTS
# lookup table used for the NaN / Infinity / -Infinity literals.
NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')
18
19
class JSONDecodeError(ValueError):
    """ValueError raised when a JSON document cannot be decoded.

    In addition to the formatted message, every instance exposes:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    """
    # Note that this exception is used from _json
    def __init__(self, msg, doc, pos):
        # Lines are 1-based: one more than the newlines that precede pos.
        line_number = doc.count('\n', 0, pos) + 1
        # rfind() returns -1 when pos is on the first line, which makes the
        # column 1-based as well without any special casing.
        previous_newline = doc.rfind('\n', 0, pos)
        column_number = pos - previous_newline
        ValueError.__init__(
            self,
            '%s: line %d column %d (char %d)' % (
                msg, line_number, column_number, pos))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = line_number
        self.colno = column_number

    def __reduce__(self):
        # Rebuild from the original constructor arguments rather than from
        # ``self.args``, which only holds the already formatted message.
        constructor_args = (self.msg, self.doc, self.pos)
        return self.__class__, constructor_args
44
45
# Maps the non-standard JSON literals to the float values they decode to;
# its __getitem__ is the default ``parse_constant`` of JSONDecoder.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}


# Matches a (possibly empty, non-greedy) run of characters as group 1,
# followed by the character that ends the run as group 2: a double quote,
# a backslash, or a control character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes, keyed by the character that follows the
# backslash; py_scanstring handles the 'u' escape separately.
BACKSLASH = {
    '"': '"', '\\': '\\', '/': '/',
    'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}
58
# Exactly four ASCII hexadecimal digits, as required after ``\u``.
HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)


def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
    """Return the integer encoded by the ``\\uXXXX`` escape found in ``s``.

    ``pos`` is the index of the ``u``; the four characters following it
    must all be ASCII hexadecimal digits.

    The digits are validated with a regular expression rather than by
    ``int(..., 16)`` alone, because ``int`` also accepts surrounding
    whitespace, a sign, ``_`` separators and non-ASCII digits, none of
    which may appear in a JSON escape.

    Raises JSONDecodeError (reported at ``pos``) when the escape is
    truncated or contains anything other than hexadecimal digits.
    """
    match = _m(s, pos + 1)
    if match is None:
        raise JSONDecodeError("Invalid \\uXXXX escape", s, pos)
    return int(match.group(), 16)
68
def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Decode the JSON string literal in ``s`` whose body starts at ``end``.

    ``end`` is the index of the character right after the opening quote.
    Every valid JSON escape sequence is replaced by the character it
    denotes; an invalid or unterminated string raises JSONDecodeError
    (a ValueError). Literal control characters inside the string are
    rejected unless ``strict`` is false.

    Returns a ``(decoded, index)`` tuple where ``index`` points just past
    the closing quote."""
    # Position of the opening quote, used when reporting unterminated input.
    opening_quote = end - 1
    pieces = []
    add_piece = pieces.append
    while True:
        match = _m(s, end)
        if match is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, opening_quote)
        end = match.end()
        # ``literal`` holds zero or more characters that need no decoding;
        # ``stopper`` is the closing quote, a backslash introducing an
        # escape, or a literal control character.
        literal, stopper = match.groups()
        if literal:
            add_piece(literal)

        if stopper == '"':
            return ''.join(pieces), end

        if stopper != '\\':
            # A raw control character inside the string.
            if strict:
                msg = "Invalid control character {0!r} at".format(stopper)
                raise JSONDecodeError(msg, s, end)
            add_piece(stopper)
            continue

        # A backslash: the next character selects the escape.
        try:
            selector = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, opening_quote)

        if selector == 'u':
            code = _decode_uXXXX(s, end)
            end += 5
            # A high surrogate immediately followed by another \u escape
            # may form a pair encoding one non-BMP character.
            if 0xd800 <= code <= 0xdbff and s[end:end + 2] == '\\u':
                low = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= low <= 0xdfff:
                    code = 0x10000 + (((code - 0xd800) << 10) | (low - 0xdc00))
                    end += 6
            add_piece(chr(code))
        else:
            # Anything else must be one of the single-character escapes.
            try:
                replacement = _b[selector]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(selector)
                raise JSONDecodeError(msg, s, end)
            end += 1
            add_piece(replacement)
126
127
# Use the C implementation when the import above succeeded, otherwise fall
# back to the pure-Python scanner.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of the four JSON whitespace characters, so
# a match attempt never fails.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, for ``in`` membership tests.
WHITESPACE_STR = ' \t\n\r'
133
134
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object and return ``(result, index_after_closing_brace)``.

    ``s_and_end`` is a ``(document, index)`` pair; the index is where the
    object's content starts (the first key's quote, whitespace, or ``}``).
    ``strict`` is forwarded to ``scanstring`` for the keys, ``scan_once``
    parses each value, and ``memo`` (optional) caches key strings so equal
    keys share one object.

    The result is ``object_pairs_hook(list_of_pairs)`` when that hook is
    given; otherwise a dict, passed through ``object_hook`` if given.

    Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    # Backwards compatibility: callers may omit the key cache.
    if memo is None:
        memo = {}
    remember = memo.setdefault
    items = []
    add_item = items.append

    # Slicing (instead of indexing) cannot raise IndexError; an empty
    # result simply falls through to the specific error below.
    ch = s[end:end + 1]
    # The common case is that the first key starts right here.
    if ch != '"':
        if ch in _ws:
            end = _w(s, end).end()
            ch = s[end:end + 1]
        if ch == '}':
            # Empty object: nothing to scan.
            if object_pairs_hook is not None:
                return object_pairs_hook(items), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    # Step over the opening quote of the first key.
    end += 1

    while True:
        key, end = scanstring(s, end, strict)
        key = remember(key, key)

        # Fast path: the colon follows the key directly; only otherwise
        # pay for a whitespace match.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace before the value; a single blank is handled
        # without calling the regular expression.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end; scan_once reports the missing value.
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as stop:
            raise JSONDecodeError("Expecting value", s, stop.value) from None
        add_item((key, value))

        # Find the separator that follows the value.
        try:
            ch = s[end]
            if ch in _ws:
                end = _w(s, end + 1).end()
                ch = s[end]
        except IndexError:
            ch = ''

        if ch == '}':
            end += 1
            break
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end)

        # After the comma the next key must start, optionally preceded by
        # whitespace.
        end = _w(s, end + 1).end()
        if s[end:end + 1] != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
        end += 1

    if object_pairs_hook is not None:
        return object_pairs_hook(items), end
    mapping = dict(items)
    if object_hook is not None:
        mapping = object_hook(mapping)
    return mapping, end
215
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array and return ``(list, index_after_closing_bracket)``.

    ``s_and_end`` is a ``(document, index)`` pair; the index is where the
    array's content starts (the first value, whitespace, or ``]``).
    ``scan_once`` parses each element.

    Raises JSONDecodeError when a value or a ``,`` delimiter is missing.
    """
    s, end = s_and_end
    items = []
    ch = s[end:end + 1]
    if ch in _ws:
        end = _w(s, end + 1).end()
        ch = s[end:end + 1]
    # Empty array: nothing to scan.
    if ch == ']':
        return items, end + 1

    add_item = items.append
    while True:
        try:
            item, end = scan_once(s, end)
        except StopIteration as stop:
            raise JSONDecodeError("Expecting value", s, stop.value) from None
        add_item(item)

        # Find the separator that follows the element.
        ch = s[end:end + 1]
        if ch in _ws:
            end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        if ch == ']':
            return items, end + 1
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end)
        end += 1

        # Skip whitespace before the next element; a single blank is
        # handled without calling the regular expression.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end; scan_once reports the missing value.
            pass
251
252
class JSONDecoder:
    """Simple JSON <http://json.org> decoder

    By default JSON values are decoded to these Python types:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    The literals ``NaN``, ``Infinity`` and ``-Infinity`` are accepted too
    and decoded to the matching ``float`` values, although they are not
    part of the JSON specification.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder; every argument is keyword-only.

        ``object_hook``, if given, is called with the ``dict`` built for
        each decoded JSON object, and its return value replaces that
        ``dict``.  Useful for custom deserialization (e.g. JSON-RPC class
        hinting).

        ``object_pairs_hook``, if given, is called with the ordered list of
        ``(key, value)`` pairs of each decoded JSON object, and its return
        value is used instead of a ``dict``.  This supports decoders that
        depend on the order in which pairs appear (for example
        collections.OrderedDict).  When both hooks are given,
        ``object_pairs_hook`` takes priority.

        ``parse_float``, if given, is called with the string of every JSON
        float to decode; the default is equivalent to float(num_str).  It
        allows another datatype or parser (e.g. decimal.Decimal).

        ``parse_int``, if given, is called with the string of every JSON
        int to decode; the default is equivalent to int(num_str).  It
        allows another datatype or parser (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN.  It can be used to raise an exception
        when such invalid JSON numbers are encountered.

        ``strict`` (true by default) controls whether control characters
        are rejected inside strings; when false they are allowed.  Control
        characters here are those with codes in the 0-31 range, including
        ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        # User-configurable behaviour, with the built-in parsers as
        # fallbacks for anything left unset.
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.parse_float = parse_float if parse_float else float
        self.parse_int = parse_int if parse_int else int
        self.parse_constant = (
            parse_constant if parse_constant else _CONSTANTS.__getitem__)

        # Parsers for the composite and string values.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}

        # Built last: the scanner is constructed from this instance.
        self.scan_once = scanner.make_scanner(self)


    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s``, a ``str`` holding exactly one JSON document, and
        return its Python representation.

        Whitespace around the document is ignored; anything else after it
        raises JSONDecodeError("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, end)

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that begins at index ``idx`` of the
        ``str`` ``s`` and return a 2-tuple ``(obj, end)``: the Python
        representation and the index in ``s`` where the document ended.

        Unlike decode(), text following the document is not an error, so
        this can be used on a string with extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration as stop:
            raise JSONDecodeError("Expecting value", s, stop.value) from None
        else:
            return obj, end
359