"""Implementation of JSONDecoder
"""
import re

from json import scanner
try:
    from _json import scanstring as c_scanstring
except ImportError:
    c_scanstring = None

__all__ = ['JSONDecoder', 'JSONDecodeError']

FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')


class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    """
    # Note that this exception is used from _json
    def __init__(self, msg, doc, pos):
        # Lines are 1-based: count the newlines that precede ``pos``.
        lineno = doc.count('\n', 0, pos) + 1
        # Columns are 1-based too: rfind() yields -1 when no newline
        # precedes ``pos``, which makes the first character column 1.
        colno = pos - doc.rfind('\n', 0, pos)
        errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
        ValueError.__init__(self, errmsg)
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno

    def __reduce__(self):
        # Rebuild from the raw constructor arguments; lineno and colno are
        # derived again by __init__ when the instance is recreated.
        return self.__class__, (self.msg, self.doc, self.pos)


# Non-standard literals understood by the decoder, mapped to float values.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}


# Exactly four ASCII hexadecimal digits, as required after ``\u``.
HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)
# Lazily matches a run of ordinary characters followed by one terminator:
# a double quote, a backslash, or a control character (0x00-0x1f).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes and the characters they stand for.
BACKSLASH = {
    '"': '"', '\\': '\\', '/': '/',
    'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}


def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
    """Return the integer value of the ``\\uXXXX`` escape at ``s[pos]``.

    ``pos`` is the index of the ``u``; the four characters that follow must
    all be ASCII hexadecimal digits.  Raises JSONDecodeError (reported at
    ``pos``) otherwise.
    """
    # A regex is used rather than relying on int(..., 16) for validation:
    # int() also accepts surrounding whitespace, a sign, underscores and
    # non-ASCII digits, none of which are legal in a JSON escape.
    esc = _m(s, pos + 1)
    if esc is not None:
        return int(esc.group(), 16)
    msg = "Invalid \\uXXXX escape"
    raise JSONDecodeError(msg, s, pos)


def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting an unterminated string.
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError("Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                # ``end`` is already one past the terminator; report the
                # index of the control character itself.
                raise JSONDecodeError(msg, s, end - 1)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, begin) from None
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            uni = _decode_uXXXX(s, end)
            # Skip the 'u' and its four hex digits.
            end += 5
            # A high surrogate immediately followed by another \u escape may
            # form a surrogate pair; combine them only when the second value
            # really is a low surrogate.
            if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = chr(uni)
        _append(char)
    return ''.join(chunks), end


# Use speedup if available
scanstring = c_scanstring or py_scanstring

WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'


def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at which
    the object's contents begin (presumably just past the opening ``{`` --
    the caller lives outside this module).  ``scan_once(s, idx)`` must return
    ``(value, end)`` or raise StopIteration carrying the failing index.
    ``memo`` is a dict used to share equal key strings between objects.

    Returns ``(result, end)`` where ``end`` is the index after the closing
    ``}``.  ``result`` is ``object_pairs_hook(pairs)`` when that hook is
    given, otherwise a dict, passed through ``object_hook`` when given.
    Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    memo_get = memo.setdefault
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        # Reuse an equal key seen earlier instead of keeping a duplicate.
        key = memo_get(key, key)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the document; scan_once reports it below.
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        pairs_append((key, value))
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end


def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at which
    the array's contents begin (presumably just past the opening ``[`` -- the
    caller lives outside this module).  ``scan_once(s, idx)`` must return
    ``(value, end)`` or raise StopIteration carrying the failing index.

    Returns ``(values, end)`` where ``values`` is a list and ``end`` is the
    index after the closing ``]``.  Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the document; scan_once reports it on the
            # next iteration.
            pass

    return values, end


class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified will be called with the result of
        every JSON object decoded with an ordered list of pairs.  The return
        value of ``object_pairs_hook`` will be used instead of the ``dict``.
        This feature can be used to implement custom decoders that rely on the
        order that the key and value pairs are decoded (for example,
        collections.OrderedDict will remember the order of insertion). If
        ``object_hook`` is also defined, the ``object_pairs_hook`` takes
        priority.

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded. By default this is equivalent to
        float(num_str). This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded. By default this is equivalent to
        int(num_str). This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        If ``strict`` is false (true is the default), then control
        characters will be allowed inside strings.  Control characters in
        this context are those with character codes in the 0-31 range,
        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        self.object_hook = object_hook
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        self.object_pairs_hook = object_pairs_hook
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: make_scanner is handed this instance, so the
        # attributes above are assigned before it runs.
        self.scan_once = scanner.make_scanner(self)


    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Leading and trailing whitespace is ignored; anything else after the
        document raises JSONDecodeError("Extra data").

        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        if end != len(s):
            raise JSONDecodeError("Extra data", s, end)
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        return obj, end