1 2""" 3httplib2 4 5A caching http interface that supports ETags and gzip 6to conserve bandwidth. 7 8Requires Python 3.0 or later 9 10Changelog: 112009-05-28, Pilgrim: ported to Python 3 122007-08-18, Rick: Modified so it's able to use a socks proxy if needed. 13 14""" 15 16__author__ = "Joe Gregorio (joe@bitworking.org)" 17__copyright__ = "Copyright 2006, Joe Gregorio" 18__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)", 19 "James Antill", 20 "Xavier Verges Farrero", 21 "Jonathan Feinberg", 22 "Blair Zajac", 23 "Sam Ruby", 24 "Louis Nyffenegger", 25 "Mark Pilgrim"] 26__license__ = "MIT" 27__version__ = "0.7.7" 28 29import re 30import sys 31import email 32import email.utils 33import email.message 34import email.feedparser 35import io 36import gzip 37import zlib 38import http.client 39import urllib.parse 40import base64 41import os 42import copy 43import calendar 44import time 45import random 46import errno 47from hashlib import sha1 as _sha, md5 as _md5 48import hmac 49from gettext import gettext as _ 50import socket 51import ssl 52_ssl_wrap_socket = ssl.wrap_socket 53 54try: 55 import socks 56except ImportError: 57 socks = None 58 59from .iri2uri import iri2uri 60 61def has_timeout(timeout): 62 if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'): 63 return (timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT) 64 return (timeout is not None) 65 66__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error', 67 'RedirectMissingLocation', 'RedirectLimit', 68 'FailedToDecompressContent', 'UnimplementedDigestAuthOptionError', 69 'UnimplementedHmacDigestAuthOptionError', 70 'debuglevel', 'RETRIES'] 71 72 73# The httplib debug level, set to a non-zero value to get debug output 74debuglevel = 0 75 76# A request will be tried 'RETRIES' times if it fails at the socket/connection level. 77RETRIES = 2 78 79# All exceptions raised here derive from HttpLib2Error 80class HttpLib2Error(Exception): pass 81 82# Some exceptions can be caught and optionally 83# be turned back into responses. 84class HttpLib2ErrorWithResponse(HttpLib2Error): 85 def __init__(self, desc, response, content): 86 self.response = response 87 self.content = content 88 HttpLib2Error.__init__(self, desc) 89 90class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass 91class RedirectLimit(HttpLib2ErrorWithResponse): pass 92class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass 93class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass 94class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass 95 96class MalformedHeader(HttpLib2Error): pass 97class RelativeURIError(HttpLib2Error): pass 98class ServerNotFoundError(HttpLib2Error): pass 99class CertificateValidationUnsupportedInPython31(HttpLib2Error): pass 100 101# Open Items: 102# ----------- 103# Proxy support 104 105# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?) 106 107# Pluggable cache storage (supports storing the cache in 108# flat files by default. We need a plug-in architecture 109# that can support Berkeley DB and Squid) 110 111# == Known Issues == 112# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator. 113# Does not handle Cache-Control: max-stale 114# Does not use Age: headers when calculating cache freshness. 115 116 117# The number of redirections to follow before giving up. 118# Note that only GET redirects are automatically followed. 119# Will also honor 301 requests by saving that info and never 120# requesting that URI again. 121DEFAULT_MAX_REDIRECTS = 5 122 123# Which headers are hop-by-hop headers by default 124HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade'] 125 126# Default CA certificates file bundled with httplib2. 127CA_CERTS = os.path.join( 128 os.path.dirname(os.path.abspath(__file__ )), "cacerts.txt") 129 130def _get_end2end_headers(response): 131 hopbyhop = list(HOP_BY_HOP) 132 hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')]) 133 return [header for header in list(response.keys()) if header not in hopbyhop] 134 135URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") 136 137def parse_uri(uri): 138 """Parses a URI using the regex given in Appendix B of RFC 3986. 139 140 (scheme, authority, path, query, fragment) = parse_uri(uri) 141 """ 142 groups = URI.match(uri).groups() 143 return (groups[1], groups[3], groups[4], groups[6], groups[8]) 144 145def urlnorm(uri): 146 (scheme, authority, path, query, fragment) = parse_uri(uri) 147 if not scheme or not authority: 148 raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri) 149 authority = authority.lower() 150 scheme = scheme.lower() 151 if not path: 152 path = "/" 153 # Could do syntax based normalization of the URI before 154 # computing the digest. See Section 6.2.2 of Std 66. 155 request_uri = query and "?".join([path, query]) or path 156 scheme = scheme.lower() 157 defrag_uri = scheme + "://" + authority + request_uri 158 return scheme, authority, request_uri, defrag_uri 159 160 161# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/) 162re_url_scheme = re.compile(br'^\w+://') 163re_url_scheme_s = re.compile(r'^\w+://') 164re_slash = re.compile(br'[?/:|]+') 165 166def safename(filename): 167 """Return a filename suitable for the cache. 168 169 Strips dangerous and common characters to create a filename we 170 can use to store the cache in. 171 """ 172 173 try: 174 if re_url_scheme_s.match(filename): 175 if isinstance(filename,bytes): 176 filename = filename.decode('utf-8') 177 filename = filename.encode('idna') 178 else: 179 filename = filename.encode('idna') 180 except UnicodeError: 181 pass 182 if isinstance(filename,str): 183 filename=filename.encode('utf-8') 184 filemd5 = _md5(filename).hexdigest().encode('utf-8') 185 filename = re_url_scheme.sub(b"", filename) 186 filename = re_slash.sub(b",", filename) 187 188 # limit length of filename 189 if len(filename)>200: 190 filename=filename[:200] 191 return b",".join((filename, filemd5)).decode('utf-8') 192 193NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+') 194def _normalize_headers(headers): 195 return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.items()]) 196 197def _parse_cache_control(headers): 198 retval = {} 199 if 'cache-control' in headers: 200 parts = headers['cache-control'].split(',') 201 parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")] 202 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")] 203 retval = dict(parts_with_args + parts_wo_args) 204 return retval 205 206# Whether to use a strict mode to parse WWW-Authenticate headers 207# Might lead to bad results in case of ill-formed header value, 208# so disabled by default, falling back to relaxed parsing. 209# Set to true to turn on, usefull for testing servers. 210USE_WWW_AUTH_STRICT_PARSING = 0 211 212# In regex below: 213# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP 214# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space 215# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both: 216# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"? 217WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$") 218WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$") 219UNQUOTE_PAIRS = re.compile(r'\\(.)') 220def _parse_www_authenticate(headers, headername='www-authenticate'): 221 """Returns a dictionary of dictionaries, one dict 222 per auth_scheme.""" 223 retval = {} 224 if headername in headers: 225 try: 226 authenticate = headers[headername].strip() 227 www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED 228 while authenticate: 229 # Break off the scheme at the beginning of the line 230 if headername == 'authentication-info': 231 (auth_scheme, the_rest) = ('digest', authenticate) 232 else: 233 (auth_scheme, the_rest) = authenticate.split(" ", 1) 234 # Now loop over all the key value pairs that come after the scheme, 235 # being careful not to roll into the next scheme 236 match = www_auth.search(the_rest) 237 auth_params = {} 238 while match: 239 if match and len(match.groups()) == 3: 240 (key, value, the_rest) = match.groups() 241 auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')]) 242 match = www_auth.search(the_rest) 243 retval[auth_scheme.lower()] = auth_params 244 authenticate = the_rest.strip() 245 except ValueError: 246 raise MalformedHeader("WWW-Authenticate") 247 return retval 248 249 250def _entry_disposition(response_headers, request_headers): 251 """Determine freshness from the Date, Expires and Cache-Control headers. 252 253 We don't handle the following: 254 255 1. Cache-Control: max-stale 256 2. Age: headers are not used in the calculations. 257 258 Not that this algorithm is simpler than you might think 259 because we are operating as a private (non-shared) cache. 260 This lets us ignore 's-maxage'. We can also ignore 261 'proxy-invalidate' since we aren't a proxy. 262 We will never return a stale document as 263 fresh as a design decision, and thus the non-implementation 264 of 'max-stale'. This also lets us safely ignore 'must-revalidate' 265 since we operate as if every server has sent 'must-revalidate'. 266 Since we are private we get to ignore both 'public' and 267 'private' parameters. We also ignore 'no-transform' since 268 we don't do any transformations. 269 The 'no-store' parameter is handled at a higher level. 270 So the only Cache-Control parameters we look at are: 271 272 no-cache 273 only-if-cached 274 max-age 275 min-fresh 276 """ 277 278 retval = "STALE" 279 cc = _parse_cache_control(request_headers) 280 cc_response = _parse_cache_control(response_headers) 281 282 if 'pragma' in request_headers and request_headers['pragma'].lower().find('no-cache') != -1: 283 retval = "TRANSPARENT" 284 if 'cache-control' not in request_headers: 285 request_headers['cache-control'] = 'no-cache' 286 elif 'no-cache' in cc: 287 retval = "TRANSPARENT" 288 elif 'no-cache' in cc_response: 289 retval = "STALE" 290 elif 'only-if-cached' in cc: 291 retval = "FRESH" 292 elif 'date' in response_headers: 293 date = calendar.timegm(email.utils.parsedate_tz(response_headers['date'])) 294 now = time.time() 295 current_age = max(0, now - date) 296 if 'max-age' in cc_response: 297 try: 298 freshness_lifetime = int(cc_response['max-age']) 299 except ValueError: 300 freshness_lifetime = 0 301 elif 'expires' in response_headers: 302 expires = email.utils.parsedate_tz(response_headers['expires']) 303 if None == expires: 304 freshness_lifetime = 0 305 else: 306 freshness_lifetime = max(0, calendar.timegm(expires) - date) 307 else: 308 freshness_lifetime = 0 309 if 'max-age' in cc: 310 try: 311 freshness_lifetime = int(cc['max-age']) 312 except ValueError: 313 freshness_lifetime = 0 314 if 'min-fresh' in cc: 315 try: 316 min_fresh = int(cc['min-fresh']) 317 except ValueError: 318 min_fresh = 0 319 current_age += min_fresh 320 if freshness_lifetime > current_age: 321 retval = "FRESH" 322 return retval 323 324def _decompressContent(response, new_content): 325 content = new_content 326 try: 327 encoding = response.get('content-encoding', None) 328 if encoding in ['gzip', 'deflate']: 329 if encoding == 'gzip': 330 content = gzip.GzipFile(fileobj=io.BytesIO(new_content)).read() 331 if encoding == 'deflate': 332 content = zlib.decompress(content) 333 response['content-length'] = str(len(content)) 334 # Record the historical presence of the encoding in a way the won't interfere. 335 response['-content-encoding'] = response['content-encoding'] 336 del response['content-encoding'] 337 except IOError: 338 content = "" 339 raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content) 340 return content 341 342def _bind_write_headers(msg): 343 from email.header import Header 344 def _write_headers(self): 345 # Self refers to the Generator object 346 for h, v in msg.items(): 347 print('%s:' % h, end=' ', file=self._fp) 348 if isinstance(v, Header): 349 print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp) 350 else: 351 # Header's got lots of smarts, so use it. 352 header = Header(v, maxlinelen=self._maxheaderlen, charset='utf-8', 353 header_name=h) 354 print(header.encode(), file=self._fp) 355 # A blank line always separates headers from body 356 print(file=self._fp) 357 return _write_headers 358 359def _updateCache(request_headers, response_headers, content, cache, cachekey): 360 if cachekey: 361 cc = _parse_cache_control(request_headers) 362 cc_response = _parse_cache_control(response_headers) 363 if 'no-store' in cc or 'no-store' in cc_response: 364 cache.delete(cachekey) 365 else: 366 info = email.message.Message() 367 for key, value in response_headers.items(): 368 if key not in ['status','content-encoding','transfer-encoding']: 369 info[key] = value 370 371 # Add annotations to the cache to indicate what headers 372 # are variant for this request. 373 vary = response_headers.get('vary', None) 374 if vary: 375 vary_headers = vary.lower().replace(' ', '').split(',') 376 for header in vary_headers: 377 key = '-varied-%s' % header 378 try: 379 info[key] = request_headers[header] 380 except KeyError: 381 pass 382 383 status = response_headers.status 384 if status == 304: 385 status = 200 386 387 status_header = 'status: %d\r\n' % status 388 389 try: 390 header_str = info.as_string() 391 except UnicodeEncodeError: 392 setattr(info, '_write_headers', _bind_write_headers(info)) 393 header_str = info.as_string() 394 395 header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str) 396 text = b"".join([status_header.encode('utf-8'), header_str.encode('utf-8'), content]) 397 398 cache.set(cachekey, text) 399 400def _cnonce(): 401 dig = _md5(("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).encode('utf-8')).hexdigest() 402 return dig[:16] 403 404def _wsse_username_token(cnonce, iso_now, password): 405 return base64.b64encode(_sha(("%s%s%s" % (cnonce, iso_now, password)).encode('utf-8')).digest()).strip() 406 407 408# For credentials we need two things, first 409# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.) 410# Then we also need a list of URIs that have already demanded authentication 411# That list is tricky since sub-URIs can take the same auth, or the 412# auth scheme may change as you descend the tree. 413# So we also need each Auth instance to be able to tell us 414# how close to the 'top' it is. 415 416class Authentication(object): 417 def __init__(self, credentials, host, request_uri, headers, response, content, http): 418 (scheme, authority, path, query, fragment) = parse_uri(request_uri) 419 self.path = path 420 self.host = host 421 self.credentials = credentials 422 self.http = http 423 424 def depth(self, request_uri): 425 (scheme, authority, path, query, fragment) = parse_uri(request_uri) 426 return request_uri[len(self.path):].count("/") 427 428 def inscope(self, host, request_uri): 429 # XXX Should we normalize the request_uri? 430 (scheme, authority, path, query, fragment) = parse_uri(request_uri) 431 return (host == self.host) and path.startswith(self.path) 432 433 def request(self, method, request_uri, headers, content): 434 """Modify the request headers to add the appropriate 435 Authorization header. Over-rise this in sub-classes.""" 436 pass 437 438 def response(self, response, content): 439 """Gives us a chance to update with new nonces 440 or such returned from the last authorized response. 441 Over-rise this in sub-classes if necessary. 442 443 Return TRUE is the request is to be retried, for 444 example Digest may return stale=true. 445 """ 446 return False 447 448 def __eq__(self, auth): 449 return False 450 451 def __ne__(self, auth): 452 return True 453 454 def __lt__(self, auth): 455 return True 456 457 def __gt__(self, auth): 458 return False 459 460 def __le__(self, auth): 461 return True 462 463 def __ge__(self, auth): 464 return False 465 466 def __bool__(self): 467 return True 468 469 470class BasicAuthentication(Authentication): 471 def __init__(self, credentials, host, request_uri, headers, response, content, http): 472 Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) 473 474 def request(self, method, request_uri, headers, content): 475 """Modify the request headers to add the appropriate 476 Authorization header.""" 477 headers['authorization'] = 'Basic ' + base64.b64encode(("%s:%s" % self.credentials).encode('utf-8')).strip().decode('utf-8') 478 479 480class DigestAuthentication(Authentication): 481 """Only do qop='auth' and MD5, since that 482 is all Apache currently implements""" 483 def __init__(self, credentials, host, request_uri, headers, response, content, http): 484 Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) 485 challenge = _parse_www_authenticate(response, 'www-authenticate') 486 self.challenge = challenge['digest'] 487 qop = self.challenge.get('qop', 'auth') 488 self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None 489 if self.challenge['qop'] is None: 490 raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop)) 491 self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper() 492 if self.challenge['algorithm'] != 'MD5': 493 raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm'])) 494 self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]]) 495 self.challenge['nc'] = 1 496 497 def request(self, method, request_uri, headers, content, cnonce = None): 498 """Modify the request headers""" 499 H = lambda x: _md5(x.encode('utf-8')).hexdigest() 500 KD = lambda s, d: H("%s:%s" % (s, d)) 501 A2 = "".join([method, ":", request_uri]) 502 self.challenge['cnonce'] = cnonce or _cnonce() 503 request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % ( 504 self.challenge['nonce'], 505 '%08x' % self.challenge['nc'], 506 self.challenge['cnonce'], 507 self.challenge['qop'], H(A2))) 508 headers['authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % ( 509 self.credentials[0], 510 self.challenge['realm'], 511 self.challenge['nonce'], 512 request_uri, 513 self.challenge['algorithm'], 514 request_digest, 515 self.challenge['qop'], 516 self.challenge['nc'], 517 self.challenge['cnonce']) 518 if self.challenge.get('opaque'): 519 headers['authorization'] += ', opaque="%s"' % self.challenge['opaque'] 520 self.challenge['nc'] += 1 521 522 def response(self, response, content): 523 if 'authentication-info' not in response: 524 challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {}) 525 if 'true' == challenge.get('stale'): 526 self.challenge['nonce'] = challenge['nonce'] 527 self.challenge['nc'] = 1 528 return True 529 else: 530 updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {}) 531 532 if 'nextnonce' in updated_challenge: 533 self.challenge['nonce'] = updated_challenge['nextnonce'] 534 self.challenge['nc'] = 1 535 return False 536 537 538class HmacDigestAuthentication(Authentication): 539 """Adapted from Robert Sayre's code and DigestAuthentication above.""" 540 __author__ = "Thomas Broyer (t.broyer@ltgt.net)" 541 542 def __init__(self, credentials, host, request_uri, headers, response, content, http): 543 Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) 544 challenge = _parse_www_authenticate(response, 'www-authenticate') 545 self.challenge = challenge['hmacdigest'] 546 # TODO: self.challenge['domain'] 547 self.challenge['reason'] = self.challenge.get('reason', 'unauthorized') 548 if self.challenge['reason'] not in ['unauthorized', 'integrity']: 549 self.challenge['reason'] = 'unauthorized' 550 self.challenge['salt'] = self.challenge.get('salt', '') 551 if not self.challenge.get('snonce'): 552 raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty.")) 553 self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1') 554 if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']: 555 raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm'])) 556 self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1') 557 if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']: 558 raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm'])) 559 if self.challenge['algorithm'] == 'HMAC-MD5': 560 self.hashmod = _md5 561 else: 562 self.hashmod = _sha 563 if self.challenge['pw-algorithm'] == 'MD5': 564 self.pwhashmod = _md5 565 else: 566 self.pwhashmod = _sha 567 self.key = "".join([self.credentials[0], ":", 568 self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(), 569 ":", self.challenge['realm']]) 570 self.key = self.pwhashmod.new(self.key).hexdigest().lower() 571 572 def request(self, method, request_uri, headers, content): 573 """Modify the request headers""" 574 keys = _get_end2end_headers(headers) 575 keylist = "".join(["%s " % k for k in keys]) 576 headers_val = "".join([headers[k] for k in keys]) 577 created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime()) 578 cnonce = _cnonce() 579 request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val) 580 request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower() 581 headers['authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % ( 582 self.credentials[0], 583 self.challenge['realm'], 584 self.challenge['snonce'], 585 cnonce, 586 request_uri, 587 created, 588 request_digest, 589 keylist) 590 591 def response(self, response, content): 592 challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {}) 593 if challenge.get('reason') in ['integrity', 'stale']: 594 return True 595 return False 596 597 598class WsseAuthentication(Authentication): 599 """This is thinly tested and should not be relied upon. 600 At this time there isn't any third party server to test against. 601 Blogger and TypePad implemented this algorithm at one point 602 but Blogger has since switched to Basic over HTTPS and 603 TypePad has implemented it wrong, by never issuing a 401 604 challenge but instead requiring your client to telepathically know that 605 their endpoint is expecting WSSE profile="UsernameToken".""" 606 def __init__(self, credentials, host, request_uri, headers, response, content, http): 607 Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) 608 609 def request(self, method, request_uri, headers, content): 610 """Modify the request headers to add the appropriate 611 Authorization header.""" 612 headers['authorization'] = 'WSSE profile="UsernameToken"' 613 iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) 614 cnonce = _cnonce() 615 password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1]) 616 headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % ( 617 self.credentials[0], 618 password_digest, 619 cnonce, 620 iso_now) 621 622class GoogleLoginAuthentication(Authentication): 623 def __init__(self, credentials, host, request_uri, headers, response, content, http): 624 from urllib.parse import urlencode 625 Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) 626 challenge = _parse_www_authenticate(response, 'www-authenticate') 627 service = challenge['googlelogin'].get('service', 'xapi') 628 # Bloggger actually returns the service in the challenge 629 # For the rest we guess based on the URI 630 if service == 'xapi' and request_uri.find("calendar") > 0: 631 service = "cl" 632 # No point in guessing Base or Spreadsheet 633 #elif request_uri.find("spreadsheets") > 0: 634 # service = "wise" 635 636 auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent']) 637 resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'}) 638 lines = content.split('\n') 639 d = dict([tuple(line.split("=", 1)) for line in lines if line]) 640 if resp.status == 403: 641 self.Auth = "" 642 else: 643 self.Auth = d['Auth'] 644 645 def request(self, method, request_uri, headers, content): 646 """Modify the request headers to add the appropriate 647 Authorization header.""" 648 headers['authorization'] = 'GoogleLogin Auth=' + self.Auth 649 650 651AUTH_SCHEME_CLASSES = { 652 "basic": BasicAuthentication, 653 "wsse": WsseAuthentication, 654 "digest": DigestAuthentication, 655 "hmacdigest": HmacDigestAuthentication, 656 "googlelogin": GoogleLoginAuthentication 657} 658 659AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"] 660 661class FileCache(object): 662 """Uses a local directory as a store for cached files. 663 Not really safe to use if multiple threads or processes are going to 664 be running on the same cache. 665 """ 666 def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior 667 self.cache = cache 668 self.safe = safe 669 if not os.path.exists(cache): 670 os.makedirs(self.cache) 671 672 def get(self, key): 673 retval = None 674 cacheFullPath = os.path.join(self.cache, self.safe(key)) 675 try: 676 f = open(cacheFullPath, "rb") 677 retval = f.read() 678 f.close() 679 except IOError: 680 pass 681 return retval 682 683 def set(self, key, value): 684 cacheFullPath = os.path.join(self.cache, self.safe(key)) 685 f = open(cacheFullPath, "wb") 686 f.write(value) 687 f.close() 688 689 def delete(self, key): 690 cacheFullPath = os.path.join(self.cache, self.safe(key)) 691 if os.path.exists(cacheFullPath): 692 os.remove(cacheFullPath) 693 694class Credentials(object): 695 def __init__(self): 696 self.credentials = [] 697 698 def add(self, name, password, domain=""): 699 self.credentials.append((domain.lower(), name, password)) 700 701 def clear(self): 702 self.credentials = [] 703 704 def iter(self, domain): 705 for (cdomain, name, password) in self.credentials: 706 if cdomain == "" or domain == cdomain: 707 yield (name, password) 708 709class KeyCerts(Credentials): 710 """Identical to Credentials except that 711 name/password are mapped to key/cert.""" 712 pass 713 714 715class ProxyInfo(object): 716 """Collect information required to use a proxy.""" 717 def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None): 718 """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX 719 constants. For example: 720 721p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000) 722 """ 723 self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass 724 725 def astuple(self): 726 return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, 727 self.proxy_user, self.proxy_pass) 728 729 def isgood(self): 730 return socks and (self.proxy_host != None) and (self.proxy_port != None) 731 732 733def proxy_info_from_environment(method='http'): 734 """ 735 Read proxy info from the environment variables. 736 """ 737 if method not in ('http', 'https'): 738 return 739 740 env_var = method + '_proxy' 741 url = os.environ.get(env_var, os.environ.get(env_var.upper())) 742 if not url: 743 return 744 return proxy_info_from_url(url, method) 745 746 747def proxy_info_from_url(url, method='http'): 748 """ 749 Construct a ProxyInfo from a URL (such as http_proxy env var) 750 """ 751 url = urllib.parse.urlparse(url) 752 username = None 753 password = None 754 port = None 755 if '@' in url[1]: 756 ident, host_port = url[1].split('@', 1) 757 if ':' in ident: 758 username, password = ident.split(':', 1) 759 else: 760 password = ident 761 else: 762 host_port = url[1] 763 if ':' in host_port: 764 host, port = host_port.split(':', 1) 765 else: 766 host = host_port 767 768 if port: 769 port = int(port) 770 else: 771 port = dict(https=443, http=80)[method] 772 773 proxy_type = 3 # socks.PROXY_TYPE_HTTP 774 return ProxyInfo( 775 proxy_type = proxy_type, 776 proxy_host = host, 777 proxy_port = port, 778 proxy_user = username or None, 779 proxy_pass = password or None, 780 ) 781 782 783class HTTPConnectionWithTimeout(http.client.HTTPConnection): 784 """HTTPConnection subclass that supports timeouts 785 786 HTTPConnection subclass that supports timeouts 787 788 All timeouts are in seconds. If None is passed for timeout then 789 Python's default timeout for sockets will be used. See for example 790 the docs of socket.setdefaulttimeout(): 791 http://docs.python.org/library/socket.html#socket.setdefaulttimeout 792 """ 793 794 def __init__(self, host, port=None, timeout=None, proxy_info=None): 795 http.client.HTTPConnection.__init__(self, host, port=port, 796 timeout=timeout) 797 self.proxy_info = proxy_info 798 799 800class HTTPSConnectionWithTimeout(http.client.HTTPSConnection): 801 """ 802 This class allows communication via SSL. 803 804 All timeouts are in seconds. If None is passed for timeout then 805 Python's default timeout for sockets will be used. See for example 806 the docs of socket.setdefaulttimeout(): 807 http://docs.python.org/library/socket.html#socket.setdefaulttimeout 808 """ 809 810 def __init__(self, host, port=None, key_file=None, cert_file=None, 811 timeout=None, proxy_info=None, 812 ca_certs=None, disable_ssl_certificate_validation=False): 813 self.proxy_info = proxy_info 814 context = None 815 if ca_certs is None: 816 ca_certs = CA_CERTS 817 if (cert_file or ca_certs) and not disable_ssl_certificate_validation: 818 if not hasattr(ssl, 'SSLContext'): 819 raise CertificateValidationUnsupportedInPython31() 820 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) 821 context.verify_mode = ssl.CERT_REQUIRED 822 if cert_file: 823 context.load_cert_chain(cert_file, key_file) 824 if ca_certs: 825 context.load_verify_locations(ca_certs) 826 http.client.HTTPSConnection.__init__( 827 self, host, port=port, key_file=key_file, 828 cert_file=cert_file, timeout=timeout, context=context, 829 check_hostname=True) 830 831 832SCHEME_TO_CONNECTION = { 833 'http': HTTPConnectionWithTimeout, 834 'https': HTTPSConnectionWithTimeout, 835} 836 837class Http(object): 838 """An HTTP client that handles: 839 840 - all methods 841 - caching 842 - ETags 843 - compression, 844 - HTTPS 845 - Basic 846 - Digest 847 - WSSE 848 849 and more. 850 """ 851 def __init__(self, cache=None, timeout=None, 852 proxy_info=proxy_info_from_environment, 853 ca_certs=None, disable_ssl_certificate_validation=False): 854 """If 'cache' is a string then it is used as a directory name for 855 a disk cache. Otherwise it must be an object that supports the 856 same interface as FileCache. 857 858 All timeouts are in seconds. If None is passed for timeout 859 then Python's default timeout for sockets will be used. See 860 for example the docs of socket.setdefaulttimeout(): 861 http://docs.python.org/library/socket.html#socket.setdefaulttimeout 862 863 `proxy_info` may be: 864 - a callable that takes the http scheme ('http' or 'https') and 865 returns a ProxyInfo instance per request. By default, uses 866 proxy_info_from_environment. 867 - a ProxyInfo instance (static proxy config). 868 - None (proxy disabled). 869 870 ca_certs is the path of a file containing root CA certificates for SSL 871 server certificate validation. By default, a CA cert file bundled with 872 httplib2 is used. 873 874 If disable_ssl_certificate_validation is true, SSL cert validation will 875 not be performed. 876""" 877 self.proxy_info = proxy_info 878 self.ca_certs = ca_certs 879 self.disable_ssl_certificate_validation = \ 880 disable_ssl_certificate_validation 881 # Map domain name to an httplib connection 882 self.connections = {} 883 # The location of the cache, for now a directory 884 # where cached responses are held. 885 if cache and isinstance(cache, str): 886 self.cache = FileCache(cache) 887 else: 888 self.cache = cache 889 890 # Name/password 891 self.credentials = Credentials() 892 893 # Key/cert 894 self.certificates = KeyCerts() 895 896 # authorization objects 897 self.authorizations = [] 898 899 # If set to False then no redirects are followed, even safe ones. 900 self.follow_redirects = True 901 902 # Which HTTP methods do we apply optimistic concurrency to, i.e. 903 # which methods get an "if-match:" etag header added to them. 904 self.optimistic_concurrency_methods = ["PUT", "PATCH"] 905 906 # If 'follow_redirects' is True, and this is set to True then 907 # all redirecs are followed, including unsafe ones. 908 self.follow_all_redirects = False 909 910 self.ignore_etag = False 911 912 self.force_exception_to_status_code = False 913 914 self.timeout = timeout 915 916 # Keep Authorization: headers on a redirect. 917 self.forward_authorization_headers = False 918 919 def __getstate__(self): 920 state_dict = copy.copy(self.__dict__) 921 # In case request is augmented by some foreign object such as 922 # credentials which handle auth 923 if 'request' in state_dict: 924 del state_dict['request'] 925 if 'connections' in state_dict: 926 del state_dict['connections'] 927 return state_dict 928 929 def __setstate__(self, state): 930 self.__dict__.update(state) 931 self.connections = {} 932 933 def _auth_from_challenge(self, host, request_uri, headers, response, content): 934 """A generator that creates Authorization objects 935 that can be applied to requests. 936 """ 937 challenges = _parse_www_authenticate(response, 'www-authenticate') 938 for cred in self.credentials.iter(host): 939 for scheme in AUTH_SCHEME_ORDER: 940 if scheme in challenges: 941 yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self) 942 943 def add_credentials(self, name, password, domain=""): 944 """Add a name and password that will be used 945 any time a request requires authentication.""" 946 self.credentials.add(name, password, domain) 947 948 def add_certificate(self, key, cert, domain): 949 """Add a key and cert that will be used 950 any time a request requires authentication.""" 951 self.certificates.add(key, cert, domain) 952 953 def clear_credentials(self): 954 """Remove all the names and passwords 955 that are used for authentication""" 956 self.credentials.clear() 957 self.authorizations = [] 958 959 def _conn_request(self, conn, request_uri, method, body, headers): 960 for i in range(RETRIES): 961 try: 962 if conn.sock is None: 963 conn.connect() 964 conn.request(method, request_uri, body, headers) 965 except socket.timeout: 966 conn.close() 967 raise 968 except socket.gaierror: 969 conn.close() 970 raise ServerNotFoundError("Unable to find the server at %s" % conn.host) 971 except socket.error as e: 972 errno_ = (e.args[0].errno if isinstance(e.args[0], socket.error) else e.errno) 973 if errno_ == errno.ECONNREFUSED: # Connection refused 974 raise 975 except http.client.HTTPException: 976 if conn.sock is None: 977 if i < RETRIES-1: 978 conn.close() 979 conn.connect() 980 continue 981 else: 982 conn.close() 983 raise 984 if i < RETRIES-1: 985 conn.close() 986 conn.connect() 987 continue 988 # Just because the server closed the connection doesn't apparently mean 989 # that the server didn't send a response. 990 pass 991 try: 992 response = conn.getresponse() 993 except socket.timeout: 994 raise 995 except (socket.error, http.client.HTTPException): 996 conn.close() 997 if i == 0: 998 conn.close() 999 conn.connect() 1000 continue 1001 else: 1002 raise 1003 else: 1004 content = b"" 1005 if method == "HEAD": 1006 conn.close() 1007 else: 1008 content = response.read() 1009 response = Response(response) 1010 if method != "HEAD": 1011 content = _decompressContent(response, content) 1012 1013 break 1014 return (response, content) 1015 1016 1017 def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey): 1018 """Do the actual request using the connection object 1019 and also follow one level of redirects if necessary""" 1020 1021 auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)] 1022 auth = auths and sorted(auths)[0][1] or None 1023 if auth: 1024 auth.request(method, request_uri, headers, body) 1025 1026 (response, content) = self._conn_request(conn, request_uri, method, body, headers) 1027 1028 if auth: 1029 if auth.response(response, body): 1030 auth.request(method, request_uri, headers, body) 1031 (response, content) = self._conn_request(conn, request_uri, method, body, headers ) 1032 response._stale_digest = 1 1033 1034 if response.status == 401: 1035 for authorization in self._auth_from_challenge(host, request_uri, headers, response, content): 1036 authorization.request(method, request_uri, headers, body) 1037 (response, content) = self._conn_request(conn, request_uri, method, body, headers, ) 1038 if response.status != 401: 1039 self.authorizations.append(authorization) 1040 authorization.response(response, body) 1041 break 1042 1043 if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303): 1044 if self.follow_redirects and response.status in [300, 301, 302, 303, 307]: 1045 # Pick out the location header and basically start from the beginning 1046 # remembering first to strip the ETag header and decrement our 'depth' 1047 if redirections: 1048 if 'location' not in response and response.status != 300: 1049 raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content) 1050 # Fix-up relative redirects (which violate an RFC 2616 MUST) 1051 if 'location' in response: 1052 location = response['location'] 1053 (scheme, authority, path, query, fragment) = parse_uri(location) 1054 if authority == None: 1055 response['location'] = urllib.parse.urljoin(absolute_uri, location) 1056 if response.status == 301 and method in ["GET", "HEAD"]: 1057 response['-x-permanent-redirect-url'] = response['location'] 1058 if 'content-location' not in response: 1059 response['content-location'] = absolute_uri 1060 _updateCache(headers, response, content, self.cache, cachekey) 1061 if 'if-none-match' in headers: 1062 del headers['if-none-match'] 1063 if 'if-modified-since' in headers: 1064 del headers['if-modified-since'] 1065 if 'authorization' in headers and not self.forward_authorization_headers: 1066 del headers['authorization'] 1067 if 'location' in response: 1068 location = response['location'] 1069 old_response = copy.deepcopy(response) 1070 if 'content-location' not in old_response: 1071 old_response['content-location'] = absolute_uri 1072 redirect_method = method 1073 if response.status in [302, 303]: 1074 redirect_method = "GET" 1075 body = None 1076 (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1) 1077 response.previous = old_response 1078 else: 1079 raise RedirectLimit("Redirected more times than redirection_limit allows.", response, content) 1080 elif response.status in [200, 203] and method in ["GET", "HEAD"]: 1081 # Don't cache 206's since we aren't going to handle byte range requests 1082 if 'content-location' not in response: 1083 response['content-location'] = absolute_uri 1084 _updateCache(headers, response, content, self.cache, cachekey) 1085 1086 return (response, content) 1087 1088 def _normalize_headers(self, headers): 1089 return _normalize_headers(headers) 1090 1091# Need to catch and rebrand some exceptions 1092# Then need to optionally turn all exceptions into status codes 1093# including all socket.* and httplib.* exceptions. 1094 1095 1096 def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None): 1097 """ Performs a single HTTP request. 1098The 'uri' is the URI of the HTTP resource and can begin 1099with either 'http' or 'https'. The value of 'uri' must be an absolute URI. 1100 1101The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc. 1102There is no restriction on the methods allowed. 1103 1104The 'body' is the entity body to be sent with the request. It is a string 1105object. 1106 1107Any extra headers that are to be sent with the request should be provided in the 1108'headers' dictionary. 1109 1110The maximum number of redirect to follow before raising an 1111exception is 'redirections. The default is 5. 1112 1113The return value is a tuple of (response, content), the first 1114being and instance of the 'Response' class, the second being 1115a string that contains the response entity body. 1116 """ 1117 try: 1118 if headers is None: 1119 headers = {} 1120 else: 1121 headers = self._normalize_headers(headers) 1122 1123 if 'user-agent' not in headers: 1124 headers['user-agent'] = "Python-httplib2/%s (gzip)" % __version__ 1125 1126 uri = iri2uri(uri) 1127 1128 (scheme, authority, request_uri, defrag_uri) = urlnorm(uri) 1129 domain_port = authority.split(":")[0:2] 1130 if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http': 1131 scheme = 'https' 1132 authority = domain_port[0] 1133 1134 conn_key = scheme+":"+authority 1135 if conn_key in self.connections: 1136 conn = self.connections[conn_key] 1137 else: 1138 if not connection_type: 1139 connection_type = SCHEME_TO_CONNECTION[scheme] 1140 certs = list(self.certificates.iter(authority)) 1141 if issubclass(connection_type, HTTPSConnectionWithTimeout): 1142 if certs: 1143 conn = self.connections[conn_key] = connection_type( 1144 authority, key_file=certs[0][0], 1145 cert_file=certs[0][1], timeout=self.timeout, 1146 proxy_info=self.proxy_info, 1147 ca_certs=self.ca_certs, 1148 disable_ssl_certificate_validation= 1149 self.disable_ssl_certificate_validation) 1150 else: 1151 conn = self.connections[conn_key] = connection_type( 1152 authority, timeout=self.timeout, 1153 proxy_info=self.proxy_info, 1154 ca_certs=self.ca_certs, 1155 disable_ssl_certificate_validation= 1156 self.disable_ssl_certificate_validation) 1157 else: 1158 conn = self.connections[conn_key] = connection_type( 1159 authority, timeout=self.timeout, 1160 proxy_info=self.proxy_info) 1161 conn.set_debuglevel(debuglevel) 1162 1163 if 'range' not in headers and 'accept-encoding' not in headers: 1164 headers['accept-encoding'] = 'gzip, deflate' 1165 1166 info = email.message.Message() 1167 cached_value = None 1168 if self.cache: 1169 cachekey = defrag_uri 1170 cached_value = self.cache.get(cachekey) 1171 if cached_value: 1172 try: 1173 info, content = cached_value.split(b'\r\n\r\n', 1) 1174 info = email.message_from_bytes(info) 1175 for k, v in info.items(): 1176 if v.startswith('=?') and v.endswith('?='): 1177 info.replace_header(k, 1178 str(*email.header.decode_header(v)[0])) 1179 except (IndexError, ValueError): 1180 self.cache.delete(cachekey) 1181 cachekey = None 1182 cached_value = None 1183 else: 1184 cachekey = None 1185 1186 if method in self.optimistic_concurrency_methods and self.cache and 'etag' in info and not self.ignore_etag and 'if-match' not in headers: 1187 # http://www.w3.org/1999/04/Editing/ 1188 headers['if-match'] = info['etag'] 1189 1190 if method not in ["GET", "HEAD"] and self.cache and cachekey: 1191 # RFC 2616 Section 13.10 1192 self.cache.delete(cachekey) 1193 1194 # Check the vary header in the cache to see if this request 1195 # matches what varies in the cache. 1196 if method in ['GET', 'HEAD'] and 'vary' in info: 1197 vary = info['vary'] 1198 vary_headers = vary.lower().replace(' ', '').split(',') 1199 for header in vary_headers: 1200 key = '-varied-%s' % header 1201 value = info[key] 1202 if headers.get(header, None) != value: 1203 cached_value = None 1204 break 1205 1206 if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers: 1207 if '-x-permanent-redirect-url' in info: 1208 # Should cached permanent redirects be counted in our redirection count? For now, yes. 1209 if redirections <= 0: 1210 raise RedirectLimit("Redirected more times than redirection_limit allows.", {}, "") 1211 (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1) 1212 response.previous = Response(info) 1213 response.previous.fromcache = True 1214 else: 1215 # Determine our course of action: 1216 # Is the cached entry fresh or stale? 1217 # Has the client requested a non-cached response? 1218 # 1219 # There seems to be three possible answers: 1220 # 1. [FRESH] Return the cache entry w/o doing a GET 1221 # 2. [STALE] Do the GET (but add in cache validators if available) 1222 # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request 1223 entry_disposition = _entry_disposition(info, headers) 1224 1225 if entry_disposition == "FRESH": 1226 if not cached_value: 1227 info['status'] = '504' 1228 content = b"" 1229 response = Response(info) 1230 if cached_value: 1231 response.fromcache = True 1232 return (response, content) 1233 1234 if entry_disposition == "STALE": 1235 if 'etag' in info and not self.ignore_etag and not 'if-none-match' in headers: 1236 headers['if-none-match'] = info['etag'] 1237 if 'last-modified' in info and not 'last-modified' in headers: 1238 headers['if-modified-since'] = info['last-modified'] 1239 elif entry_disposition == "TRANSPARENT": 1240 pass 1241 1242 (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey) 1243 1244 if response.status == 304 and method == "GET": 1245 # Rewrite the cache entry with the new end-to-end headers 1246 # Take all headers that are in response 1247 # and overwrite their values in info. 1248 # unless they are hop-by-hop, or are listed in the connection header. 1249 1250 for key in _get_end2end_headers(response): 1251 info[key] = response[key] 1252 merged_response = Response(info) 1253 if hasattr(response, "_stale_digest"): 1254 merged_response._stale_digest = response._stale_digest 1255 _updateCache(headers, merged_response, content, self.cache, cachekey) 1256 response = merged_response 1257 response.status = 200 1258 response.fromcache = True 1259 1260 elif response.status == 200: 1261 content = new_content 1262 else: 1263 self.cache.delete(cachekey) 1264 content = new_content 1265 else: 1266 cc = _parse_cache_control(headers) 1267 if 'only-if-cached'in cc: 1268 info['status'] = '504' 1269 response = Response(info) 1270 content = b"" 1271 else: 1272 (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey) 1273 except Exception as e: 1274 if self.force_exception_to_status_code: 1275 if isinstance(e, HttpLib2ErrorWithResponse): 1276 response = e.response 1277 content = e.content 1278 response.status = 500 1279 response.reason = str(e) 1280 elif isinstance(e, socket.timeout): 1281 content = b"Request Timeout" 1282 response = Response({ 1283 "content-type": "text/plain", 1284 "status": "408", 1285 "content-length": len(content) 1286 }) 1287 response.reason = "Request Timeout" 1288 else: 1289 content = str(e).encode('utf-8') 1290 response = Response({ 1291 "content-type": "text/plain", 1292 "status": "400", 1293 "content-length": len(content) 1294 }) 1295 response.reason = "Bad Request" 1296 else: 1297 raise 1298 1299 1300 return (response, content) 1301 1302 1303 1304class Response(dict): 1305 """An object more like email.message than httplib.HTTPResponse.""" 1306 1307 """Is this response from our local cache""" 1308 fromcache = False 1309 1310 """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """ 1311 version = 11 1312 1313 "Status code returned by server. " 1314 status = 200 1315 1316 """Reason phrase returned by server.""" 1317 reason = "Ok" 1318 1319 previous = None 1320 1321 def __init__(self, info): 1322 # info is either an email.message or 1323 # an httplib.HTTPResponse object. 1324 if isinstance(info, http.client.HTTPResponse): 1325 for key, value in info.getheaders(): 1326 key = key.lower() 1327 prev = self.get(key) 1328 if prev is not None: 1329 value = ', '.join((prev, value)) 1330 self[key] = value 1331 self.status = info.status 1332 self['status'] = str(self.status) 1333 self.reason = info.reason 1334 self.version = info.version 1335 elif isinstance(info, email.message.Message): 1336 for key, value in list(info.items()): 1337 self[key.lower()] = value 1338 self.status = int(self['status']) 1339 else: 1340 for key, value in info.items(): 1341 self[key.lower()] = value 1342 self.status = int(self.get('status', self.status)) 1343 1344 1345 def __getattr__(self, name): 1346 if name == 'dict': 1347 return self 1348 else: 1349 raise AttributeError(name) 1350