1
2"""
3httplib2
4
5A caching http interface that supports ETags and gzip
6to conserve bandwidth.
7
8Requires Python 3.0 or later
9
10Changelog:
112009-05-28, Pilgrim: ported to Python 3
122007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
13
14"""
15
16__author__ = "Joe Gregorio (joe@bitworking.org)"
17__copyright__ = "Copyright 2006, Joe Gregorio"
18__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
19    "James Antill",
20    "Xavier Verges Farrero",
21    "Jonathan Feinberg",
22    "Blair Zajac",
23    "Sam Ruby",
24    "Louis Nyffenegger",
25    "Mark Pilgrim"]
26__license__ = "MIT"
27__version__ = "0.7.7"
28
29import re
30import sys
31import email
32import email.utils
33import email.message
34import email.feedparser
35import io
36import gzip
37import zlib
38import http.client
39import urllib.parse
40import base64
41import os
42import copy
43import calendar
44import time
45import random
46import errno
47from hashlib import sha1 as _sha, md5 as _md5
48import hmac
49from gettext import gettext as _
50import socket
51import ssl
52_ssl_wrap_socket = ssl.wrap_socket
53
54try:
55    import socks
56except ImportError:
57    socks = None
58
59from .iri2uri import iri2uri
60
def has_timeout(timeout):
    """Return True when *timeout* is an explicit timeout value.

    Both None and the socket module's global-default sentinel (when the
    running Python provides one) count as "no explicit timeout".
    """
    sentinel = getattr(socket, '_GLOBAL_DEFAULT_TIMEOUT', None)
    if sentinel is not None:
        return timeout is not None and timeout is not sentinel
    return timeout is not None
65
66__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
67           'RedirectMissingLocation', 'RedirectLimit',
68           'FailedToDecompressContent', 'UnimplementedDigestAuthOptionError',
69           'UnimplementedHmacDigestAuthOptionError',
70           'debuglevel', 'RETRIES']
71
72
73# The httplib debug level, set to a non-zero value to get debug output
74debuglevel = 0
75
76# A request will be tried 'RETRIES' times if it fails at the socket/connection level.
77RETRIES = 2
78
79# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception): pass  # Base class for all exceptions raised by this module.

# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    def __init__(self, desc, response, content):
        # Keep the offending response and content so callers that catch
        # the exception can optionally turn it back into a response.
        self.response = response
        self.content = content
        HttpLib2Error.__init__(self, desc)

class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass  # redirect response without a Location header
class RedirectLimit(HttpLib2ErrorWithResponse): pass  # redirect chain exceeded the allowed depth
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass  # gzip/deflate body could not be decompressed
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass  # Digest challenge used an unsupported option
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass  # HMACDigest challenge used an unsupported option

class MalformedHeader(HttpLib2Error): pass  # a header value could not be parsed
class RelativeURIError(HttpLib2Error): pass  # an absolute URI was required but a relative one was given
class ServerNotFoundError(HttpLib2Error): pass
class CertificateValidationUnsupportedInPython31(HttpLib2Error): pass
100
101# Open Items:
102# -----------
103# Proxy support
104
105# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
106
107# Pluggable cache storage (supports storing the cache in
108#   flat files by default. We need a plug-in architecture
109#   that can support Berkeley DB and Squid)
110
111# == Known Issues ==
112# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
113# Does not handle Cache-Control: max-stale
114# Does not use Age: headers when calculating cache freshness.
115
116
117# The number of redirections to follow before giving up.
118# Note that only GET redirects are automatically followed.
119# Will also honor 301 requests by saving that info and never
120# requesting that URI again.
121DEFAULT_MAX_REDIRECTS = 5
122
123# Which headers are hop-by-hop headers by default
124HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
125
126# Default CA certificates file bundled with httplib2.
127CA_CERTS = os.path.join(
128        os.path.dirname(os.path.abspath(__file__ )), "cacerts.txt")
129
130def _get_end2end_headers(response):
131    hopbyhop = list(HOP_BY_HOP)
132    hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
133    return [header for header in list(response.keys()) if header not in hopbyhop]
134
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

        (scheme, authority, path, query, fragment) = parse_uri(uri)

    Components that are absent come back as None.
    """
    g = URI.match(uri).groups()
    return g[1], g[3], g[4], g[6], g[8]
144
def urlnorm(uri):
    """Normalize *uri*; return (scheme, authority, request_uri, defrag_uri).

    Lower-cases the scheme and authority, defaults an empty path to "/",
    and raises RelativeURIError for URIs lacking scheme or authority.
    """
    scheme, authority, path, query, fragment = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    scheme = scheme.lower()
    authority = authority.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = "?".join([path, query]) if query else path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
159
160
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme    = re.compile(br'^\w+://')
re_url_scheme_s  = re.compile(r'^\w+://')
re_slash         = re.compile(br'[?/:|]+')

def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """

    try:
        # Match with a pattern of the same type as the input: applying the
        # str pattern to bytes raised TypeError, which the UnicodeError
        # handler below did not catch.
        if isinstance(filename, bytes):
            if re_url_scheme.match(filename):
                filename = filename.decode('utf-8')
                filename = filename.encode('idna')
        else:
            if re_url_scheme_s.match(filename):
                filename = filename.encode('idna')
    except UnicodeError:
        # idna encoding fails for most full URLs; fall through with
        # whatever form *filename* currently has.
        pass
    if isinstance(filename, str):
        filename = filename.encode('utf-8')
    # Append an md5 of the full, untruncated name so distinct URLs cannot
    # collide after the scheme strip / truncation below.
    filemd5 = _md5(filename).hexdigest().encode('utf-8')
    filename = re_url_scheme.sub(b"", filename)
    filename = re_slash.sub(b",", filename)

    # limit length of filename
    if len(filename) > 200:
        filename = filename[:200]
    return b",".join((filename, filemd5)).decode('utf-8')
192
193NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
194def _normalize_headers(headers):
195    return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip())  for (key, value) in headers.items()])
196
197def _parse_cache_control(headers):
198    retval = {}
199    if 'cache-control' in headers:
200        parts =  headers['cache-control'].split(',')
201        parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
202        parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
203        retval = dict(parts_with_args + parts_wo_args)
204    return retval
205
206# Whether to use a strict mode to parse WWW-Authenticate headers
207# Might lead to bad results in case of ill-formed header value,
208# so disabled by default, falling back to relaxed parsing.
209# Set to true to turn on, usefull for testing servers.
210USE_WWW_AUTH_STRICT_PARSING = 0
211
212# In regex below:
213#    [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+             matches a "token" as defined by HTTP
214#    "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?"    matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
215# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
216#    \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
217WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
218WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
219UNQUOTE_PAIRS = re.compile(r'\\(.)')
def _parse_www_authenticate(headers, headername='www-authenticate'):
    """Returns a dictionary of dictionaries, one dict
    per auth_scheme.

    Example result: {'digest': {'realm': '...', 'nonce': '...'}}.
    Raises MalformedHeader if the header value cannot be split into
    scheme and parameters.
    """
    retval = {}
    if headername in headers:
        try:
            authenticate = headers[headername].strip()
            www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
            while authenticate:
                # Break off the scheme at the beginning of the line
                if headername == 'authentication-info':
                    # Authentication-Info carries no scheme token; its
                    # params are implicitly treated as Digest params.
                    (auth_scheme, the_rest) = ('digest', authenticate)
                else:
                    (auth_scheme, the_rest) = authenticate.split(" ", 1)
                # Now loop over all the key value pairs that come after the scheme,
                # being careful not to roll into the next scheme
                match = www_auth.search(the_rest)
                auth_params = {}
                while match:
                    if match and len(match.groups()) == 3:
                        (key, value, the_rest) = match.groups()
                        # Unescape backslash quoted-pairs inside quoted-strings.
                        auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
                    match = www_auth.search(the_rest)
                retval[auth_scheme.lower()] = auth_params
                authenticate = the_rest.strip()
        except ValueError:
            raise MalformedHeader("WWW-Authenticate")
    return retval
248
249
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns one of:
        "FRESH"       -- the cached entry may be served as-is
        "STALE"       -- the cached entry must be revalidated
        "TRANSPARENT" -- bypass the cache entirely

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if 'pragma' in request_headers and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif 'no-cache' in cc:
        retval = "TRANSPARENT"
    elif 'no-cache' in cc_response:
        retval = "STALE"
    elif 'only-if-cached' in cc:
        retval = "FRESH"
    elif 'date' in response_headers:
        date_tuple = email.utils.parsedate_tz(response_headers['date'])
        if date_tuple is None:
            # An unparsable Date header used to raise TypeError inside
            # timegm(); treat the entry as stale instead.
            return retval
        date = calendar.timegm(date_tuple)
        now = time.time()
        current_age = max(0, now - date)
        if 'max-age' in cc_response:
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif 'expires' in response_headers:
            expires = email.utils.parsedate_tz(response_headers['expires'])
            if expires is None:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # A max-age in the request overrides the response's lifetime.
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        # min-fresh shifts the age forward, demanding extra headroom.
        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
323
324def _decompressContent(response, new_content):
325    content = new_content
326    try:
327        encoding = response.get('content-encoding', None)
328        if encoding in ['gzip', 'deflate']:
329            if encoding == 'gzip':
330                content = gzip.GzipFile(fileobj=io.BytesIO(new_content)).read()
331            if encoding == 'deflate':
332                content = zlib.decompress(content)
333            response['content-length'] = str(len(content))
334            # Record the historical presence of the encoding in a way the won't interfere.
335            response['-content-encoding'] = response['content-encoding']
336            del response['content-encoding']
337    except IOError:
338        content = ""
339        raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
340    return content
341
def _bind_write_headers(msg):
  """Return a _write_headers replacement bound to *msg* that emits header
  values through email.header.Header with a UTF-8 charset.

  Used as a fallback when Message.as_string() hits a UnicodeEncodeError
  (it is patched onto the message as '_write_headers'; see _updateCache).
  """
  from email.header import Header
  def _write_headers(self):
      # Self refers to the Generator object
      for h, v in msg.items():
          # Emit "Name: " then the encoded value on the same line.
          print('%s:' % h, end=' ', file=self._fp)
          if isinstance(v, Header):
              print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
          else:
              # Header's got lots of smarts, so use it.
              header = Header(v, maxlinelen=self._maxheaderlen, charset='utf-8',
                              header_name=h)
              print(header.encode(), file=self._fp)
      # A blank line always separates headers from body
      print(file=self._fp)
  return _write_headers
358
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or remove) a response in *cache* under *cachekey*.

    Honors 'no-store' on either side by deleting the entry. Otherwise the
    cached text is: a status line, the response headers (plus a
    '-varied-<name>' annotation for each header named by Vary), a blank
    line, and the raw body bytes.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if 'no-store' in cc or 'no-store' in cc_response:
            cache.delete(cachekey)
        else:
            info = email.message.Message()
            for key, value in response_headers.items():
                # status is stored separately; the encodings no longer
                # apply to the (already decoded) cached body.
                if key not in ['status','content-encoding','transfer-encoding']:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get('vary', None)
            if vary:
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    key = '-varied-%s' % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            # A 304 validates the cached body, so store the entry as a 200.
            status = response_headers.status
            if status == 304:
                status = 200

            status_header = 'status: %d\r\n' % status

            try:
                header_str = info.as_string()
            except UnicodeEncodeError:
                # Fall back to a writer that encodes header values as UTF-8.
                setattr(info, '_write_headers', _bind_write_headers(info))
                header_str = info.as_string()

            # Normalize bare CR or bare LF to CRLF before serializing.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = b"".join([status_header.encode('utf-8'), header_str.encode('utf-8'), content])

            cache.set(cachekey, text)
399
400def _cnonce():
401    dig = _md5(("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).encode('utf-8')).hexdigest()
402    return dig[:16]
403
404def _wsse_username_token(cnonce, iso_now, password):
405    return base64.b64encode(_sha(("%s%s%s" % (cnonce, iso_now, password)).encode('utf-8')).digest()).strip()
406
407
# For credentials we need two things, first
# a pool of credentials to try (not necessarily tied to Basic, Digest, etc.)
# Then we also need a list of URIs that have already demanded authentication
# That list is tricky since sub-URIs can take the same auth, or the
# auth scheme may change as you descend the tree.
# So we also need each Auth instance to be able to tell us
# how close to the 'top' it is.
415
class Authentication(object):
    """Base class for the pluggable authentication schemes.

    Remembers the credentials and the (host, path) scope that issued the
    challenge, so later requests can be matched against that scope.
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        _, _, path, _, _ = parse_uri(request_uri)
        self.path = path
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """Return how many path segments *request_uri* lies below our scope."""
        _, _, path, _, _ = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        """Return True when (host, request_uri) falls inside our auth scope."""
        # XXX Should we normalize the request_uri?
        _, _, path, _, _ = parse_uri(request_uri)
        if host != self.host:
            return False
        return path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in sub-classes."""
        pass

    def response(self, response, content):
        """Hook to update state (e.g. new nonces) from an authorized
        response; override in sub-classes if necessary.

        Return True when the request should be retried, for example
        Digest may return stale=true.
        """
        return False

    # Comparison stubs: every Authentication instance sorts before any
    # other and never compares equal, while remaining truthy.
    def __eq__(self, auth):
        return False

    def __ne__(self, auth):
        return True

    def __lt__(self, auth):
        return True

    def __gt__(self, auth):
        return False

    def __le__(self, auth):
        return True

    def __ge__(self, auth):
        return False

    def __bool__(self):
        return True
468
469
class BasicAuthentication(Authentication):
    """HTTP Basic authentication: base64 of "user:password"."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add a Basic Authorization header to *headers*."""
        token = base64.b64encode(("%s:%s" % self.credentials).encode('utf-8'))
        headers['authorization'] = 'Basic ' + token.strip().decode('utf-8')
478
479
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        qop = self.challenge.get('qop', 'auth')
        # Keep qop='auth' when offered, otherwise None (rejected just below).
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        # A1 = username:realm:password (RFC 2617 section 3.2.2.2); hashed per request.
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        # Nonce count, incremented each time we sign a request.
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Add a Digest Authorization header to *headers*.

        *cnonce* may be supplied explicitly (useful for testing);
        otherwise a fresh random client nonce is generated.
        """
        # H and KD are the hash / keyed-digest helpers from RFC 2617.
        H = lambda x: _md5(x.encode('utf-8')).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        # A2 = method:digest-URI (the qop='auth' form).
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
        request_digest  = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (
                self.challenge['nonce'],
                '%08x' % self.challenge['nc'],
                self.challenge['cnonce'],
                self.challenge['qop'], H(A2)))
        headers['authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['nonce'],
                request_uri,
                self.challenge['algorithm'],
                request_digest,
                self.challenge['qop'],
                self.challenge['nc'],
                self.challenge['cnonce'])
        if self.challenge.get('opaque'):
            headers['authorization'] += ', opaque="%s"' % self.challenge['opaque']
        self.challenge['nc'] += 1

    def response(self, response, content):
        """Update nonce state from the response; return True to retry."""
        if 'authentication-info' not in response:
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                # The server rejected our nonce as stale: adopt the new
                # nonce, reset the count, and retry the request.
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if 'nextnonce' in updated_challenge:
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False
536
537
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above.

    Implements the (experimental) HMACDigest scheme using HMAC-SHA-1 or
    HMAC-MD5 over the end-to-end request headers.
    """
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # _md5/_sha are hashlib constructors and must be called directly:
        # the old self.pwhashmod.new(...) raised AttributeError, and
        # hashing requires bytes rather than str on Python 3.
        self.key = "".join([self.credentials[0], ":",
                            self.pwhashmod("".join([self.credentials[1], self.challenge['salt']]).encode('utf-8')).hexdigest().lower(),
                            ":", self.challenge['realm']])
        self.key = self.pwhashmod(self.key.encode('utf-8')).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Add an HMACDigest Authorization header for this request."""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        # hmac.new() requires bytes for both key and message on Python 3.
        request_digest  = hmac.new(self.key.encode('utf-8'), request_digest.encode('utf-8'), self.hashmod).hexdigest().lower()
        headers['authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['snonce'],
                cnonce,
                request_uri,
                created,
                request_digest,
                keylist)

    def response(self, response, content):
        """Return True (retry) when the server reports an integrity
        failure or a stale nonce."""
        challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False
596
597
class WsseAuthentication(Authentication):
    """WSSE UsernameToken authentication (thinly tested; do not rely on it).

    No third-party server is currently available to test against. Blogger
    and TypePad implemented this scheme at one point, but Blogger has since
    switched to Basic over HTTPS and TypePad's implementation never issues
    a 401 challenge, requiring the client to already know the endpoint
    expects WSSE profile="UsernameToken".
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add the WSSE Authorization and X-WSSE headers to *headers*."""
        headers['authorization'] = 'WSSE profile="UsernameToken"'
        now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        digest = _wsse_username_token(nonce, now, self.credentials[1])
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
                self.credentials[0],
                digest,
                nonce,
                now)
621
class GoogleLoginAuthentication(Authentication):
    """Google ClientLogin authentication.

    On construction this performs a ClientLogin POST with the supplied
    credentials and stores the resulting Auth token for later requests
    (empty string on a 403 response).
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib.parse import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and  request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        # NOTE(review): this assumes *content* is str here; if
        # Http.request returns the body as bytes, the split below would
        # need a decode first -- confirm against Http.request.
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            self.Auth = ""
        else:
            self.Auth = d['Auth']

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
649
650
651AUTH_SCHEME_CLASSES = {
652    "basic": BasicAuthentication,
653    "wsse": WsseAuthentication,
654    "digest": DigestAuthentication,
655    "hmacdigest": HmacDigestAuthentication,
656    "googlelogin": GoogleLoginAuthentication
657}
658
659AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
660
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """
    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        self.cache = cache
        self.safe = safe
        # exist_ok avoids the race between an exists() check and
        # makedirs() when another process creates the directory first.
        os.makedirs(self.cache, exist_ok=True)

    def get(self, key):
        """Return the cached bytes for *key*, or None if absent/unreadable."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            with open(cacheFullPath, "rb") as f:
                retval = f.read()
        except IOError:
            pass
        return retval

    def set(self, key, value):
        """Store *value* (bytes) under *key*."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        with open(cacheFullPath, "wb") as f:
            f.write(value)

    def delete(self, key):
        """Remove the cache entry for *key*, if present."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
693
class Credentials(object):
    """A simple in-memory store of (domain, name, password) triples."""
    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register *name*/*password* for *domain* ("" matches any domain)."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget every stored credential."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs applicable to *domain*."""
        matching = (entry for entry in self.credentials
                    if entry[0] == "" or entry[0] == domain)
        for cdomain, name, password in matching:
            yield (name, password)
708
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""
    # Inherits add/clear/iter unchanged; only the meaning of the stored
    # tuple fields differs.
    pass
713
714
class ProxyInfo(object):
    """Collect information required to use a proxy."""
    def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
        constants. For example:

        p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        """Return every proxy setting as a single tuple."""
        return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns,
                self.proxy_user, self.proxy_pass)

    def isgood(self):
        """True when the socks module is importable and host/port are set."""
        # 'is not None' rather than '!= None' (identity check is the idiom,
        # and port 0 / empty host still count as explicitly configured).
        return socks is not None and self.proxy_host is not None and self.proxy_port is not None
731
732
def proxy_info_from_environment(method='http'):
    """Build proxy settings from the ``http_proxy``/``https_proxy``
    environment variables (lowercase checked first, then uppercase).

    Returns None when ``method`` is not 'http'/'https' or when no
    (non-empty) proxy URL is configured.
    """
    if method not in ('http', 'https'):
        return None
    var_name = method + '_proxy'
    # NB: keep the two-level get() so an empty lowercase value still
    # results in "no proxy" rather than falling through to uppercase.
    proxy_url = os.environ.get(var_name, os.environ.get(var_name.upper()))
    if not proxy_url:
        return None
    return proxy_info_from_url(proxy_url, method)
745
746
def proxy_info_from_url(url, method='http'):
    """Construct a ProxyInfo from a proxy URL (such as the http_proxy
    environment variable)."""
    netloc = urllib.parse.urlparse(url)[1]
    username = None
    password = None
    if '@' in netloc:
        ident, host_port = netloc.split('@', 1)
        if ':' in ident:
            username, password = ident.split(':', 1)
        else:
            # A lone credential before '@' is treated as a password,
            # matching the historical behavior of this function.
            password = ident
    else:
        host_port = netloc
    host, _sep, port_text = host_port.partition(':')
    # Missing/empty port falls back to the scheme default.
    port = int(port_text) if port_text else {'https': 443, 'http': 80}[method]

    return ProxyInfo(
        proxy_type=3,  # socks.PROXY_TYPE_HTTP
        proxy_host=host,
        proxy_port=port,
        proxy_user=username or None,
        proxy_pass=password or None,
    )
781
782
class HTTPConnectionWithTimeout(http.client.HTTPConnection):
    """HTTPConnection subclass that supports timeouts

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, timeout=None, proxy_info=None):
        # Delegate connection handling entirely to the stdlib class and
        # just remember the proxy configuration for later use.
        super().__init__(host, port=port, timeout=timeout)
        self.proxy_info = proxy_info
798
799
class HTTPSConnectionWithTimeout(http.client.HTTPSConnection):
    """
    This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 timeout=None, proxy_info=None,
                 ca_certs=None, disable_ssl_certificate_validation=False):
        """key_file/cert_file enable client-certificate auth; ca_certs is a
        path to a root-CA bundle (defaults to the module-level CA_CERTS);
        disable_ssl_certificate_validation skips building a verifying
        SSLContext entirely."""
        self.proxy_info = proxy_info
        context = None
        # Fall back to the CA bundle shipped with httplib2 (module global).
        if ca_certs is None:
            ca_certs = CA_CERTS
        if (cert_file or ca_certs) and not disable_ssl_certificate_validation:
            # ssl.SSLContext appeared in Python 3.2; without it certificate
            # validation cannot be performed, so refuse rather than degrade.
            if not hasattr(ssl, 'SSLContext'):
                raise CertificateValidationUnsupportedInPython31()
            # NOTE(review): pins the protocol to TLSv1 only; modern code
            # would use ssl.PROTOCOL_TLS / create_default_context — confirm
            # before changing, as it affects which servers are reachable.
            context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
            context.verify_mode = ssl.CERT_REQUIRED
            if cert_file:
                context.load_cert_chain(cert_file, key_file)
            if ca_certs:
                context.load_verify_locations(ca_certs)
        # NOTE(review): key_file/cert_file/check_hostname keyword arguments
        # were deprecated in later Python 3 and removed in 3.12 — this call
        # only works on older interpreters.
        http.client.HTTPSConnection.__init__(
                self, host, port=port, key_file=key_file,
                cert_file=cert_file, timeout=timeout, context=context,
                check_hostname=True)
830
831
# Default connection class for each URL scheme; Http.request() consults
# this table unless the caller passes an explicit connection_type.
SCHEME_TO_CONNECTION = {
    'http': HTTPConnectionWithTimeout,
    'https': HTTPSConnectionWithTimeout,
}
836
class Http(object):
    """An HTTP client that handles:

    - all methods
    - caching
    - ETags
    - compression,
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """
    def __init__(self, cache=None, timeout=None,
                 proxy_info=proxy_info_from_environment,
                 ca_certs=None, disable_ssl_certificate_validation=False):
        """If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout

        `proxy_info` may be:
          - a callable that takes the http scheme ('http' or 'https') and
            returns a ProxyInfo instance per request. By default, uses
            proxy_info_from_environment.
          - a ProxyInfo instance (static proxy config).
          - None (proxy disabled).

        ca_certs is the path of a file containing root CA certificates for SSL
        server certificate validation.  By default, a CA cert file bundled with
        httplib2 is used.

        If disable_ssl_certificate_validation is true, SSL cert validation will
        not be performed.
"""
        self.proxy_info = proxy_info
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = \
                disable_ssl_certificate_validation
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT", "PATCH"]

        # If 'follow_redirects' is True, and this is set to True then
        # all redirecs are followed, including unsafe ones.
        self.follow_all_redirects = False

        # If True, etags from cached responses are not used as validators
        # (no if-match / if-none-match headers are added from the cache).
        self.ignore_etag = False

        # If True, exceptions raised inside request() are converted into
        # synthetic Response objects instead of propagating to the caller.
        self.force_exception_to_status_code = False

        self.timeout = timeout

        # Keep Authorization: headers on a redirect.
        self.forward_authorization_headers = False

    def __getstate__(self):
        # Pickle support: drop state that cannot or should not be pickled.
        state_dict = copy.copy(self.__dict__)
        # In case request is augmented by some foreign object such as
        # credentials which handle auth
        if 'request' in state_dict:
            del state_dict['request']
        if 'connections' in state_dict:
            del state_dict['connections']
        return state_dict

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Connections are re-established lazily on the next request.
        self.connections = {}

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
           that can be applied to requests.
        """
        # Pair every stored credential for this host with every advertised
        # scheme we support, in AUTH_SCHEME_ORDER preference order.
        challenges = _parse_www_authenticate(response, 'www-authenticate')
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if scheme in challenges:
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        """Issue one request on 'conn', retrying at the connection level up
        to RETRIES times, and return a (Response, content bytes) tuple."""
        for i in range(RETRIES):
            try:
                if conn.sock is None:
                    conn.connect()
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                conn.close()
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except socket.error as e:
                # The underlying error may be nested in args[0] on some
                # platforms/paths — TODO confirm which produce the nested form.
                errno_ = (e.args[0].errno if isinstance(e.args[0], socket.error) else e.errno)
                if errno_ == errno.ECONNREFUSED: # Connection refused
                    raise
            except http.client.HTTPException:
                # Retry on a fresh connection unless this was the last attempt.
                if conn.sock is None:
                    if i < RETRIES-1:
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i < RETRIES-1:
                    conn.close()
                    conn.connect()
                    continue
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                pass
            try:
                response = conn.getresponse()
            except socket.timeout:
                raise
            except (socket.error, http.client.HTTPException):
                conn.close()
                if i == 0:
                    # First failure while reading: reconnect and retry once.
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = b""
                if method == "HEAD":
                    # HEAD carries no body; close so the connection is not
                    # left waiting for one.
                    conn.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)

            break
        return (response, content)


    def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Apply the most specific (deepest request-path) in-scope
        # authorization object, if any have been collected.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # Give the auth object a chance to react to the response (e.g.
            # a stale digest nonce) and replay the request once.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers )
                response._stale_digest = 1

        if response.status == 401:
            # Try each candidate Authorization for the challenge until one
            # succeeds; remember it for subsequent requests.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if 'location' not in response and response.status != 300:
                        raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if 'location' in response:
                        location = response['location']
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response['location'] = urllib.parse.urljoin(absolute_uri, location)
                    if response.status == 301 and method in ["GET", "HEAD"]:
                        # Permanent redirects for safe methods get cached so
                        # future requests can skip straight to the target.
                        response['-x-permanent-redirect-url'] = response['location']
                        if 'content-location' not in response:
                            response['content-location'] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    # Strip validators (and usually credentials) before
                    # re-issuing the request at the new location.
                    if 'if-none-match' in headers:
                        del headers['if-none-match']
                    if 'if-modified-since' in headers:
                        del headers['if-modified-since']
                    if 'authorization' in headers and not self.forward_authorization_headers:
                        del headers['authorization']
                    if 'location' in response:
                        location = response['location']
                        old_response = copy.deepcopy(response)
                        if 'content-location' not in old_response:
                            old_response['content-location'] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                          redirect_method = "GET"
                          body = None
                        (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
                        response.previous = old_response
                else:
                    raise RedirectLimit("Redirected more times than redirection_limit allows.", response, content)
            elif response.status in [200, 203] and method in ["GET", "HEAD"]:
                # Don't cache 206's since we aren't going to handle byte range requests
                if 'content-location' not in response:
                    response['content-location'] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    def _normalize_headers(self, headers):
        # Thin hook over the module-level helper so subclasses can override.
        return _normalize_headers(headers)

# Need to catch and rebrand some exceptions
# Then need to optionally turn all exceptions into status codes
# including all socket.* and httplib.* exceptions.


    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
        """ Performs a single HTTP request.
The 'uri' is the URI of the HTTP resource and can begin
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.

The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
There is no restriction on the methods allowed.

The 'body' is the entity body to be sent with the request. It is a string
object.

Any extra headers that are to be sent with the request should be provided in the
'headers' dictionary.

The maximum number of redirect to follow before raising an
exception is 'redirections. The default is 5.

The return value is a tuple of (response, content), the first
being and instance of the 'Response' class, the second being
a string that contains the response entity body.
        """
        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if 'user-agent' not in headers:
                headers['user-agent'] = "Python-httplib2/%s (gzip)" % __version__

            uri = iri2uri(uri)

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
            # An http URI with an explicit :443 port is treated as https.
            domain_port = authority.split(":")[0:2]
            if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
                scheme = 'https'
                authority = domain_port[0]

            # Reuse a pooled connection for this scheme+authority when
            # available; otherwise create (and pool) a new one.
            conn_key = scheme+":"+authority
            if conn_key in self.connections:
                conn = self.connections[conn_key]
            else:
                if not connection_type:
                    connection_type = SCHEME_TO_CONNECTION[scheme]
                certs = list(self.certificates.iter(authority))
                if issubclass(connection_type, HTTPSConnectionWithTimeout):
                    if certs:
                        conn = self.connections[conn_key] = connection_type(
                                authority, key_file=certs[0][0],
                                cert_file=certs[0][1], timeout=self.timeout,
                                proxy_info=self.proxy_info,
                                ca_certs=self.ca_certs,
                                disable_ssl_certificate_validation=
                                        self.disable_ssl_certificate_validation)
                    else:
                        conn = self.connections[conn_key] = connection_type(
                                authority, timeout=self.timeout,
                                proxy_info=self.proxy_info,
                                ca_certs=self.ca_certs,
                                disable_ssl_certificate_validation=
                                        self.disable_ssl_certificate_validation)
                else:
                    conn = self.connections[conn_key] = connection_type(
                            authority, timeout=self.timeout,
                            proxy_info=self.proxy_info)
                conn.set_debuglevel(debuglevel)

            if 'range' not in headers and 'accept-encoding' not in headers:
                headers['accept-encoding'] = 'gzip, deflate'

            info = email.message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    # Cache entries are stored as an RFC-822-style header
                    # block, a blank line, then the body bytes.
                    try:
                        info, content = cached_value.split(b'\r\n\r\n', 1)
                        info = email.message_from_bytes(info)
                        for k, v in info.items():
                          if v.startswith('=?') and v.endswith('?='):
                            info.replace_header(k,
                                                str(*email.header.decode_header(v)[0]))
                    except (IndexError, ValueError):
                        # Corrupt cache entry: discard it and carry on.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else:
                cachekey = None

            if method in self.optimistic_concurrency_methods and self.cache and 'etag' in info and not self.ignore_etag and 'if-match' not in headers:
                # http://www.w3.org/1999/04/Editing/
                headers['if-match'] = info['etag']

            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in ['GET', 'HEAD'] and 'vary' in info:
                vary = info['vary']
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    key = '-varied-%s' % header
                    value = info[key]
                    if headers.get(header, None) != value:
                            cached_value = None
                            break

            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if '-x-permanent-redirect-url' in info:
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit("Redirected more times than redirection_limit allows.", {}, "")
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        if not cached_value:
                            info['status'] = '504'
                            content = b""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if 'etag' in info and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if 'last-modified' in info and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    # Validation failed in some other way; drop the stale entry.
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                cc = _parse_cache_control(headers)
                if 'only-if-cached'in cc:
                    # Client asked for cache-only and we have no usable entry:
                    # answer 504 per RFC 2616 cache-control semantics.
                    info['status'] = '504'
                    response = Response(info)
                    content = b""
                else:
                    (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception as e:
            if self.force_exception_to_status_code:
                # Convert the failure into a synthetic Response instead of
                # letting the exception propagate.
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = b"Request Timeout"
                    response = Response({
                        "content-type": "text/plain",
                        "status": "408",
                        "content-length": len(content)
                    })
                    response.reason = "Request Timeout"
                else:
                    content = str(e).encode('utf-8')
                    response = Response({
                        "content-type": "text/plain",
                        "status": "400",
                        "content-length": len(content)
                    })
                    response.reason = "Bad Request"
            else:
                raise


        return (response, content)
1301
1302
1303
class Response(dict):
    """An object more like email.message than httplib.HTTPResponse."""

    # True when this response was served from the local cache.
    fromcache = False

    # HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1.
    version = 11

    # Status code returned by server.
    status = 200

    # Reason phrase returned by server.
    reason = "Ok"

    # The previous Response in a redirect chain, if any.
    previous = None

    def __init__(self, info):
        """Build from an http.client.HTTPResponse, an email.message.Message,
        or any plain mapping of header names to values."""
        if isinstance(info, http.client.HTTPResponse):
            for key, value in info.getheaders():
                key = key.lower()
                existing = self.get(key)
                if existing is not None:
                    # Repeated headers fold into one comma-separated value.
                    value = ', '.join((existing, value))
                self[key] = value
            self.status = info.status
            self['status'] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.message.Message):
            self.update((key.lower(), value) for key, value in list(info.items()))
            # A cached header block must carry a 'status' entry.
            self.status = int(self['status'])
        else:
            self.update((key.lower(), value) for key, value in info.items())
            self.status = int(self.get('status', self.status))

    def __getattr__(self, name):
        # Expose 'dict' as an alias for the mapping itself.
        if name == 'dict':
            return self
        raise AttributeError(name)
1350