http_wrapper.py revision cef7893435aa41160dd1255c43cb8498279738cc
1#!/usr/bin/env python
2"""HTTP wrapper for apitools.
3
4This library wraps the underlying http library we use, which is
5currently httplib2.
6"""
7
8import collections
9import contextlib
10import logging
11import socket
12import time
13
14import httplib2
15import six
16from six.moves import http_client
17from six.moves.urllib import parse
18
19from apitools.base.py import exceptions
20from apitools.base.py import util
21
22__all__ = [
23    'CheckResponse',
24    'GetHttp',
25    'HandleExceptionsAndRebuildHttpConnections',
26    'MakeRequest',
27    'RebuildHttpConnections',
28    'Request',
29    'Response',
30    'RethrowExceptionHandler',
31]
32
33
# httplib provides no symbolic names for these two status codes, so they
# are spelled out here.
TOO_MANY_REQUESTS = 429
RESUME_INCOMPLETE = 308

# Status codes that count as a redirect; Response.is_redirect additionally
# requires a 'location' header.
_REDIRECT_STATUS_CODES = (
    http_client.MOVED_PERMANENTLY,
    http_client.FOUND,
    http_client.SEE_OTHER,
    http_client.TEMPORARY_REDIRECT,
    RESUME_INCOMPLETE,
)
44
# State handed to a retry function after a failed request attempt.
#   http: An httplib2.Http instance.
#   http_request: A http_wrapper.Request.
#   exc: Exception being raised.
#   num_retries: Number of retries consumed; used for exponential backoff.
#   max_retry_wait: Upper bound handed to the backoff wait calculation.
ExceptionRetryArgs = collections.namedtuple(
    'ExceptionRetryArgs',
    (
        'http',
        'http_request',
        'exc',
        'num_retries',
        'max_retry_wait',
    ))
52
53
@contextlib.contextmanager
def _Httplib2Debuglevel(http_request, level, http=None):
    """Temporarily change the value of httplib2.debuglevel, if necessary.

    If http_request has a `loggable_body`, then we need to prevent
    httplib2 from logging the full body. This sets httplib2.debuglevel
    for the duration of the `with` block; however, that alone won't
    change the value of existing HTTP connections. If an httplib2.Http
    object is provided, we'll also change the level on any cached
    connections attached to it. When `loggable_body` is None nothing is
    touched.

    The previous levels are restored even if the `with` body raises, so
    a failed request cannot leave the override in place.

    Args:
      http_request: a Request we're logging.
      level: (int) the debuglevel for logging.
      http: (optional) an httplib2.Http whose connections we should
        set the debuglevel on. An object without a `connections`
        attribute is accepted; it simply has nothing to adjust.

    Yields:
      None.
    """
    if http_request.loggable_body is None:
        yield
        return
    saved_global_level = httplib2.debuglevel
    saved_connection_levels = {}
    httplib2.debuglevel = level
    try:
        connections = getattr(http, 'connections', None) or {}
        for connection_key, connection in connections.items():
            # httplib2 stores two kinds of values in this dict, connection
            # classes and instances. Since the connection types are all
            # old-style classes, we can't easily distinguish by connection
            # type -- so instead we use the key pattern.
            if ':' not in connection_key:
                continue
            saved_connection_levels[connection_key] = connection.debuglevel
            connection.set_debuglevel(level)
        yield
    finally:
        httplib2.debuglevel = saved_global_level
        # Look each connection up again instead of holding a reference:
        # the entry may no longer be present by the time we get here.
        connections = getattr(http, 'connections', None) or {}
        for connection_key, saved_level in saved_connection_levels.items():
            if connection_key in connections:
                connections[connection_key].set_debuglevel(saved_level)
96
97
class Request(object):

    """Class encapsulating the data for an HTTP request.

    Attributes:
      url: the request URL.
      http_method: the HTTP method name.
      headers: dict of request headers. Assigning `body` keeps the
        'content-length' entry in sync with the body.
      body: the request payload, or None for no payload.
      loggable_body: text to log in place of `body`, or None when there
        is no replacement. Set automatically to '<media body>' whenever
        a non-string body is assigned.
    """

    def __init__(self, url='', http_method='GET', headers=None, body=''):
        self.url = url
        self.http_method = http_method
        self.headers = headers or {}
        self.__body = None
        self.__loggable_body = None
        # Goes through the property setter so that content-length and
        # loggable_body are initialised consistently.
        self.body = body

    @property
    def loggable_body(self):
        """The text to log instead of the real body, or None."""
        return self.__loggable_body

    @loggable_body.setter
    def loggable_body(self, value):
        """Sets the loggable body.

        Raises:
          exceptions.RequestError: if the request currently has no body.
        """
        if self.body is None:
            raise exceptions.RequestError(
                'Cannot set loggable body on request with no body')
        self.__loggable_body = value

    @property
    def body(self):
        """The request payload, or None."""
        return self.__body

    @body.setter
    def body(self, value):
        """Sets the request body; handles logging and length measurement."""
        self.__body = value
        if value is None:
            self.headers.pop('content-length', None)
            return
        # Prefer an explicit `length` attribute: len() cannot exceed 4GiB
        # in 32-bit python. Only fall back to len() when the attribute is
        # missing (or None) -- a length of 0 is a valid answer and must
        # not trigger len() on an object that may not support it.
        body_length = getattr(value, 'length', None)
        if body_length is None:
            body_length = len(value)
        self.headers['content-length'] = str(body_length)
        # This ensures we don't try to print large requests.
        if not isinstance(value, six.string_types):
            self.loggable_body = '<media body>'
139
140
141# Note: currently the order of fields here is important, since we want
142# to be able to pass in the result from httplib2.request.
class Response(collections.namedtuple(
        'HttpResponse', ['info', 'content', 'request_url'])):

    """Class encapsulating data for an HTTP response.

    Fields:
      info: mapping of response headers plus a 'status' entry.
      content: the response body.
      request_url: the URL the request was sent to.

    Note that __len__ is overridden, so len(response) -- and therefore
    truth-testing a response -- reflects `length`, not the number of
    tuple fields.
    """
    __slots__ = ()

    def __len__(self):
        return self.length

    @property
    def length(self):
        """Return the length of this response.

        We expose this as an attribute since using len() directly can fail
        for responses larger than sys.maxint.

        Returns:
          Response length (as int or long)
        """
        def ProcessContentRange(content_range):
            # 'bytes <start>-<end>/<total>' -> number of bytes in the range.
            _, _, range_spec = content_range.partition(' ')
            byte_range, _, _ = range_spec.partition('/')
            start, _, end = byte_range.partition('-')
            return int(end) - int(start) + 1

        if '-content-encoding' in self.info and 'content-range' in self.info:
            # httplib2 rewrites content-length in the case of a compressed
            # transfer; we can't trust the content-length header in that
            # case, but we *can* trust content-range, if it's present.
            return ProcessContentRange(self.info['content-range'])
        elif 'content-length' in self.info:
            return int(self.info['content-length'])
        elif 'content-range' in self.info:
            return ProcessContentRange(self.info['content-range'])
        return len(self.content)

    @property
    def status_code(self):
        """The HTTP status of the response, as an int."""
        return int(self.info['status'])

    @property
    def retry_after(self):
        """Seconds the server asked us to wait, or None.

        The Retry-After header may carry either a number of seconds or an
        HTTP-date (RFC 7231, section 7.1.3). Both forms are accepted; a
        date is converted to the number of whole seconds from now, never
        less than zero.

        Returns:
          An int number of seconds, or None if the header is absent or
          cannot be interpreted.
        """
        if 'retry-after' not in self.info:
            return None
        header_value = self.info['retry-after']
        try:
            return int(header_value)
        except ValueError:
            pass
        # Not a plain integer, so try the HTTP-date form. Imported here
        # because it is only needed on this uncommon path.
        import email.utils
        try:
            parsed_date = email.utils.parsedate_tz(header_value)
            if parsed_date is None:
                return None
            retry_at = email.utils.mktime_tz(parsed_date)
        except (ValueError, OverflowError):
            return None
        return max(0, int(retry_at - time.time()))

    @property
    def is_redirect(self):
        """True for a redirect status that also names a location."""
        return (self.status_code in _REDIRECT_STATUS_CODES and
                'location' in self.info)
192
193
def CheckResponse(response):
    """Raise if the response indicates the request should be retried.

    Args:
      response: a Response, or None if no response was obtained.

    Raises:
      exceptions.RequestError: if response is None.
      exceptions.BadStatusCodeError: if the status is 5XX or 429.
      exceptions.RetryAfterError: if the response carries a non-zero
          retry-after value.
    """
    if response is None:
        # Caller shouldn't call us if the response is None, but handle anyway.
        # There is no response to read a URL from, so the message cannot
        # include one.
        raise exceptions.RequestError('Request did not return a response.')
    status_code = response.status_code
    if status_code >= 500 or status_code == TOO_MANY_REQUESTS:
        raise exceptions.BadStatusCodeError.FromResponse(response)
    if response.retry_after:
        raise exceptions.RetryAfterError.FromResponse(response)
205
206
def RebuildHttpConnections(http):
    """Discard the cached connection instances held by an http object.

    The http.connections map of httplib2 holds two kinds of entries:
    { scheme string:  connection class } and
    { scheme + authority string : actual http connection }
    Only the second kind has a ':' in its key. Those entries are deleted
    here so that httplib2 rebuilds them from the connection classes on the
    next request; the per-scheme class entries are kept.

    Args:
      http: An httplib2.Http instance. Objects with a missing or empty
          `connections` attribute are left untouched.
    """
    if not getattr(http, 'connections', None):
        return
    # Collect the keys first so the dict is not modified while iterating.
    instance_keys = [key for key in http.connections if ':' in key]
    for key in instance_keys:
        del http.connections[key]
224
225
def RethrowExceptionHandler(*_):
    """Retry handler that never retries: re-raise the active exception.

    Must be called while an exception is being handled (as MakeRequest
    does from its `except` block); all arguments are ignored.
    """
    raise  # pylint: disable=misplaced-bare-raise
228
229
def HandleExceptionsAndRebuildHttpConnections(retry_args):
    """Exception handler for http failures.

    This catches known failures and rebuilds the underlying HTTP connections.
    For a recognized exception it logs the failure, drops the cached
    connections of retry_args.http, sleeps, and returns so that the caller
    can retry. Must be called while the exception is being handled, since
    unrecognized exceptions are re-raised with a bare `raise`.

    Args:
      retry_args: An ExceptionRetryArgs tuple.

    Raises:
      The exception currently being handled, if it is not one of the
      known retryable types.
    """
    # If the server indicates how long to wait, use that value.  Otherwise,
    # calculate the wait time on our own.
    retry_after = None
    exc = retry_args.exc

    # Transport failures
    if isinstance(exc, (http_client.BadStatusLine,
                        http_client.IncompleteRead,
                        http_client.ResponseNotReady)):
        logging.debug('Caught HTTP error %s, retrying: %s',
                      type(exc).__name__, exc)
    # socket.gaierror and socket.timeout are subclasses of socket.error, so
    # they have to be tested before it or their branches can never run.
    elif isinstance(exc, socket.gaierror):
        logging.debug(
            'Caught socket address error, retrying: %s', exc)
    elif isinstance(exc, socket.timeout):
        logging.debug(
            'Caught socket timeout error, retrying: %s', exc)
    elif isinstance(exc, socket.error):
        logging.debug('Caught socket error, retrying: %s', exc)
    elif isinstance(exc, httplib2.ServerNotFoundError):
        logging.debug(
            'Caught server not found error, retrying: %s', exc)
    elif isinstance(exc, ValueError):
        # oauth2client tries to JSON-decode the response, which can result
        # in a ValueError if the response was invalid. Until that is fixed in
        # oauth2client, need to handle it here.
        logging.debug('Response content was invalid (%s), retrying',
                      exc)
    elif isinstance(exc, exceptions.RequestError):
        logging.debug('Request returned no response, retrying')
    # API-level failures
    elif isinstance(exc, exceptions.BadStatusCodeError):
        logging.debug('Response returned status %s, retrying',
                      exc.status_code)
    elif isinstance(exc, exceptions.RetryAfterError):
        logging.debug('Response returned a retry-after header, retrying')
        retry_after = exc.retry_after
    else:
        # Not something we know how to retry; let it propagate.
        raise
    RebuildHttpConnections(retry_args.http)
    logging.debug('Retrying request to url %s after exception %s',
                  retry_args.http_request.url, exc)
    time.sleep(
        retry_after or util.CalculateWaitForRetry(
            retry_args.num_retries, max_wait=retry_args.max_retry_wait))
282
283
def MakeRequest(http, http_request, retries=7, max_retry_wait=60,
                redirections=5,
                retry_func=HandleExceptionsAndRebuildHttpConnections,
                check_response_func=CheckResponse):
    """Send http_request via the given http, performing error/retry handling.

    Each attempt is made with _MakeRequestNoRetry. When an attempt raises,
    retry_func is consulted and, if it returns, another attempt is made.

    Args:
      http: An httplib2.Http instance, or a http multiplexer that delegates to
          an underlying http, for example, HTTPMultiplexer.
      http_request: A Request to send.
      retries: (int, default 7) Attempt budget. Once the number of failed
          attempts reaches this value the last exception is re-raised, so
          at least one attempt is always made.
      max_retry_wait: (int, default 60) Maximum number of seconds to wait
          when retrying.
      redirections: (int, default 5) Number of redirects to follow.
      retry_func: Function called, while the exception is being handled,
          with a single ExceptionRetryArgs. It should return to request
          another attempt, or raise to give up.
      check_response_func: Function to validate the HTTP response. It is
          called with the Response and should raise to reject it.

    Raises:
      The exception raised by the final attempt once the budget is spent,
      or anything raised by retry_func.

    Returns:
      A Response object.

    """
    failed_attempts = 0
    while True:
        try:
            return _MakeRequestNoRetry(
                http, http_request, redirections=redirections,
                check_response_func=check_response_func)
        # Catch broadly here: retry_func decides which exception types are
        # retryable and raises for the rest.
        # pylint: disable=broad-except
        except Exception as error:
            failed_attempts += 1
            if failed_attempts >= retries:
                raise
            retry_state = ExceptionRetryArgs(
                http=http,
                http_request=http_request,
                exc=error,
                num_retries=failed_attempts,
                max_retry_wait=max_retry_wait)
            retry_func(retry_state)
326
327
def _MakeRequestNoRetry(http, http_request, redirections=5,
                        check_response_func=CheckResponse):
    """Send http_request via the given http, exactly once.

    This wrapper exists to handle translation between the plain httplib2
    request/response types and the Request and Response types above.

    Args:
      http: An httplib2.Http instance, or a http multiplexer that delegates to
          an underlying http, for example, HTTPMultiplexer.
      http_request: A Request to send.
      redirections: (int, default 5) Number of redirects to follow.
      check_response_func: Function to validate the HTTP response. It is
          called with the Response and should raise to reject it.

    Returns:
      A Response object.

    Raises:
      RequestError if no response could be parsed, plus anything raised by
      check_response_func.

    """
    # Handle overrides for connection types.  This is used if the caller
    # wants control over the underlying connection for managing callbacks
    # or hash digestion.
    connection_type = None
    registered = getattr(http, 'connections', None)
    if registered:
        scheme = parse.urlsplit(http_request.url).scheme
        if scheme and scheme in registered:
            connection_type = registered[scheme]

    # Custom printing only at debuglevel 4
    if httplib2.debuglevel == 4:
        request_debuglevel = 4
    else:
        request_debuglevel = 0

    with _Httplib2Debuglevel(http_request, request_debuglevel, http=http):
        target_url = str(http_request.url)
        request_options = dict(
            method=str(http_request.http_method),
            body=http_request.body,
            headers=http_request.headers,
            redirections=redirections,
            connection_type=connection_type)
        info, content = http.request(target_url, **request_options)

    if info is None:
        raise exceptions.RequestError()

    response = Response(
        info=info, content=content, request_url=http_request.url)
    check_response_func(response)
    return response
373
374
# Callables consulted by GetHttp, in registration order, before it falls
# back to a plain httplib2.Http.
_HTTP_FACTORIES = []


def _RegisterHttpFactory(factory):
    """Add a factory to the end of the list GetHttp consults.

    Args:
      factory: callable taking GetHttp's keyword arguments and returning
          an http object, or None to decline.
    """
    _HTTP_FACTORIES.append(factory)


def GetHttp(**kwds):
    """Return an http object built from the given keyword arguments.

    Registered factories are tried in order and the first non-None result
    wins; if none supplies one, an httplib2.Http is constructed instead.

    Args:
      **kwds: passed unchanged to each factory and to httplib2.Http.

    Returns:
      The http object produced by a factory, or a new httplib2.Http.
    """
    for build_http in _HTTP_FACTORIES:
        http = build_http(**kwds)
        if http is not None:
            break
    else:
        # No factory produced anything (or none are registered).
        http = httplib2.Http(**kwds)
    return http
388