1r"""HTTP/1.1 client library
2
3<intro stuff goes here>
4<other stuff, too>
5
6HTTPConnection goes through a number of "states", which define when a client
7may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
9
10    (null)
11      |
12      | HTTPConnection()
13      v
14    Idle
15      |
16      | putrequest()
17      v
18    Request-started
19      |
20      | ( putheader() )*  endheaders()
21      v
22    Request-sent
23      |
24      | response = getresponse()
25      v
26    Unread-response   [Response-headers-read]
27      |\____________________
28      |                     |
29      | response.read()     | putrequest()
30      v                     v
31    Idle                  Req-started-unread-response
32                     ______/|
33                   /        |
34   response.read() |        | ( putheader() )*  endheaders()
35                   v        v
36       Request-started    Req-sent-unread-response
37                            |
38                            | response.read()
39                            v
40                          Request-sent
41
42This diagram presents the following rules:
43  -- a second request may not be started until {response-headers-read}
44  -- a response [object] cannot be retrieved until {request-sent}
45  -- there is no differentiation between an unread response body and a
46     partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49      HTTPResponse class does not enforce this state machine, which
50      implies sophisticated clients may accelerate the request/response
51      pipeline. Caution should be taken, though: accelerating the states
52      beyond the above pattern may imply knowledge of the server's
53      connection-close behavior for certain requests. For example, it
54      is impossible to tell whether the server will close the connection
55      UNTIL the response headers have been read; this means that further
56      requests cannot be placed into the pipeline until it is known that
57      the server will NOT be closing the connection.
58
59Logical State                  __state            __response
60-------------                  -------            ----------
61Idle                           _CS_IDLE           None
62Request-started                _CS_REQ_STARTED    None
63Request-sent                   _CS_REQ_SENT       None
64Unread-response                _CS_IDLE           <response_class>
65Req-started-unread-response    _CS_REQ_STARTED    <response_class>
66Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67"""
68
69from array import array
70import os
71import socket
72from sys import py3kwarning
73from urlparse import urlsplit
74import warnings
75with warnings.catch_warnings():
76    if py3kwarning:
77        warnings.filterwarnings("ignore", ".*mimetools has been removed",
78                                DeprecationWarning)
79    import mimetools
80
81try:
82    from cStringIO import StringIO
83except ImportError:
84    from StringIO import StringIO
85
# Names exported by a star-import of this module.
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Default TCP ports.  HTTP_PORT is HTTPConnection.default_port.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes by __init__() until
# begin() replaces them with real values.
_UNKNOWN = 'UNKNOWN'

# connection states (values taken by HTTPConnection.__state; the table at
# the end of the module docstring maps them to the logical states)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
102
# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to official W3C names.  Keys are spelled with the
# constants above so code and name can be checked against each other at a
# glance.  Not every constant has an entry: PROCESSING, MULTI_STATUS,
# IM_USED, the 422-426 codes, INSUFFICIENT_STORAGE and NOT_EXTENDED are
# deliberately left out, exactly as before.
responses = {
    CONTINUE: 'Continue',
    SWITCHING_PROTOCOLS: 'Switching Protocols',

    OK: 'OK',
    CREATED: 'Created',
    ACCEPTED: 'Accepted',
    NON_AUTHORITATIVE_INFORMATION: 'Non-Authoritative Information',
    NO_CONTENT: 'No Content',
    RESET_CONTENT: 'Reset Content',
    PARTIAL_CONTENT: 'Partial Content',

    MULTIPLE_CHOICES: 'Multiple Choices',
    MOVED_PERMANENTLY: 'Moved Permanently',
    FOUND: 'Found',
    SEE_OTHER: 'See Other',
    NOT_MODIFIED: 'Not Modified',
    USE_PROXY: 'Use Proxy',
    306: '(Unused)',            # no named constant exists for 306
    TEMPORARY_REDIRECT: 'Temporary Redirect',

    BAD_REQUEST: 'Bad Request',
    UNAUTHORIZED: 'Unauthorized',
    PAYMENT_REQUIRED: 'Payment Required',
    FORBIDDEN: 'Forbidden',
    NOT_FOUND: 'Not Found',
    METHOD_NOT_ALLOWED: 'Method Not Allowed',
    NOT_ACCEPTABLE: 'Not Acceptable',
    PROXY_AUTHENTICATION_REQUIRED: 'Proxy Authentication Required',
    REQUEST_TIMEOUT: 'Request Timeout',
    CONFLICT: 'Conflict',
    GONE: 'Gone',
    LENGTH_REQUIRED: 'Length Required',
    PRECONDITION_FAILED: 'Precondition Failed',
    REQUEST_ENTITY_TOO_LARGE: 'Request Entity Too Large',
    REQUEST_URI_TOO_LONG: 'Request-URI Too Long',
    UNSUPPORTED_MEDIA_TYPE: 'Unsupported Media Type',
    REQUESTED_RANGE_NOT_SATISFIABLE: 'Requested Range Not Satisfiable',
    EXPECTATION_FAILED: 'Expectation Failed',

    INTERNAL_SERVER_ERROR: 'Internal Server Error',
    NOT_IMPLEMENTED: 'Not Implemented',
    BAD_GATEWAY: 'Bad Gateway',
    SERVICE_UNAVAILABLE: 'Service Unavailable',
    GATEWAY_TIMEOUT: 'Gateway Timeout',
    HTTP_VERSION_NOT_SUPPORTED: 'HTTP Version Not Supported',
}

# maximal amount of data to read at one time in _safe_read (1 MiB)
MAXAMOUNT = 1024 * 1024

# maximal line length when calling readline() (64 KiB)
_MAXLINE = 64 * 1024
217
218class HTTPMessage(mimetools.Message):
219
220    def addheader(self, key, value):
221        """Add header for field key handling repeats."""
222        prev = self.dict.get(key)
223        if prev is None:
224            self.dict[key] = value
225        else:
226            combined = ", ".join((prev, value))
227            self.dict[key] = combined
228
229    def addcontinue(self, key, more):
230        """Add more field data from a continuation line."""
231        prev = self.dict[key]
232        self.dict[key] = prev + "\n " + more
233
    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).

        If multiple header fields with the same name occur, they are combined
        according to the rules in RFC 2616 sec 4.2:

        Appending each subsequent field-value to the first, each separated
        by a comma. The order in which header fields with the same field-name
        are received is significant to the interpretation of the combined
        field value.

        Parsed values are stored in self.dict through addheader() and
        addcontinue().  LineTooLong is raised if any line read from self.fp
        is longer than _MAXLINE.
        """
        # XXX The implementation overrides the readheaders() method of
        # rfc822.Message.  The base class design isn't amenable to
        # customized behavior here so the method here is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = hlist = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Choose how a wrongly consumed line can be pushed back: prefer the
        # file's own unread(), otherwise remember the offset with tell().
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    # tell() failed: give up on seeking for the rest of
                    # the parse.
                    startofline = tell = None
                    self.seekable = 0
            # Ask for one byte more than the limit so that an over-long
            # line can be told apart from one that is exactly _MAXLINE.
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
                hlist.append(line)
                self.addcontinue(headerseen, line.strip())
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                hlist.append(line)
                # The value is whatever follows "<name>:" on the line.
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break
324
325class HTTPResponse:
326
327    # strict: If true, raise BadStatusLine if the status line can't be
328    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
329    # false because it prevents clients from talking to HTTP/0.9
330    # servers.  Note that a response with a sufficiently corrupted
331    # status line will look like an HTTP/0.9 response.
332
333    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
334
335    def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
336        if buffering:
337            # The caller won't be using any sock.recv() calls, so buffering
338            # is fine and recommended for performance.
339            self.fp = sock.makefile('rb')
340        else:
341            # The buffer size is specified as zero, because the headers of
342            # the response are read with readline().  If the reads were
343            # buffered the readline() calls could consume some of the
344            # response, which make be read via a recv() on the underlying
345            # socket.
346            self.fp = sock.makefile('rb', 0)
347        self.debuglevel = debuglevel
348        self.strict = strict
349        self._method = method
350
351        self.msg = None
352
353        # from the Status-Line of the response
354        self.version = _UNKNOWN # HTTP-Version
355        self.status = _UNKNOWN  # Status-Code
356        self.reason = _UNKNOWN  # Reason-Phrase
357
358        self.chunked = _UNKNOWN         # is "chunked" being used?
359        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
360        self.length = _UNKNOWN          # number of bytes left in response
361        self.will_close = _UNKNOWN      # conn will close at end of response
362
363    def _read_status(self):
364        # Initialize with Simple-Response defaults
365        line = self.fp.readline(_MAXLINE + 1)
366        if len(line) > _MAXLINE:
367            raise LineTooLong("header line")
368        if self.debuglevel > 0:
369            print "reply:", repr(line)
370        if not line:
371            # Presumably, the server closed the connection before
372            # sending a valid response.
373            raise BadStatusLine(line)
374        try:
375            [version, status, reason] = line.split(None, 2)
376        except ValueError:
377            try:
378                [version, status] = line.split(None, 1)
379                reason = ""
380            except ValueError:
381                # empty version will cause next test to fail and status
382                # will be treated as 0.9 response.
383                version = ""
384        if not version.startswith('HTTP/'):
385            if self.strict:
386                self.close()
387                raise BadStatusLine(line)
388            else:
389                # assume it's a Simple-Response from an 0.9 server
390                self.fp = LineAndFileWrapper(line, self.fp)
391                return "HTTP/0.9", 200, ""
392
393        # The status code is a three-digit number
394        try:
395            status = int(status)
396            if status < 100 or status > 999:
397                raise BadStatusLine(line)
398        except ValueError:
399            raise BadStatusLine(line)
400        return version, status, reason
401
    def begin(self):
        """Read the status line and headers and set up body framing.

        Does nothing if the headers were already read (self.msg is set).
        Otherwise it skips any "100 Continue" replies, then fills in
        self.status, self.reason, self.version, self.msg, self.chunked,
        self.chunk_left, self.length and self.will_close.

        Raises UnknownProtocol for an unrecognised HTTP version and
        LineTooLong for an over-long header line of a skipped 100 reply;
        errors from _read_status() propagate.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline(_MAXLINE + 1)
                if len(skip) > _MAXLINE:
                    raise LineTooLong("header line")
                skip = skip.strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print "header:", skip

        self.status = status
        self.reason = reason.strip()
        # self.version is stored as an int: 9, 10 or 11.
        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # A 0.9 reply has no headers: use an empty message and read
            # the body until the connection closes.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(StringIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print "header:", hdr,

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        # (must come after self.version and self.msg are set, which
        # _check_close() reads)
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1
487
488    def _check_close(self):
489        conn = self.msg.getheader('connection')
490        if self.version == 11:
491            # An HTTP/1.1 proxy is assumed to stay open unless
492            # explicitly closed.
493            conn = self.msg.getheader('connection')
494            if conn and "close" in conn.lower():
495                return True
496            return False
497
498        # Some HTTP/1.0 implementations have support for persistent
499        # connections, using rules different than HTTP/1.1.
500
501        # For older HTTP, Keep-Alive indicates persistent connection.
502        if self.msg.getheader('keep-alive'):
503            return False
504
505        # At least Akamai returns a "Connection: Keep-Alive" header,
506        # which was supposed to be sent by the client.
507        if conn and "keep-alive" in conn.lower():
508            return False
509
510        # Proxy-Connection is a netscape hack.
511        pconn = self.msg.getheader('proxy-connection')
512        if pconn and "keep-alive" in pconn.lower():
513            return False
514
515        # otherwise, assume it will close
516        return True
517
518    def close(self):
519        if self.fp:
520            self.fp.close()
521            self.fp = None
522
523    def isclosed(self):
524        # NOTE: it is possible that we will not ever call self.close(). This
525        #       case occurs when will_close is TRUE, length is None, and we
526        #       read up to the last byte, but NOT past it.
527        #
528        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
529        #          called, meaning self.isclosed() is meaningful.
530        return self.fp is None
531
532    # XXX It would be nice to have readline and __iter__ for this, too.
533
534    def read(self, amt=None):
535        if self.fp is None:
536            return ''
537
538        if self._method == 'HEAD':
539            self.close()
540            return ''
541
542        if self.chunked:
543            return self._read_chunked(amt)
544
545        if amt is None:
546            # unbounded read
547            if self.length is None:
548                s = self.fp.read()
549            else:
550                try:
551                    s = self._safe_read(self.length)
552                except IncompleteRead:
553                    self.close()
554                    raise
555                self.length = 0
556            self.close()        # we read everything
557            return s
558
559        if self.length is not None:
560            if amt > self.length:
561                # clip the read to the "end of response"
562                amt = self.length
563
564        # we do not use _safe_read() here because this may be a .will_close
565        # connection, and the user is reading more bytes than will be provided
566        # (for example, reading in 1k chunks)
567        s = self.fp.read(amt)
568        if not s:
569            # Ideally, we would raise IncompleteRead if the content-length
570            # wasn't satisfied, but it might break compatibility.
571            self.close()
572        if self.length is not None:
573            self.length -= len(s)
574            if not self.length:
575                self.close()
576
577        return s
578
    def _read_chunked(self, amt):
        """Read body data from a chunked-encoded response.

        amt=None reads every remaining chunk, consumes the trailer and
        closes the response.  Otherwise at most *amt* bytes are returned
        and the unread size of the current chunk is kept in
        self.chunk_left for the next call (None means the next thing on
        the wire is a chunk-size line).

        Raises LineTooLong for an over-long chunk-size or trailer line and
        IncompleteRead, after closing the response, when a chunk size
        cannot be parsed as hexadecimal.  Errors from _safe_read()
        propagate.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        value = []
        while True:
            if chunk_left is None:
                # At a chunk boundary: read the "<hex-size>[;ext]" line.
                line = self.fp.readline(_MAXLINE + 1)
                if len(line) > _MAXLINE:
                    raise LineTooLong("chunk size")
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise IncompleteRead(''.join(value))
                if chunk_left == 0:
                    # zero-size chunk marks the end of the body
                    break
            if amt is None:
                value.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                # Request ends inside this chunk: remember what is left.
                value.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(value)
            elif amt == chunk_left:
                value.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(value)
            else:
                # Request spans past this chunk: take all of it, continue.
                value.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("trailer line")
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(value)
636
637    def _safe_read(self, amt):
638        """Read the number of bytes requested, compensating for partial reads.
639
640        Normally, we have a blocking socket, but a read() can be interrupted
641        by a signal (resulting in a partial read).
642
643        Note that we cannot distinguish between EOF and an interrupt when zero
644        bytes have been read. IncompleteRead() will be raised in this
645        situation.
646
647        This function should be used when <amt> bytes "should" be present for
648        reading. If the bytes are truly not available (due to EOF), then the
649        IncompleteRead exception can be used to detect the problem.
650        """
651        # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
652        # return less than x bytes unless EOF is encountered.  It now handles
653        # signal interruptions (socket.error EINTR) internally.  This code
654        # never caught that exception anyways.  It seems largely pointless.
655        # self.fp.read(amt) will work fine.
656        s = []
657        while amt > 0:
658            chunk = self.fp.read(min(amt, MAXAMOUNT))
659            if not chunk:
660                raise IncompleteRead(''.join(s), amt)
661            s.append(chunk)
662            amt -= len(chunk)
663        return ''.join(s)
664
665    def fileno(self):
666        return self.fp.fileno()
667
668    def getheader(self, name, default=None):
669        if self.msg is None:
670            raise ResponseNotReady()
671        return self.msg.getheader(name, default)
672
673    def getheaders(self):
674        """Return list of (header, value) tuples."""
675        if self.msg is None:
676            raise ResponseNotReady()
677        return self.msg.items()
678
679
680class HTTPConnection:
681
    # Protocol version: putrequest() puts _http_vsn_str on the request
    # line and adds the HTTP/1.1 standard headers when _http_vsn == 11.
    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    # Class instantiated to parse replies (used by _tunnel()).
    response_class = HTTPResponse
    # Port chosen by _set_hostport() when the caller gives none.
    default_port = HTTP_PORT
    # If true, send() calls connect() on an unconnected instance;
    # otherwise it raises NotConnected.
    auto_open = 1
    # Values > 0 make send() print the data it transmits.
    debuglevel = 0
    # Default for the strict= argument handed to response_class; __init__()
    # overrides it per instance when given a non-None value.
    strict = 0
690
691    def __init__(self, host, port=None, strict=None,
692                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
693        self.timeout = timeout
694        self.source_address = source_address
695        self.sock = None
696        self._buffer = []
697        self.__response = None
698        self.__state = _CS_IDLE
699        self._method = None
700        self._tunnel_host = None
701        self._tunnel_port = None
702        self._tunnel_headers = {}
703
704        self._set_hostport(host, port)
705        if strict is not None:
706            self.strict = strict
707
708    def set_tunnel(self, host, port=None, headers=None):
709        """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
710
711        The headers argument should be a mapping of extra HTTP headers
712        to send with the CONNECT request.
713        """
714        self._tunnel_host = host
715        self._tunnel_port = port
716        if headers:
717            self._tunnel_headers = headers
718        else:
719            self._tunnel_headers.clear()
720
721    def _set_hostport(self, host, port):
722        if port is None:
723            i = host.rfind(':')
724            j = host.rfind(']')         # ipv6 addresses have [...]
725            if i > j:
726                try:
727                    port = int(host[i+1:])
728                except ValueError:
729                    if host[i+1:] == "":  # http://foo.com:/ == http://foo.com/
730                        port = self.default_port
731                    else:
732                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
733                host = host[:i]
734            else:
735                port = self.default_port
736            if host and host[0] == '[' and host[-1] == ']':
737                host = host[1:-1]
738        self.host = host
739        self.port = port
740
741    def set_debuglevel(self, level):
742        self.debuglevel = level
743
744    def _tunnel(self):
745        self._set_hostport(self._tunnel_host, self._tunnel_port)
746        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
747        for header, value in self._tunnel_headers.iteritems():
748            self.send("%s: %s\r\n" % (header, value))
749        self.send("\r\n")
750        response = self.response_class(self.sock, strict = self.strict,
751                                       method = self._method)
752        (version, code, message) = response._read_status()
753
754        if code != 200:
755            self.close()
756            raise socket.error("Tunnel connection failed: %d %s" % (code,
757                                                                    message.strip()))
758        while True:
759            line = response.fp.readline(_MAXLINE + 1)
760            if len(line) > _MAXLINE:
761                raise LineTooLong("header line")
762            if not line:
763                # for sites which EOF without sending trailer
764                break
765            if line == '\r\n':
766                break
767
768
769    def connect(self):
770        """Connect to the host and port specified in __init__."""
771        self.sock = socket.create_connection((self.host,self.port),
772                                             self.timeout, self.source_address)
773
774        if self._tunnel_host:
775            self._tunnel()
776
777    def close(self):
778        """Close the connection to the HTTP server."""
779        if self.sock:
780            self.sock.close()   # close it manually... there may be other refs
781            self.sock = None
782        if self.__response:
783            self.__response.close()
784            self.__response = None
785        self.__state = _CS_IDLE
786
787    def send(self, data):
788        """Send `data' to the server."""
789        if self.sock is None:
790            if self.auto_open:
791                self.connect()
792            else:
793                raise NotConnected()
794
795        if self.debuglevel > 0:
796            print "send:", repr(data)
797        blocksize = 8192
798        if hasattr(data,'read') and not isinstance(data, array):
799            if self.debuglevel > 0: print "sendIng a read()able"
800            datablock = data.read(blocksize)
801            while datablock:
802                self.sock.sendall(datablock)
803                datablock = data.read(blocksize)
804        else:
805            self.sock.sendall(data)
806
807    def _output(self, s):
808        """Add a line of output to the current request buffer.
809
810        Assumes that the line does *not* end with \\r\\n.
811        """
812        self._buffer.append(s)
813
814    def _send_output(self, message_body=None):
815        """Send the currently buffered request and clear the buffer.
816
817        Appends an extra \\r\\n to the buffer.
818        A message_body may be specified, to be appended to the request.
819        """
820        self._buffer.extend(("", ""))
821        msg = "\r\n".join(self._buffer)
822        del self._buffer[:]
823        # If msg and message_body are sent in a single send() call,
824        # it will avoid performance problems caused by the interaction
825        # between delayed ack and the Nagle algorithm.
826        if isinstance(message_body, str):
827            msg += message_body
828            message_body = None
829        self.send(msg)
830        if message_body is not None:
831            #message_body was not a string (i.e. it is a file) and
832            #we must run the risk of Nagle
833            self.send(message_body)
834
835    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
836        """Send a request to the server.
837
838        `method' specifies an HTTP request method, e.g. 'GET'.
839        `url' specifies the object being requested, e.g. '/index.html'.
840        `skip_host' if True does not add automatically a 'Host:' header
841        `skip_accept_encoding' if True does not add automatically an
842           'Accept-Encoding:' header
843        """
844
845        # if a prior response has been completed, then forget about it.
846        if self.__response and self.__response.isclosed():
847            self.__response = None
848
849
850        # in certain cases, we cannot issue another request on this connection.
851        # this occurs when:
852        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
853        #   2) a response to a previous request has signalled that it is going
854        #      to close the connection upon completion.
855        #   3) the headers for the previous response have not been read, thus
856        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
857        #
858        # if there is no prior response, then we can request at will.
859        #
860        # if point (2) is true, then we will have passed the socket to the
861        # response (effectively meaning, "there is no prior response"), and
862        # will open a new one when a new request is made.
863        #
864        # Note: if a prior response exists, then we *can* start a new request.
865        #       We are not allowed to begin fetching the response to this new
866        #       request, however, until that prior response is complete.
867        #
868        if self.__state == _CS_IDLE:
869            self.__state = _CS_REQ_STARTED
870        else:
871            raise CannotSendRequest()
872
873        # Save the method we use, we need it later in the response phase
874        self._method = method
875        if not url:
876            url = '/'
877        hdr = '%s %s %s' % (method, url, self._http_vsn_str)
878
879        self._output(hdr)
880
881        if self._http_vsn == 11:
882            # Issue some standard headers for better HTTP/1.1 compliance
883
884            if not skip_host:
885                # this header is issued *only* for HTTP/1.1
886                # connections. more specifically, this means it is
887                # only issued when the client uses the new
888                # HTTPConnection() class. backwards-compat clients
889                # will be using HTTP/1.0 and those clients may be
890                # issuing this header themselves. we should NOT issue
891                # it twice; some web servers (such as Apache) barf
892                # when they see two Host: headers
893
894                # If we need a non-standard port,include it in the
895                # header.  If the request is going through a proxy,
896                # but the host of the actual URL, not the host of the
897                # proxy.
898
899                netloc = ''
900                if url.startswith('http'):
901                    nil, netloc, nil, nil, nil = urlsplit(url)
902
903                if netloc:
904                    try:
905                        netloc_enc = netloc.encode("ascii")
906                    except UnicodeEncodeError:
907                        netloc_enc = netloc.encode("idna")
908                    self.putheader('Host', netloc_enc)
909                else:
910                    try:
911                        host_enc = self.host.encode("ascii")
912                    except UnicodeEncodeError:
913                        host_enc = self.host.encode("idna")
914                    # Wrap the IPv6 Host Header with [] (RFC 2732)
915                    if host_enc.find(':') >= 0:
916                        host_enc = "[" + host_enc + "]"
917                    if self.port == self.default_port:
918                        self.putheader('Host', host_enc)
919                    else:
920                        self.putheader('Host', "%s:%s" % (host_enc, self.port))
921
922            # note: we are assuming that clients will not attempt to set these
923            #       headers since *this* library must deal with the
924            #       consequences. this also means that when the supporting
925            #       libraries are updated to recognize other forms, then this
926            #       code should be changed (removed or updated).
927
928            # we only want a Content-Encoding of "identity" since we don't
929            # support encodings such as x-gzip or x-deflate.
930            if not skip_accept_encoding:
931                self.putheader('Accept-Encoding', 'identity')
932
933            # we can accept "chunked" Transfer-Encodings, but no others
934            # NOTE: no TE header implies *only* "chunked"
935            #self.putheader('TE', 'chunked')
936
937            # if TE is supplied in the header, then it must appear in a
938            # Connection header.
939            #self.putheader('Connection', 'TE')
940
941        else:
942            # For HTTP/1.0, the server will assume "not chunked"
943            pass
944
945    def putheader(self, header, *values):
946        """Send a request header line to the server.
947
948        For example: h.putheader('Accept', 'text/html')
949        """
950        if self.__state != _CS_REQ_STARTED:
951            raise CannotSendHeader()
952
953        hdr = '%s: %s' % (header, '\r\n\t'.join([str(v) for v in values]))
954        self._output(hdr)
955
956    def endheaders(self, message_body=None):
957        """Indicate that the last header line has been sent to the server.
958
959        This method sends the request to the server.  The optional
960        message_body argument can be used to pass a message body
961        associated with the request.  The message body will be sent in
962        the same packet as the message headers if it is string, otherwise it is
963        sent as a separate packet.
964        """
965        if self.__state == _CS_REQ_STARTED:
966            self.__state = _CS_REQ_SENT
967        else:
968            raise CannotSendHeader()
969        self._send_output(message_body)
970
971    def request(self, method, url, body=None, headers={}):
972        """Send a complete request to the server."""
973        self._send_request(method, url, body, headers)
974
975    def _set_content_length(self, body):
976        # Set the content-length based on the body.
977        thelen = None
978        try:
979            thelen = str(len(body))
980        except TypeError, te:
981            # If this is a file-like object, try to
982            # fstat its file descriptor
983            try:
984                thelen = str(os.fstat(body.fileno()).st_size)
985            except (AttributeError, OSError):
986                # Don't send a length if this failed
987                if self.debuglevel > 0: print "Cannot stat!!"
988
989        if thelen is not None:
990            self.putheader('Content-Length', thelen)
991
992    def _send_request(self, method, url, body, headers):
993        # Honor explicitly requested Host: and Accept-Encoding: headers.
994        header_names = dict.fromkeys([k.lower() for k in headers])
995        skips = {}
996        if 'host' in header_names:
997            skips['skip_host'] = 1
998        if 'accept-encoding' in header_names:
999            skips['skip_accept_encoding'] = 1
1000
1001        self.putrequest(method, url, **skips)
1002
1003        if body is not None and 'content-length' not in header_names:
1004            self._set_content_length(body)
1005        for hdr, value in headers.iteritems():
1006            self.putheader(hdr, value)
1007        self.endheaders(body)
1008
1009    def getresponse(self, buffering=False):
1010        "Get the response from the server."
1011
1012        # if a prior response has been completed, then forget about it.
1013        if self.__response and self.__response.isclosed():
1014            self.__response = None
1015
1016        #
1017        # if a prior response exists, then it must be completed (otherwise, we
1018        # cannot read this response's header to determine the connection-close
1019        # behavior)
1020        #
1021        # note: if a prior response existed, but was connection-close, then the
1022        # socket and response were made independent of this HTTPConnection
1023        # object since a new request requires that we open a whole new
1024        # connection
1025        #
1026        # this means the prior response had one of two states:
1027        #   1) will_close: this connection was reset and the prior socket and
1028        #                  response operate independently
1029        #   2) persistent: the response was retained and we await its
1030        #                  isclosed() status to become true.
1031        #
1032        if self.__state != _CS_REQ_SENT or self.__response:
1033            raise ResponseNotReady()
1034
1035        args = (self.sock,)
1036        kwds = {"strict":self.strict, "method":self._method}
1037        if self.debuglevel > 0:
1038            args += (self.debuglevel,)
1039        if buffering:
1040            #only add this keyword if non-default, for compatibility with
1041            #other response_classes.
1042            kwds["buffering"] = True;
1043        response = self.response_class(*args, **kwds)
1044
1045        response.begin()
1046        assert response.will_close != _UNKNOWN
1047        self.__state = _CS_IDLE
1048
1049        if response.will_close:
1050            # this effectively passes the connection to the response
1051            self.close()
1052        else:
1053            # remember this, so we can tell when it is complete
1054            self.__response = response
1055
1056        return response
1057
1058
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    Wraps an instance of _connection_class, forces it to speak HTTP/1.0
    and exposes the old-style interface (getreply(), getfile(), ...) on
    top of it.
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will raise
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        """Adopt conn as the wrapped connection and wire up delegation."""
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.putheader = conn.putheader
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        # make the wrapped connection use this class's protocol version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        When the status line cannot be parsed (BadStatusLine), the tuple
        is (-1, <offending line>, None), the connection is closed and
        getfile() returns a file object made from the socket.
        """
        try:
            if not buffering:
                response = self._conn.getresponse()
            else:
                #only add this keyword if non-default for compatibility
                #with other connection classes
                response = self._conn.getresponse(buffering)
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            leftover = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            # close() resets self.file to None, so the file object must be
            # published only after it has run; otherwise getfile() could
            # never return it.
            # NOTE(review): relies on the makefile() object staying
            # readable after the connection drops its socket -- confirm.
            self.file = leftover

            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        """Close the wrapped connection and forget the current file."""
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1149
1150try:
1151    import ssl
1152except ImportError:
1153    pass
1154else:
1155    class HTTPSConnection(HTTPConnection):
1156        "This class allows communication via SSL."
1157
1158        default_port = HTTPS_PORT
1159
1160        def __init__(self, host, port=None, key_file=None, cert_file=None,
1161                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
1162                     source_address=None):
1163            HTTPConnection.__init__(self, host, port, strict, timeout,
1164                                    source_address)
1165            self.key_file = key_file
1166            self.cert_file = cert_file
1167
1168        def connect(self):
1169            "Connect to a host on a given (SSL) port."
1170
1171            sock = socket.create_connection((self.host, self.port),
1172                                            self.timeout, self.source_address)
1173            if self._tunnel_host:
1174                self.sock = sock
1175                self._tunnel()
1176            self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
1177
1178    __all__.append("HTTPSConnection")
1179
1180    class HTTPS(HTTP):
1181        """Compatibility with 1.5 httplib interface
1182
1183        Python 1.5.2 did not have an HTTPS class, but it defined an
1184        interface for sending http requests that is also useful for
1185        https.
1186        """
1187
1188        _connection_class = HTTPSConnection
1189
1190        def __init__(self, host='', port=None, key_file=None, cert_file=None,
1191                     strict=None):
1192            # provide a default host, pass the X509 cert info
1193
1194            # urf. compensate for bad input.
1195            if port == 0:
1196                port = None
1197            self._setup(self._connection_class(host, port, key_file,
1198                                               cert_file, strict))
1199
1200            # we never actually use these for anything, but we keep them
1201            # here for compatibility with post-1.5.2 CVS.
1202            self.key_file = key_file
1203            self.cert_file = cert_file
1204
1205
1206    def FakeSocket (sock, sslobj):
1207        warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
1208                      "Use the result of ssl.wrap_socket() directly instead.",
1209                      DeprecationWarning, stacklevel=2)
1210        return sslobj
1211
1212
class HTTPException(Exception):
    """Common base class of this module's HTTP exception classes.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1217
class NotConnected(HTTPException):
    """HTTPException subclass; by its name, signals a missing connection."""
1220
class InvalidURL(HTTPException):
    """HTTPException subclass; by its name, signals an unusable URL."""
1223
class UnknownProtocol(HTTPException):
    """HTTPException carrying an unrecognized protocol version.

    The version is available as the `version' attribute and as the only
    element of `args'.
    """

    def __init__(self, version):
        self.version = version
        self.args = (version,)
1228
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; by its name, signals an unsupported
    transfer encoding."""
1231
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; by its name, signals an unsupported file
    mode."""
1234
class IncompleteRead(HTTPException):
    """HTTPException describing a read that ended early.

    `partial' holds the data that was read; `expected', when not None, is
    the number of bytes that were still expected.
    """

    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        # str() and repr() deliberately give the same text.
        return repr(self)
1248
class ImproperConnectionState(HTTPException):
    """Base class for errors about calling a connection method while the
    connection is in the wrong state."""
1251
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
1254
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request is in the
    started state."""
1257
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been fully sent or an
    earlier response is still pending."""
1260
class BadStatusLine(HTTPException):
    """HTTPException carrying an offending status line.

    The line is available as the `line' attribute and as the only element
    of `args'.  A false value (such as an empty string) is stored as its
    repr() so that it stays visible in messages.
    """

    def __init__(self, line):
        shown = line or repr(line)
        self.line = shown
        self.args = (shown,)
1267
class LineTooLong(HTTPException):
    """HTTPException for a line exceeding the _MAXLINE byte limit.

    `line_type' is a short description of what was being read; it is
    interpolated into the message.
    """

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1272
1273# for backwards compatibility
1274error = HTTPException
1275
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    Presents `line' followed by the remaining contents of `file' as one
    stream.  Attributes not defined here are looked up on `file'.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0       # index of the first unread char of line
        self._line_left = len(line)  # number of unread chars of line

    def __getattr__(self, attr):
        # Only called for attributes not found on the wrapper itself.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to amt characters, or everything when amt is None.

        A negative amt is treated like None, matching the usual file
        read() convention; previously it produced a bogus slice of the
        line and corrupted the bookkeeping of what is left.
        """
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        if amt is not None and amt < 0:
            amt = None
        if amt is None or amt > self._line_left:
            # The request reaches past the line: hand out the rest of the
            # line plus whatever the file supplies for the remainder.
            s = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the unread rest of the line, then delegate to the file."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the line followed by the file's lines.

        size, when given, is passed on to the file's readlines().
        """
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1343