1"""HTTP server base class.
2
3Note: the class in this module doesn't implement any HTTP request; see
4SimpleHTTPServer for simple implementations of GET, HEAD and POST
5(including CGI scripts).  It does, however, optionally implement HTTP/1.1
6persistent connections, as of version 0.3.
7
8Contents:
9
10- BaseHTTPRequestHandler: HTTP request handler base class
11- test: test function
12
13XXX To do:
14
15- log requests even later (to capture byte count)
16- log user-agent header and other interesting goodies
17- send error log to separate file
18"""
19
20
21# See also:
22#
23# HTTP Working Group                                        T. Berners-Lee
24# INTERNET-DRAFT                                            R. T. Fielding
25# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
26# Expires September 8, 1995                                  March 8, 1995
27#
28# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
29#
30# and
31#
32# Network Working Group                                      R. Fielding
33# Request for Comments: 2616                                       et al
34# Obsoletes: 2068                                              June 1999
35# Category: Standards Track
36#
37# URL: http://www.faqs.org/rfcs/rfc2616.html
38
39# Log files
40# ---------
41#
42# Here's a quote from the NCSA httpd docs about log file format.
43#
44# | The logfile format is as follows. Each line consists of:
45# |
46# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
47# |
48# |        host: Either the DNS name or the IP number of the remote client
49# |        rfc931: Any information returned by identd for this person,
50# |                - otherwise.
51# |        authuser: If user sent a userid for authentication, the user name,
52# |                  - otherwise.
53# |        DD: Day
54# |        Mon: Month (calendar name)
55# |        YYYY: Year
56# |        hh: hour (24-hour format, the machine's timezone)
57# |        mm: minutes
58# |        ss: seconds
59# |        request: The first line of the HTTP request as sent by the client.
60# |        ddd: the status code returned by the server, - if not available.
61# |        bbbb: the total number of bytes sent,
62# |              *not including the HTTP/1.0 header*, - if not available
63# |
64# | You can determine the name of the file accessed through request.
65#
66# (Actually, the latter is only true if you know the server configuration
67# at the time the request was made!)
68
69__version__ = "0.3"
70
71__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
72
73import sys
74import time
75import socket # For gethostbyaddr()
76from warnings import filterwarnings, catch_warnings
77with catch_warnings():
78    if sys.py3kwarning:
79        filterwarnings("ignore", ".*mimetools has been removed",
80                        DeprecationWarning)
81    import mimetools
82import SocketServer
83
84# Default error message template
85DEFAULT_ERROR_MESSAGE = """\
86<head>
87<title>Error response</title>
88</head>
89<body>
90<h1>Error response</h1>
91<p>Error code %(code)d.
92<p>Message: %(message)s.
93<p>Error code explanation: %(code)s = %(explain)s.
94</body>
95"""
96
97DEFAULT_ERROR_CONTENT_TYPE = "text/html"
98
99def _quote_html(html):
100    return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
101
102class HTTPServer(SocketServer.TCPServer):
103
104    allow_reuse_address = 1    # Seems to make sense in testing environment
105
106    def server_bind(self):
107        """Override server_bind to store the server name."""
108        SocketServer.TCPServer.server_bind(self)
109        host, port = self.socket.getsockname()[:2]
110        self.server_name = socket.getfqdn(host)
111        self.server_port = port
112
113
114class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
115
116    """HTTP request handler base class.
117
118    The following explanation of HTTP serves to guide you through the
119    code as well as to expose any misunderstandings I may have about
120    HTTP (so you don't need to read the code to figure out I'm wrong
121    :-).
122
123    HTTP (HyperText Transfer Protocol) is an extensible protocol on
124    top of a reliable stream transport (e.g. TCP/IP).  The protocol
125    recognizes three parts to a request:
126
127    1. One line identifying the request type and path
128    2. An optional set of RFC-822-style headers
129    3. An optional data part
130
131    The headers and data are separated by a blank line.
132
133    The first line of the request has the form
134
135    <command> <path> <version>
136
137    where <command> is a (case-sensitive) keyword such as GET or POST,
138    <path> is a string containing path information for the request,
139    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
140    <path> is encoded using the URL encoding scheme (using %xx to signify
141    the ASCII character with hex code xx).
142
143    The specification specifies that lines are separated by CRLF but
144    for compatibility with the widest range of clients recommends
145    servers also handle LF.  Similarly, whitespace in the request line
146    is treated sensibly (allowing multiple spaces between components
147    and allowing trailing whitespace).
148
149    Similarly, for output, lines ought to be separated by CRLF pairs
150    but most clients grok LF characters just fine.
151
152    If the first line of the request has the form
153
154    <command> <path>
155
156    (i.e. <version> is left out) then this is assumed to be an HTTP
157    0.9 request; this form has no optional headers and data part and
158    the reply consists of just the data.
159
160    The reply form of the HTTP 1.x protocol again has three parts:
161
162    1. One line giving the response code
163    2. An optional set of RFC-822-style headers
164    3. The data
165
166    Again, the headers and data are separated by a blank line.
167
168    The response code line has the form
169
170    <version> <responsecode> <responsestring>
171
172    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
173    <responsecode> is a 3-digit response code indicating success or
174    failure of the request, and <responsestring> is an optional
175    human-readable string explaining what the response code means.
176
177    This server parses the request and the headers, and then calls a
178    function specific to the request type (<command>).  Specifically,
179    a request SPAM will be handled by a method do_SPAM().  If no
180    such method exists the server sends an error response to the
181    client.  If it exists, it is called with no arguments:
182
183    do_SPAM()
184
185    Note that the request name is case sensitive (i.e. SPAM and spam
186    are different requests).
187
188    The various request details are stored in instance variables:
189
190    - client_address is the client IP address in the form (host,
191    port);
192
193    - command, path and version are the broken-down request line;
194
195    - headers is an instance of mimetools.Message (or a derived
196    class) containing the header information;
197
198    - rfile is a file object open for reading positioned at the
199    start of the optional input data part;
200
201    - wfile is a file object open for writing.
202
203    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
204
205    The first thing to be written must be the response line.  Then
206    follow 0 or more header lines, then a blank line, and then the
207    actual data (if any).  The meaning of the header lines depends on
208    the command executed by the server; in most cases, when data is
209    returned, there should be at least one header line of the form
210
211    Content-type: <type>/<subtype>
212
213    where <type> and <subtype> should be registered MIME types,
214    e.g. "text/html" or "text/plain".
215
216    """
217
218    # The Python system version, truncated to its first component.
219    sys_version = "Python/" + sys.version.split()[0]
220
221    # The server software version.  You may want to override this.
222    # The format is multiple whitespace-separated strings,
223    # where each string is of the form name[/version].
224    server_version = "BaseHTTP/" + __version__
225
226    # The default request version.  This only affects responses up until
227    # the point where the request line is parsed, so it mainly decides what
228    # the client gets back when sending a malformed request line.
229    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
230    default_request_version = "HTTP/0.9"
231
232    def parse_request(self):
233        """Parse a request (internal).
234
235        The request should be stored in self.raw_requestline; the results
236        are in self.command, self.path, self.request_version and
237        self.headers.
238
239        Return True for success, False for failure; on failure, an
240        error is sent back.
241
242        """
243        self.command = None  # set in case of error on the first line
244        self.request_version = version = self.default_request_version
245        self.close_connection = 1
246        requestline = self.raw_requestline
247        requestline = requestline.rstrip('\r\n')
248        self.requestline = requestline
249        words = requestline.split()
250        if len(words) == 3:
251            command, path, version = words
252            if version[:5] != 'HTTP/':
253                self.send_error(400, "Bad request version (%r)" % version)
254                return False
255            try:
256                base_version_number = version.split('/', 1)[1]
257                version_number = base_version_number.split(".")
258                # RFC 2145 section 3.1 says there can be only one "." and
259                #   - major and minor numbers MUST be treated as
260                #      separate integers;
261                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
262                #      turn is lower than HTTP/12.3;
263                #   - Leading zeros MUST be ignored by recipients.
264                if len(version_number) != 2:
265                    raise ValueError
266                version_number = int(version_number[0]), int(version_number[1])
267            except (ValueError, IndexError):
268                self.send_error(400, "Bad request version (%r)" % version)
269                return False
270            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
271                self.close_connection = 0
272            if version_number >= (2, 0):
273                self.send_error(505,
274                          "Invalid HTTP Version (%s)" % base_version_number)
275                return False
276        elif len(words) == 2:
277            command, path = words
278            self.close_connection = 1
279            if command != 'GET':
280                self.send_error(400,
281                                "Bad HTTP/0.9 request type (%r)" % command)
282                return False
283        elif not words:
284            return False
285        else:
286            self.send_error(400, "Bad request syntax (%r)" % requestline)
287            return False
288        self.command, self.path, self.request_version = command, path, version
289
290        # Examine the headers and look for a Connection directive
291        self.headers = self.MessageClass(self.rfile, 0)
292
293        conntype = self.headers.get('Connection', "")
294        if conntype.lower() == 'close':
295            self.close_connection = 1
296        elif (conntype.lower() == 'keep-alive' and
297              self.protocol_version >= "HTTP/1.1"):
298            self.close_connection = 0
299        return True
300
301    def handle_one_request(self):
302        """Handle a single HTTP request.
303
304        You normally don't need to override this method; see the class
305        __doc__ string for information on how to handle specific HTTP
306        commands such as GET and POST.
307
308        """
309        try:
310            self.raw_requestline = self.rfile.readline(65537)
311            if len(self.raw_requestline) > 65536:
312                self.requestline = ''
313                self.request_version = ''
314                self.command = ''
315                self.send_error(414)
316                return
317            if not self.raw_requestline:
318                self.close_connection = 1
319                return
320            if not self.parse_request():
321                # An error code has been sent, just exit
322                return
323            mname = 'do_' + self.command
324            if not hasattr(self, mname):
325                self.send_error(501, "Unsupported method (%r)" % self.command)
326                return
327            method = getattr(self, mname)
328            method()
329            self.wfile.flush() #actually send the response if not already done.
330        except socket.timeout, e:
331            #a read or a write timed out.  Discard this connection
332            self.log_error("Request timed out: %r", e)
333            self.close_connection = 1
334            return
335
336    def handle(self):
337        """Handle multiple requests if necessary."""
338        self.close_connection = 1
339
340        self.handle_one_request()
341        while not self.close_connection:
342            self.handle_one_request()
343
344    def send_error(self, code, message=None):
345        """Send and log an error reply.
346
347        Arguments are the error code, and a detailed message.
348        The detailed message defaults to the short entry matching the
349        response code.
350
351        This sends an error response (so it must be called before any
352        output has been generated), logs the error, and finally sends
353        a piece of HTML explaining the error to the user.
354
355        """
356
357        try:
358            short, long = self.responses[code]
359        except KeyError:
360            short, long = '???', '???'
361        if message is None:
362            message = short
363        explain = long
364        self.log_error("code %d, message %s", code, message)
365        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
366        content = (self.error_message_format %
367                   {'code': code, 'message': _quote_html(message), 'explain': explain})
368        self.send_response(code, message)
369        self.send_header("Content-Type", self.error_content_type)
370        self.send_header('Connection', 'close')
371        self.end_headers()
372        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
373            self.wfile.write(content)
374
375    error_message_format = DEFAULT_ERROR_MESSAGE
376    error_content_type = DEFAULT_ERROR_CONTENT_TYPE
377
378    def send_response(self, code, message=None):
379        """Send the response header and log the response code.
380
381        Also send two standard headers with the server software
382        version and the current date.
383
384        """
385        self.log_request(code)
386        if message is None:
387            if code in self.responses:
388                message = self.responses[code][0]
389            else:
390                message = ''
391        if self.request_version != 'HTTP/0.9':
392            self.wfile.write("%s %d %s\r\n" %
393                             (self.protocol_version, code, message))
394            # print (self.protocol_version, code, message)
395        self.send_header('Server', self.version_string())
396        self.send_header('Date', self.date_time_string())
397
398    def send_header(self, keyword, value):
399        """Send a MIME header."""
400        if self.request_version != 'HTTP/0.9':
401            self.wfile.write("%s: %s\r\n" % (keyword, value))
402
403        if keyword.lower() == 'connection':
404            if value.lower() == 'close':
405                self.close_connection = 1
406            elif value.lower() == 'keep-alive':
407                self.close_connection = 0
408
409    def end_headers(self):
410        """Send the blank line ending the MIME headers."""
411        if self.request_version != 'HTTP/0.9':
412            self.wfile.write("\r\n")
413
414    def log_request(self, code='-', size='-'):
415        """Log an accepted request.
416
417        This is called by send_response().
418
419        """
420
421        self.log_message('"%s" %s %s',
422                         self.requestline, str(code), str(size))
423
424    def log_error(self, format, *args):
425        """Log an error.
426
427        This is called when a request cannot be fulfilled.  By
428        default it passes the message on to log_message().
429
430        Arguments are the same as for log_message().
431
432        XXX This should go to the separate error log.
433
434        """
435
436        self.log_message(format, *args)
437
438    def log_message(self, format, *args):
439        """Log an arbitrary message.
440
441        This is used by all other logging functions.  Override
442        it if you have specific logging wishes.
443
444        The first argument, FORMAT, is a format string for the
445        message to be logged.  If the format string contains
446        any % escapes requiring parameters, they should be
447        specified as subsequent arguments (it's just like
448        printf!).
449
450        The client ip address and current date/time are prefixed to every
451        message.
452
453        """
454
455        sys.stderr.write("%s - - [%s] %s\n" %
456                         (self.client_address[0],
457                          self.log_date_time_string(),
458                          format%args))
459
460    def version_string(self):
461        """Return the server software version string."""
462        return self.server_version + ' ' + self.sys_version
463
464    def date_time_string(self, timestamp=None):
465        """Return the current date and time formatted for a message header."""
466        if timestamp is None:
467            timestamp = time.time()
468        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
469        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
470                self.weekdayname[wd],
471                day, self.monthname[month], year,
472                hh, mm, ss)
473        return s
474
475    def log_date_time_string(self):
476        """Return the current time formatted for logging."""
477        now = time.time()
478        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
479        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
480                day, self.monthname[month], year, hh, mm, ss)
481        return s
482
483    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
484
485    monthname = [None,
486                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
487                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
488
489    def address_string(self):
490        """Return the client address formatted for logging.
491
492        This version looks up the full hostname using gethostbyaddr(),
493        and tries to find a name that contains at least one dot.
494
495        """
496
497        host, port = self.client_address[:2]
498        return socket.getfqdn(host)
499
500    # Essentially static class variables
501
502    # The version of the HTTP protocol we support.
503    # Set this to HTTP/1.1 to enable automatic keepalive
504    protocol_version = "HTTP/1.0"
505
506    # The Message-like class used to parse headers
507    MessageClass = mimetools.Message
508
509    # Table mapping response codes to messages; entries have the
510    # form {code: (shortmessage, longmessage)}.
511    # See RFC 2616.
512    responses = {
513        100: ('Continue', 'Request received, please continue'),
514        101: ('Switching Protocols',
515              'Switching to new protocol; obey Upgrade header'),
516
517        200: ('OK', 'Request fulfilled, document follows'),
518        201: ('Created', 'Document created, URL follows'),
519        202: ('Accepted',
520              'Request accepted, processing continues off-line'),
521        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
522        204: ('No Content', 'Request fulfilled, nothing follows'),
523        205: ('Reset Content', 'Clear input form for further input.'),
524        206: ('Partial Content', 'Partial content follows.'),
525
526        300: ('Multiple Choices',
527              'Object has several resources -- see URI list'),
528        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
529        302: ('Found', 'Object moved temporarily -- see URI list'),
530        303: ('See Other', 'Object moved -- see Method and URL list'),
531        304: ('Not Modified',
532              'Document has not changed since given time'),
533        305: ('Use Proxy',
534              'You must use proxy specified in Location to access this '
535              'resource.'),
536        307: ('Temporary Redirect',
537              'Object moved temporarily -- see URI list'),
538
539        400: ('Bad Request',
540              'Bad request syntax or unsupported method'),
541        401: ('Unauthorized',
542              'No permission -- see authorization schemes'),
543        402: ('Payment Required',
544              'No payment -- see charging schemes'),
545        403: ('Forbidden',
546              'Request forbidden -- authorization will not help'),
547        404: ('Not Found', 'Nothing matches the given URI'),
548        405: ('Method Not Allowed',
549              'Specified method is invalid for this resource.'),
550        406: ('Not Acceptable', 'URI not available in preferred format.'),
551        407: ('Proxy Authentication Required', 'You must authenticate with '
552              'this proxy before proceeding.'),
553        408: ('Request Timeout', 'Request timed out; try again later.'),
554        409: ('Conflict', 'Request conflict.'),
555        410: ('Gone',
556              'URI no longer exists and has been permanently removed.'),
557        411: ('Length Required', 'Client must specify Content-Length.'),
558        412: ('Precondition Failed', 'Precondition in headers is false.'),
559        413: ('Request Entity Too Large', 'Entity is too large.'),
560        414: ('Request-URI Too Long', 'URI is too long.'),
561        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
562        416: ('Requested Range Not Satisfiable',
563              'Cannot satisfy request range.'),
564        417: ('Expectation Failed',
565              'Expect condition could not be satisfied.'),
566
567        500: ('Internal Server Error', 'Server got itself in trouble'),
568        501: ('Not Implemented',
569              'Server does not support this operation'),
570        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
571        503: ('Service Unavailable',
572              'The server cannot process the request due to a high load'),
573        504: ('Gateway Timeout',
574              'The gateway server did not receive a timely response'),
575        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
576        }
577
578
579def test(HandlerClass = BaseHTTPRequestHandler,
580         ServerClass = HTTPServer, protocol="HTTP/1.0"):
581    """Test the HTTP request handler class.
582
583    This runs an HTTP server on port 8000 (or the first command line
584    argument).
585
586    """
587
588    if sys.argv[1:]:
589        port = int(sys.argv[1])
590    else:
591        port = 8000
592    server_address = ('', port)
593
594    HandlerClass.protocol_version = protocol
595    httpd = ServerClass(server_address, HandlerClass)
596
597    sa = httpd.socket.getsockname()
598    print "Serving HTTP on", sa[0], "port", sa[1], "..."
599    httpd.serve_forever()
600
601
602if __name__ == '__main__':
603    test()
604