1"""CGI-savvy HTTP Server.
2
3This module builds on SimpleHTTPServer by implementing GET and POST
4requests to cgi-bin scripts.
5
If the os.fork() function is not present (e.g. on Windows), the
subprocess module is used as a fallback, with slightly altered semantics;
if os.popen2() and os.popen3() are not present either, only Python
scripts are supported, and they are run by a Python interpreter started
as a subprocess.
10
In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.
13
14SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
15-- it may execute arbitrary Python code or external programs.
16
17Note that status code 200 is sent prior to execution of a CGI script, so
18scripts cannot send other status codes such as 302 (redirect).
19"""
20
21
# Version string of this module.
__version__ = "0.4"

# Public API: only the request handler class is exported by
# "from <module> import *".
__all__ = ["CGIHTTPRequestHandler"]
25
26import os
27import sys
28import urllib
29import BaseHTTPServer
30import SimpleHTTPServer
31import select
32import copy
33
34
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')
    have_popen2 = hasattr(os, 'popen2')
    have_popen3 = hasattr(os, 'popen3')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else is
        delegated to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).
        """
        collapsed_path = _url_collapse_path(self.path)
        # Split after the first path component: head is e.g. '/cgi-bin',
        # tail is everything following the separator.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False

    # URL path prefixes that is_cgi() recognizes as CGI directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        The decision is based purely on the file extension
        (".py" or ".pyw", compared case-insensitively).
        """
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses the (dir, rest) tuple stored in self.cgi_info by is_cgi()
        to locate the script, builds the CGI environment from the
        request, sends the 200 status line and then runs the script:
        through fork()/execve() when os.fork() exists, otherwise
        through the subprocess module.

        A 404 or 403 error is sent (and the method returns early) when
        the script does not exist, is not a plain file, or may not be
        executed on this platform.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Walk down the path for as long as the leading components are
        # real directories, so scripts in nested subdirectories of a
        # CGI directory are found.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        if not ispy:
            if not (self.have_fork or self.have_popen2 or self.have_popen3):
                self.send_error(403, "CGI script is not a Python script (%r)" %
                                scriptname)
                return
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.getheader("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = base64.decodestring(authorization[1])
                    except binascii.Error:
                        pass
                    else:
                        # Split on the first ':' only: the password part
                        # of "user:password" may itself contain colons.
                        authorization = authorization.split(':', 1)
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.typeheader is None:
            env['CONTENT_TYPE'] = self.headers.type
        else:
            env['CONTENT_TYPE'] = self.headers.typeheader
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.getheader('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Continuation line of a folded Accept header.
                accept.append(line.strip())
            else:
                # Strip the leading "Accept:" (7 characters).
                accept.extend(line[7:].split(','))
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.getheaders('cookie'))
        if co:
            env['HTTP_COOKIE'] = ', '.join(co)
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")

        # A query without '=' is passed to the script as a command-line
        # argument, with '+' turned back into spaces.
        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    pass
                # Wire the client connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must never
                # return into the server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non Unix - use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # Same argument convention as the fork branch above.
            if '=' not in decoded_query:
                cmdline.append(decoded_query)

            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: read no body.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin = subprocess.PIPE,
                                 stdout = subprocess.PIPE,
                                 stderr = subprocess.PIPE,
                                 env = env
                                )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
296
297
298def _url_collapse_path(path):
299    """
300    Given a URL path, remove extra '/'s and '.' path elements and collapse
301    any '..' references and returns a colllapsed path.
302
303    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
304    The utility of this function is limited to is_cgi method and helps
305    preventing some security attacks.
306
307    Returns: The reconstituted URL, which will always start with a '/'.
308
309    Raises: IndexError if too many '..' occur within the path.
310
311    """
312    # Query component should not be involved.
313    path, _, query = path.partition('?')
314    path = urllib.unquote(path)
315
316    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
317    # path semantics rather than local operating system semantics.
318    path_parts = path.split('/')
319    head_parts = []
320    for part in path_parts[:-1]:
321        if part == '..':
322            head_parts.pop() # IndexError if more '..' than prior parts
323        elif part and part != '.':
324            head_parts.append( part )
325    if path_parts:
326        tail_part = path_parts.pop()
327        if tail_part:
328            if tail_part == '..':
329                head_parts.pop()
330                tail_part = ''
331            elif tail_part == '.':
332                tail_part = ''
333    else:
334        tail_part = ''
335
336    if query:
337        tail_part = '?'.join((tail_part, query))
338
339    splitpath = ('/' + '/'.join(head_parts), tail_part)
340    collapsed_path = "/".join(splitpath)
341
342    return collapsed_path
343
344
# Cached uid of the 'nobody' account; filled in lazily by nobody_uid().
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The value is looked up once through the pwd module and cached in the
    module-level ``nobody`` variable.  If there is no 'nobody' account,
    one more than the highest uid in the password database is used.
    Returns -1 (without caching) when the pwd module is unavailable.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
361
362
def executable(path):
    """Test for executable file.

    Returns True if any of the owner/group/other execute permission bits
    is set on *path*, and False if none is set or the path cannot be
    stat()ed.
    """
    import stat
    try:
        st = os.stat(path)
    except os.error:
        return False
    # Named permission bits instead of the legacy octal literal 0111,
    # which is a syntax error on Python 3; the mask value is the same.
    any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return st.st_mode & any_exec != 0
370
371
def test(HandlerClass = CGIHTTPRequestHandler,
         ServerClass = BaseHTTPServer.HTTPServer):
    """Run the SimpleHTTPServer test driver with the CGI-capable handler.

    HandlerClass and ServerClass are passed straight through to
    SimpleHTTPServer.test().
    """
    SimpleHTTPServer.test(HandlerClass, ServerClass)


# Start the test server when the module is executed as a script.
if __name__ == '__main__':
    test()
379