1"""CGI-savvy HTTP Server. 2 3This module builds on SimpleHTTPServer by implementing GET and POST 4requests to cgi-bin scripts. 5 6If the os.fork() function is not present (e.g. on Windows), 7os.popen2() is used as a fallback, with slightly altered semantics; if 8that function is not present either (e.g. on Macintosh), only Python 9scripts are supported, and they are executed by the current process. 10 11In all cases, the implementation is intentionally naive -- all 12requests are executed sychronously. 13 14SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL 15-- it may execute arbitrary Python code or external programs. 16 17Note that status code 200 is sent prior to execution of a CGI script, so 18scripts cannot send other status codes such as 302 (redirect). 19""" 20 21 22__version__ = "0.4" 23 24__all__ = ["CGIHTTPRequestHandler"] 25 26import os 27import sys 28import urllib 29import BaseHTTPServer 30import SimpleHTTPServer 31import select 32import copy 33 34 35class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): 36 37 """Complete HTTP server with GET, HEAD and POST commands. 38 39 GET and HEAD also support running CGI scripts. 40 41 The POST command is *only* implemented for CGI scripts. 42 43 """ 44 45 # Determine platform specifics 46 have_fork = hasattr(os, 'fork') 47 have_popen2 = hasattr(os, 'popen2') 48 have_popen3 = hasattr(os, 'popen3') 49 50 # Make rfile unbuffered -- we need to read one line and then pass 51 # the rest to a subprocess, so we can't use buffered input. 52 rbufsize = 0 53 54 def do_POST(self): 55 """Serve a POST request. 56 57 This is only implemented for CGI scripts. 58 59 """ 60 61 if self.is_cgi(): 62 self.run_cgi() 63 else: 64 self.send_error(501, "Can only POST to CGI scripts") 65 66 def send_head(self): 67 """Version of send_head that support CGI scripts""" 68 if self.is_cgi(): 69 return self.run_cgi() 70 else: 71 return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self) 72 73 def is_cgi(self): 74 """Test whether self.path corresponds to a CGI script. 75 76 Returns True and updates the cgi_info attribute to the tuple 77 (dir, rest) if self.path requires running a CGI script. 78 Returns False otherwise. 79 80 If any exception is raised, the caller should assume that 81 self.path was rejected as invalid and act accordingly. 82 83 The default implementation tests whether the normalized url 84 path begins with one of the strings in self.cgi_directories 85 (and the next character is a '/' or the end of the string). 86 """ 87 collapsed_path = _url_collapse_path(self.path) 88 dir_sep = collapsed_path.find('/', 1) 89 head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] 90 if head in self.cgi_directories: 91 self.cgi_info = head, tail 92 return True 93 return False 94 95 cgi_directories = ['/cgi-bin', '/htbin'] 96 97 def is_executable(self, path): 98 """Test whether argument path is an executable file.""" 99 return executable(path) 100 101 def is_python(self, path): 102 """Test whether argument path is a Python script.""" 103 head, tail = os.path.splitext(path) 104 return tail.lower() in (".py", ".pyw") 105 106 def run_cgi(self): 107 """Execute a CGI script.""" 108 dir, rest = self.cgi_info 109 path = dir + '/' + rest 110 i = path.find('/', len(dir)+1) 111 while i >= 0: 112 nextdir = path[:i] 113 nextrest = path[i+1:] 114 115 scriptdir = self.translate_path(nextdir) 116 if os.path.isdir(scriptdir): 117 dir, rest = nextdir, nextrest 118 i = path.find('/', len(dir)+1) 119 else: 120 break 121 122 # find an explicit query string, if present. 123 rest, _, query = rest.partition('?') 124 125 # dissect the part after the directory name into a script name & 126 # a possible additional path, to be stored in PATH_INFO. 127 i = rest.find('/') 128 if i >= 0: 129 script, rest = rest[:i], rest[i:] 130 else: 131 script, rest = rest, '' 132 133 scriptname = dir + '/' + script 134 scriptfile = self.translate_path(scriptname) 135 if not os.path.exists(scriptfile): 136 self.send_error(404, "No such CGI script (%r)" % scriptname) 137 return 138 if not os.path.isfile(scriptfile): 139 self.send_error(403, "CGI script is not a plain file (%r)" % 140 scriptname) 141 return 142 ispy = self.is_python(scriptname) 143 if not ispy: 144 if not (self.have_fork or self.have_popen2 or self.have_popen3): 145 self.send_error(403, "CGI script is not a Python script (%r)" % 146 scriptname) 147 return 148 if not self.is_executable(scriptfile): 149 self.send_error(403, "CGI script is not executable (%r)" % 150 scriptname) 151 return 152 153 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html 154 # XXX Much of the following could be prepared ahead of time! 155 env = copy.deepcopy(os.environ) 156 env['SERVER_SOFTWARE'] = self.version_string() 157 env['SERVER_NAME'] = self.server.server_name 158 env['GATEWAY_INTERFACE'] = 'CGI/1.1' 159 env['SERVER_PROTOCOL'] = self.protocol_version 160 env['SERVER_PORT'] = str(self.server.server_port) 161 env['REQUEST_METHOD'] = self.command 162 uqrest = urllib.unquote(rest) 163 env['PATH_INFO'] = uqrest 164 env['PATH_TRANSLATED'] = self.translate_path(uqrest) 165 env['SCRIPT_NAME'] = scriptname 166 if query: 167 env['QUERY_STRING'] = query 168 host = self.address_string() 169 if host != self.client_address[0]: 170 env['REMOTE_HOST'] = host 171 env['REMOTE_ADDR'] = self.client_address[0] 172 authorization = self.headers.getheader("authorization") 173 if authorization: 174 authorization = authorization.split() 175 if len(authorization) == 2: 176 import base64, binascii 177 env['AUTH_TYPE'] = authorization[0] 178 if authorization[0].lower() == "basic": 179 try: 180 authorization = base64.decodestring(authorization[1]) 181 except binascii.Error: 182 pass 183 else: 184 authorization = authorization.split(':') 185 if len(authorization) == 2: 186 env['REMOTE_USER'] = authorization[0] 187 # XXX REMOTE_IDENT 188 if self.headers.typeheader is None: 189 env['CONTENT_TYPE'] = self.headers.type 190 else: 191 env['CONTENT_TYPE'] = self.headers.typeheader 192 length = self.headers.getheader('content-length') 193 if length: 194 env['CONTENT_LENGTH'] = length 195 referer = self.headers.getheader('referer') 196 if referer: 197 env['HTTP_REFERER'] = referer 198 accept = [] 199 for line in self.headers.getallmatchingheaders('accept'): 200 if line[:1] in "\t\n\r ": 201 accept.append(line.strip()) 202 else: 203 accept = accept + line[7:].split(',') 204 env['HTTP_ACCEPT'] = ','.join(accept) 205 ua = self.headers.getheader('user-agent') 206 if ua: 207 env['HTTP_USER_AGENT'] = ua 208 co = filter(None, self.headers.getheaders('cookie')) 209 if co: 210 env['HTTP_COOKIE'] = ', '.join(co) 211 # XXX Other HTTP_* headers 212 # Since we're setting the env in the parent, provide empty 213 # values to override previously set values 214 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 215 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 216 env.setdefault(k, "") 217 218 self.send_response(200, "Script output follows") 219 220 decoded_query = query.replace('+', ' ') 221 222 if self.have_fork: 223 # Unix -- fork as we should 224 args = [script] 225 if '=' not in decoded_query: 226 args.append(decoded_query) 227 nobody = nobody_uid() 228 self.wfile.flush() # Always flush before forking 229 pid = os.fork() 230 if pid != 0: 231 # Parent 232 pid, sts = os.waitpid(pid, 0) 233 # throw away additional data [see bug #427345] 234 while select.select([self.rfile], [], [], 0)[0]: 235 if not self.rfile.read(1): 236 break 237 if sts: 238 self.log_error("CGI script exit status %#x", sts) 239 return 240 # Child 241 try: 242 try: 243 os.setuid(nobody) 244 except os.error: 245 pass 246 os.dup2(self.rfile.fileno(), 0) 247 os.dup2(self.wfile.fileno(), 1) 248 os.execve(scriptfile, args, env) 249 except: 250 self.server.handle_error(self.request, self.client_address) 251 os._exit(127) 252 253 else: 254 # Non Unix - use subprocess 255 import subprocess 256 cmdline = [scriptfile] 257 if self.is_python(scriptfile): 258 interp = sys.executable 259 if interp.lower().endswith("w.exe"): 260 # On Windows, use python.exe, not pythonw.exe 261 interp = interp[:-5] + interp[-4:] 262 cmdline = [interp, '-u'] + cmdline 263 if '=' not in query: 264 cmdline.append(query) 265 266 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 267 try: 268 nbytes = int(length) 269 except (TypeError, ValueError): 270 nbytes = 0 271 p = subprocess.Popen(cmdline, 272 stdin = subprocess.PIPE, 273 stdout = subprocess.PIPE, 274 stderr = subprocess.PIPE, 275 env = env 276 ) 277 if self.command.lower() == "post" and nbytes > 0: 278 data = self.rfile.read(nbytes) 279 else: 280 data = None 281 # throw away additional data [see bug #427345] 282 while select.select([self.rfile._sock], [], [], 0)[0]: 283 if not self.rfile._sock.recv(1): 284 break 285 stdout, stderr = p.communicate(data) 286 self.wfile.write(stdout) 287 if stderr: 288 self.log_error('%s', stderr) 289 p.stderr.close() 290 p.stdout.close() 291 status = p.returncode 292 if status: 293 self.log_error("CGI script exit status %#x", status) 294 else: 295 self.log_message("CGI script exited OK") 296 297 298def _url_collapse_path(path): 299 """ 300 Given a URL path, remove extra '/'s and '.' path elements and collapse 301 any '..' references and returns a colllapsed path. 302 303 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. 304 The utility of this function is limited to is_cgi method and helps 305 preventing some security attacks. 306 307 Returns: The reconstituted URL, which will always start with a '/'. 308 309 Raises: IndexError if too many '..' occur within the path. 310 311 """ 312 # Query component should not be involved. 313 path, _, query = path.partition('?') 314 path = urllib.unquote(path) 315 316 # Similar to os.path.split(os.path.normpath(path)) but specific to URL 317 # path semantics rather than local operating system semantics. 318 path_parts = path.split('/') 319 head_parts = [] 320 for part in path_parts[:-1]: 321 if part == '..': 322 head_parts.pop() # IndexError if more '..' than prior parts 323 elif part and part != '.': 324 head_parts.append( part ) 325 if path_parts: 326 tail_part = path_parts.pop() 327 if tail_part: 328 if tail_part == '..': 329 head_parts.pop() 330 tail_part = '' 331 elif tail_part == '.': 332 tail_part = '' 333 else: 334 tail_part = '' 335 336 if query: 337 tail_part = '?'.join((tail_part, query)) 338 339 splitpath = ('/' + '/'.join(head_parts), tail_part) 340 collapsed_path = "/".join(splitpath) 341 342 return collapsed_path 343 344 345nobody = None 346 347def nobody_uid(): 348 """Internal routine to get nobody's uid""" 349 global nobody 350 if nobody: 351 return nobody 352 try: 353 import pwd 354 except ImportError: 355 return -1 356 try: 357 nobody = pwd.getpwnam('nobody')[2] 358 except KeyError: 359 nobody = 1 + max(map(lambda x: x[2], pwd.getpwall())) 360 return nobody 361 362 363def executable(path): 364 """Test for executable file.""" 365 try: 366 st = os.stat(path) 367 except os.error: 368 return False 369 return st.st_mode & 0111 != 0 370 371 372def test(HandlerClass = CGIHTTPRequestHandler, 373 ServerClass = BaseHTTPServer.HTTPServer): 374 SimpleHTTPServer.test(HandlerClass, ServerClass) 375 376 377if __name__ == '__main__': 378 test() 379