# Copyright 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import BaseHTTPServer
import errno
import gzip
import mimetypes
import os
import SimpleHTTPServer
import socket
import SocketServer
import StringIO
import sys
import urlparse
from collections import namedtuple

from telemetry.core import local_server

# Inclusive byte offsets of a requested range within a resource.
ByteRange = namedtuple('ByteRange', ['from_byte', 'to_byte'])
# A resource_map entry paired with the (possibly None) range requested of it.
ResourceAndRange = namedtuple('ResourceAndRange', ['resource', 'byte_range'])


class MemoryCacheHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
  """Request handler that serves responses out of the server's resource_map.

  The handler never reads from disk itself: every lookup goes through
  self.server.resource_map, keyed by the real path of the requested file.
  """

  protocol_version = 'HTTP/1.1'  # override BaseHTTPServer setting
  wbufsize = -1  # override StreamRequestHandler (a base class) setting

  def handle(self):
    """Handles requests on the connection, ignoring connection resets."""
    try:
      BaseHTTPServer.BaseHTTPRequestHandler.handle(self)
    except socket.error as e:
      # Connection reset errors happen all the time due to the browser closing
      # without terminating the connection properly. They can be safely
      # ignored. Use .errno rather than e[0] so that an error constructed
      # without arguments is re-raised instead of failing with IndexError.
      if e.errno != errno.ECONNRESET:
        raise

  def do_GET(self):
    """Serve a GET request."""
    resource_range = self.SendHead()

    if not resource_range or not resource_range.resource:
      return
    response = resource_range.resource['response']

    if not resource_range.byte_range:
      self.wfile.write(response)
      return

    # Both range ends are inclusive, hence the + 1 on the slice end.
    start_index = resource_range.byte_range.from_byte
    end_index = resource_range.byte_range.to_byte
    self.wfile.write(response[start_index:end_index + 1])

  def do_HEAD(self):
    """Serve a HEAD request."""
    self.SendHead()

  def log_error(self, fmt, *args):
    """Suppresses error logging."""
    pass

  def log_request(self, code='-', size='-'):
    """Suppresses per-request logging."""
    # Don't spam the console unless it is important.
    pass

  def SendHead(self):
    """Sends the status line and headers for the requested resource.

    Returns:
      A ResourceAndRange for the requested path, whose byte_range is None when
      the whole resource is to be sent. Returns None, after sending a 404, if
      the path is not in the server's resource map.
    """
    path = os.path.realpath(self.translate_path(self.path))
    if path not in self.server.resource_map:
      self.send_error(404, 'File not found')
      return None

    resource = self.server.resource_map[path]
    total_num_of_bytes = resource['content-length']
    byte_range = self.GetByteRange(total_num_of_bytes)
    if byte_range:
      # request specified a range, so set response code to 206.
      self.send_response(206)
      self.send_header('Content-Range',
                       'bytes %d-%d/%d' % (byte_range.from_byte,
                                           byte_range.to_byte,
                                           total_num_of_bytes))
      # From here on this is the number of bytes actually sent.
      total_num_of_bytes = byte_range.to_byte - byte_range.from_byte + 1
    else:
      self.send_response(200)

    self.send_header('Content-Length', str(total_num_of_bytes))
    self.send_header('Content-Type', resource['content-type'])
    self.send_header('Last-Modified',
                     self.date_time_string(resource['last-modified']))
    if resource['zipped']:
      self.send_header('Content-Encoding', 'gzip')
    self.end_headers()
    return ResourceAndRange(resource, byte_range)

  def GetByteRange(self, total_num_of_bytes):
    """Parse the header and get the range values specified.

    Args:
      total_num_of_bytes: Total # of bytes in requested resource,
      used to calculate upper range limit.
    Returns:
      A ByteRange namedtuple object with the requested byte-range values
      (both ends inclusive), or None when the request should be answered with
      the whole resource. None is returned if no Range is explicitly
      requested, if the header cannot be parsed (wrong unit, more than one
      range, non-numeric or missing values), or if the range starts at or
      beyond the end of the resource.
      If range specified is in the format "N-", return N-END.
      If range specified is in the format "-N", return the last N bytes, or
      the whole resource when it is shorter than N bytes.
      If the upper limit is below the lower limit, or at or beyond the end of
      the resource, the last byte index is used as the upper limit.
      Refer to http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html for
      details.
    """
    range_header = self.headers.getheader('Range')
    if range_header is None or not range_header.startswith('bytes='):
      return None

    # The range header is expected to be a string in this format:
    # bytes=0-1
    # Anything that does not split into exactly two parts (for example a
    # multi-range request) is treated as unparseable.
    byte_range_values = range_header[len('bytes='):].split('-')
    if len(byte_range_values) != 2:
      return None
    first, last = byte_range_values

    last_index = total_num_of_bytes - 1
    try:
      if first:
        # "N-M" or "N-": an open upper limit means "up to the last byte".
        from_byte = int(first)
        to_byte = int(last) if last else last_index
      elif last:
        # "-N": the final N bytes, clamped to the start of the resource.
        from_byte = max(total_num_of_bytes - int(last), 0)
        to_byte = last_index
      else:
        # "bytes=-" carries no information at all.
        return None
    except ValueError:
      # Non-numeric limits count as a parse failure.
      return None

    # A range starting past the end cannot be served; fall back to the whole
    # resource rather than emitting a negative Content-Length.
    if from_byte >= total_num_of_bytes:
      return None

    # Make to_byte the end byte by default in edge cases.
    if to_byte < from_byte or to_byte > last_index:
      to_byte = last_index

    return ByteRange(from_byte, to_byte)


class _MemoryCacheHTTPServerImpl(SocketServer.ThreadingMixIn,
                                 BaseHTTPServer.HTTPServer):
  """Threaded HTTP server that preloads the given paths into memory."""

  # Increase the request queue size. The default value, 5, is set in
  # SocketServer.TCPServer (the parent of BaseHTTPServer.HTTPServer).
  # Since we're intercepting many domains through this single server,
  # it is quite possible to get more than 5 concurrent requests.
  request_queue_size = 128

  # Don't prevent python from exiting when there is thread activity.
  daemon_threads = True

  def __init__(self, host_port, handler, paths):
    """Binds the server and loads every entry of paths into resource_map.

    Args:
      host_port: (host, port) tuple passed to BaseHTTPServer.HTTPServer.
      handler: Request handler class passed to BaseHTTPServer.HTTPServer.
      paths: Iterable of file or directory paths to load.
    """
    BaseHTTPServer.HTTPServer.__init__(self, host_port, handler)
    # Maps a real path to a dict with the keys 'content-type',
    # 'content-length', 'last-modified', 'response' and 'zipped'.
    self.resource_map = {}
    for path in paths:
      if os.path.isdir(path):
        self.AddDirectoryToResourceMap(path)
      else:
        self.AddFileToResourceMap(path)

  def AddDirectoryToResourceMap(self, directory_path):
    """Loads all files in directory_path into the in-memory resource map."""
    for root, dirs, files in os.walk(directory_path):
      # Skip hidden files and folders (like .svn and .git). Assigning to
      # dirs[:] prunes the walk in place so hidden folders are not entered.
      files = [f for f in files if f[0] != '.']
      dirs[:] = [d for d in dirs if d[0] != '.']

      for f in files:
        file_path = os.path.join(root, f)
        if not os.path.exists(file_path):  # Allow for '.#' files
          continue
        self.AddFileToResourceMap(file_path)

  def AddFileToResourceMap(self, file_path):
    """Loads file_path into the in-memory resource map.

    HTML, CSS and JavaScript content is stored gzip-compressed and flagged as
    'zipped'; 'content-length' is the length of the stored bytes. A file named
    index.html is additionally registered under its directory's path.
    """
    file_path = os.path.realpath(file_path)
    if file_path in self.resource_map:
      return

    with open(file_path, 'rb') as fd:
      response = fd.read()
      fs = os.fstat(fd.fileno())
    content_type = mimetypes.guess_type(file_path)[0]
    zipped = False
    if content_type in ['text/html', 'text/css', 'application/javascript']:
      zipped = True
      sio = StringIO.StringIO()
      gzf = gzip.GzipFile(fileobj=sio, compresslevel=9, mode='wb')
      gzf.write(response)
      gzf.close()
      response = sio.getvalue()
      sio.close()
    self.resource_map[file_path] = {
        'content-type': content_type,
        'content-length': len(response),
        'last-modified': fs.st_mtime,
        'response': response,
        'zipped': zipped
        }

    index = 'index.html'
    if os.path.basename(file_path) == index:
      dir_path = os.path.dirname(file_path)
      self.resource_map[dir_path] = self.resource_map[file_path]


class MemoryCacheHTTPServerBackend(local_server.LocalServerBackend):
  """Backend that creates and runs the in-memory HTTP server."""

  def __init__(self):
    super(MemoryCacheHTTPServerBackend, self).__init__()
    self._httpd = None

  def StartAndGetNamedPorts(self, args):
    """Changes into args['base_dir'] and starts serving args['paths'].

    Args:
      args: Dict with the keys 'base_dir', 'paths', 'host' and 'port'.
    Returns:
      A single-element list holding the 'http' NamedPort the server bound to.
    """
    base_dir = args['base_dir']
    os.chdir(base_dir)

    paths = args['paths']
    for path in paths:
      # NOTE(review): this is a plain string-prefix comparison, so a sibling
      # directory whose name merely starts with the cwd's name also passes.
      if not os.path.realpath(path).startswith(os.path.realpath(os.getcwd())):
        sys.stderr.write('"%s" is not under the cwd.\n' % path)
        sys.exit(1)

    server_address = (args['host'], args['port'])
    MemoryCacheHTTPRequestHandler.protocol_version = 'HTTP/1.1'
    self._httpd = _MemoryCacheHTTPServerImpl(
        server_address, MemoryCacheHTTPRequestHandler, paths)
    return [local_server.NamedPort('http', self._httpd.server_address[1])]

  def ServeForever(self):
    """Runs the server's request loop."""
    return self._httpd.serve_forever()


class MemoryCacheHTTPServer(local_server.LocalServer):
  """Local server front end describing which paths to serve from memory."""

  def __init__(self, paths):
    """Records paths and derives the base directory they are served from.

    Args:
      paths: Iterable of existing file or directory paths.
    """
    super(MemoryCacheHTTPServer, self).__init__(
        MemoryCacheHTTPServerBackend)
    self._base_dir = None

    for path in paths:
      assert os.path.exists(path), '%s does not exist.' % path

    paths = list(paths)
    self._paths = paths

    self._paths_as_set = set(map(os.path.realpath, paths))

    # commonprefix compares character by character, so the result may stop in
    # the middle of a path component; fall back to its parent in that case.
    common_prefix = os.path.commonprefix(paths)
    if os.path.isdir(common_prefix):
      self._base_dir = common_prefix
    else:
      self._base_dir = os.path.dirname(common_prefix)

  def GetBackendStartupArgs(self):
    """Returns the args dict consumed by the backend's StartAndGetNamedPorts."""
    # Port 0 is passed through to the HTTP server's bind address.
    # host_ip is not defined in this file; presumably provided by LocalServer.
    return {'base_dir': self._base_dir,
            'paths': self._paths,
            'host': self.host_ip,
            'port': 0}

  @property
  def paths(self):
    """The set of real paths this server was created with."""
    return self._paths_as_set

  @property
  def url(self):
    """The forwarder's URL (forwarder is not defined in this file)."""
    return self.forwarder.url

  def UrlOf(self, path):
    """Returns the URL for path, computed relative to the base directory."""
    relative_path = os.path.relpath(path, self._base_dir)
    # Preserve trailing slash or backslash.
    # It doesn't matter in a file path, but it does matter in a URL.
    if path.endswith(os.sep) or (os.altsep and path.endswith(os.altsep)):
      relative_path += '/'
    return urlparse.urljoin(self.url, relative_path.replace(os.sep, '/'))