1# Copyright 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import BaseHTTPServer
6import errno
7import gzip
8import mimetypes
9import os
10import SimpleHTTPServer
11import socket
12import SocketServer
13import StringIO
14import sys
15import urlparse
16from collections import namedtuple
17
18from telemetry.core import local_server
19
# Inclusive byte offsets (from_byte..to_byte) selected by a Range request.
ByteRange = namedtuple('ByteRange', ['from_byte', 'to_byte'])
# A resource_map entry paired with the ByteRange requested for it (or None).
ResourceAndRange = namedtuple('ResourceAndRange', ['resource', 'byte_range'])


class MemoryCacheHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
  """Serves GET/HEAD requests out of the owning server's resource_map.

  The request path is translated to a real filesystem path and looked up in
  self.server.resource_map; nothing is read from disk while handling a
  request. Single byte-range requests ("Range: bytes=...") are supported.
  """

  protocol_version = 'HTTP/1.1'  # override BaseHTTPServer setting
  wbufsize = -1  # override StreamRequestHandler (a base class) setting

  def handle(self):
    """Handles the connection, ignoring connection-reset socket errors."""
    try:
      BaseHTTPServer.BaseHTTPRequestHandler.handle(self)
    except socket.error as e:
      # Connection reset errors happen all the time due to the browser closing
      # without terminating the connection properly.  They can be safely
      # ignored.
      if e.errno != errno.ECONNRESET:
        raise

  def do_GET(self):
    """Serve a GET request."""
    resource_range = self.SendHead()

    if not resource_range or not resource_range.resource:
      return
    response = resource_range.resource['response']

    byte_range = resource_range.byte_range
    if not byte_range:
      self.wfile.write(response)
      return

    # ByteRange bounds are inclusive, hence the + 1 on the slice end.
    self.wfile.write(response[byte_range.from_byte:byte_range.to_byte + 1])

  def do_HEAD(self):
    """Serve a HEAD request."""
    self.SendHead()

  def log_error(self, fmt, *args):
    # Errors are intentionally not logged.
    pass

  def log_request(self, code='-', size='-'):
    # Don't spam the console unless it is important.
    pass

  def SendHead(self):
    """Sends the status line and headers for the requested resource.

    Returns:
      A ResourceAndRange holding the resource_map entry and the requested
      ByteRange (None when the whole resource is served), or None if the
      path is not in the resource map (a 404 has been sent in that case).
    """
    path = os.path.realpath(self.translate_path(self.path))
    if path not in self.server.resource_map:
      self.send_error(404, 'File not found')
      return None

    resource = self.server.resource_map[path]
    total_num_of_bytes = resource['content-length']
    byte_range = self.GetByteRange(total_num_of_bytes)
    if byte_range:
      # request specified a range, so set response code to 206.
      self.send_response(206)
      self.send_header('Content-Range',
                       'bytes %d-%d/%d' % (byte_range.from_byte,
                                           byte_range.to_byte,
                                           total_num_of_bytes))
      total_num_of_bytes = byte_range.to_byte - byte_range.from_byte + 1
    else:
      self.send_response(200)

    self.send_header('Content-Length', str(total_num_of_bytes))
    # The stored content type may be None (unknown); sending it would emit
    # the literal header "Content-Type: None", so omit the header instead.
    if resource['content-type'] is not None:
      self.send_header('Content-Type', resource['content-type'])
    self.send_header('Last-Modified',
                     self.date_time_string(resource['last-modified']))
    if resource['zipped']:
      self.send_header('Content-Encoding', 'gzip')
    self.end_headers()
    return ResourceAndRange(resource, byte_range)

  def GetByteRange(self, total_num_of_bytes):
    """Parse the header and get the range values specified.

    Args:
      total_num_of_bytes: Total # of bytes in requested resource,
      used to calculate upper range limit.
    Returns:
      A ByteRange namedtuple object with the requested (inclusive) byte-range
      values, or None if the whole resource should be served: no Range
      header, a unit other than "bytes", more than one range, non-numeric
      bounds, or a start position at or beyond the end of the resource.
      If range specified is in the format "N-", return N-END. Refer to
      http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html for details.
      If range specified is in the format "-N", return the last N bytes
      (the whole resource when N exceeds its size).
      If upper range limit is greater than total # of bytes, or is below the
      lower limit, the last byte of the resource is used as the upper index.
    """
    # 'get' is the dict-style alias of 'getheader' on the message object.
    range_header = self.headers.get('Range')
    if range_header is None or not range_header.startswith('bytes='):
      return None

    # The range header is expected to be a string in this format:
    # bytes=0-1
    # Anything that does not split into exactly two parts (for example a
    # multi-range request) is treated as unparsable.
    byte_range_values = range_header[len('bytes='):].split('-')
    if len(byte_range_values) != 2:
      return None
    first_text, last_text = byte_range_values

    try:
      if first_text:
        from_byte = int(first_text)
        # If the upper limit is not defined return all bytes starting from
        # from_byte.
        to_byte = int(last_text) if last_text else total_num_of_bytes - 1
      elif last_text:
        # Suffix form "-N": the last N bytes, always ending at the last byte.
        suffix_length = int(last_text)
        from_byte = max(total_num_of_bytes - suffix_length, 0)
        to_byte = total_num_of_bytes - 1
      else:
        # "bytes=-" specifies nothing.
        return None
    except ValueError:
      return None

    # A start outside the resource cannot be satisfied; fall back to serving
    # the whole resource rather than emitting a negative Content-Length.
    if from_byte < 0 or from_byte >= total_num_of_bytes:
      return None

    # Make to_byte the end byte by default in edge cases.
    if to_byte < from_byte or to_byte >= total_num_of_bytes:
      to_byte = total_num_of_bytes - 1

    return ByteRange(from_byte, to_byte)
143
144
145class _MemoryCacheHTTPServerImpl(SocketServer.ThreadingMixIn,
146                                 BaseHTTPServer.HTTPServer):
147  # Increase the request queue size. The default value, 5, is set in
148  # SocketServer.TCPServer (the parent of BaseHTTPServer.HTTPServer).
149  # Since we're intercepting many domains through this single server,
150  # it is quite possible to get more than 5 concurrent requests.
151  request_queue_size = 128
152
153  # Don't prevent python from exiting when there is thread activity.
154  daemon_threads = True
155
156  def __init__(self, host_port, handler, paths):
157    BaseHTTPServer.HTTPServer.__init__(self, host_port, handler)
158    self.resource_map = {}
159    for path in paths:
160      if os.path.isdir(path):
161        self.AddDirectoryToResourceMap(path)
162      else:
163        self.AddFileToResourceMap(path)
164
165  def AddDirectoryToResourceMap(self, directory_path):
166    """Loads all files in directory_path into the in-memory resource map."""
167    for root, dirs, files in os.walk(directory_path):
168      # Skip hidden files and folders (like .svn and .git).
169      files = [f for f in files if f[0] != '.']
170      dirs[:] = [d for d in dirs if d[0] != '.']
171
172      for f in files:
173        file_path = os.path.join(root, f)
174        if not os.path.exists(file_path):  # Allow for '.#' files
175          continue
176        self.AddFileToResourceMap(file_path)
177
178  def AddFileToResourceMap(self, file_path):
179    """Loads file_path into the in-memory resource map."""
180    file_path = os.path.realpath(file_path)
181    if file_path in self.resource_map:
182      return
183
184    with open(file_path, 'rb') as fd:
185      response = fd.read()
186      fs = os.fstat(fd.fileno())
187    content_type = mimetypes.guess_type(file_path)[0]
188    zipped = False
189    if content_type in ['text/html', 'text/css', 'application/javascript']:
190      zipped = True
191      sio = StringIO.StringIO()
192      gzf = gzip.GzipFile(fileobj=sio, compresslevel=9, mode='wb')
193      gzf.write(response)
194      gzf.close()
195      response = sio.getvalue()
196      sio.close()
197    self.resource_map[file_path] = {
198        'content-type': content_type,
199        'content-length': len(response),
200        'last-modified': fs.st_mtime,
201        'response': response,
202        'zipped': zipped
203        }
204
205    index = 'index.html'
206    if os.path.basename(file_path) == index:
207      dir_path = os.path.dirname(file_path)
208      self.resource_map[dir_path] = self.resource_map[file_path]
209
210
class MemoryCacheHTTPServerBackend(local_server.LocalServerBackend):
  """Backend that creates and runs a _MemoryCacheHTTPServerImpl."""

  def __init__(self):
    super(MemoryCacheHTTPServerBackend, self).__init__()
    self._httpd = None

  def StartAndGetNamedPorts(self, args):
    """Changes into the base directory and starts the HTTP server.

    Exits the process with status 1 if any path is not located under the
    base directory.

    Args:
      args: Dict with the keys 'base_dir' (directory to chdir into),
          'paths' (files/directories to serve), 'host' and 'port'.
    Returns:
      A list holding a single local_server.NamedPort named 'http' with the
      port the server is actually bound to.
    """
    base_dir = args['base_dir']
    os.chdir(base_dir)

    # Resolve the cwd once. The trailing separator makes the prefix test
    # match whole path components only, so that e.g. '/a/foobar' is not
    # mistaken for a path under '/a/foo'.
    cwd = os.path.realpath(os.getcwd())
    cwd_prefix = os.path.join(cwd, '')

    paths = args['paths']
    for path in paths:
      real_path = os.path.realpath(path)
      if real_path != cwd and not real_path.startswith(cwd_prefix):
        sys.stderr.write('"%s" is not under the cwd.\n' % path)
        sys.exit(1)

    server_address = (args['host'], args['port'])
    MemoryCacheHTTPRequestHandler.protocol_version = 'HTTP/1.1'
    self._httpd = _MemoryCacheHTTPServerImpl(
        server_address, MemoryCacheHTTPRequestHandler, paths)
    return [local_server.NamedPort('http', self._httpd.server_address[1])]

  def ServeForever(self):
    """Runs the server's serve_forever() loop."""
    return self._httpd.serve_forever()
234
235
class MemoryCacheHTTPServer(local_server.LocalServer):
  """Local server that serves a fixed set of paths via the memory cache
  backend (MemoryCacheHTTPServerBackend)."""

  def __init__(self, paths):
    """Records the paths to serve and derives their common base directory.

    Args:
      paths: Iterable of existing files and/or directories to serve.
    """
    super(MemoryCacheHTTPServer, self).__init__(
        MemoryCacheHTTPServerBackend)
    self._base_dir = None

    # Materialize first so that a one-shot iterable is not exhausted by the
    # existence check below.
    paths = list(paths)
    for path in paths:
      assert os.path.exists(path), '%s does not exist.' % path

    self._paths = paths

    self._paths_as_set = set(map(os.path.realpath, paths))

    self._base_dir = self._CommonDirectory(paths)

  @staticmethod
  def _CommonDirectory(paths):
    """Returns a directory that is a path-wise ancestor of every path.

    os.path.commonprefix compares character by character, so its result may
    end in the middle of a path component ('/a/foo' for '/a/foo' and
    '/a/foobar'). The prefix is only used as-is when it is a directory and
    every path either equals it or continues with a separator; otherwise
    its parent directory is used.
    """
    common_prefix = os.path.commonprefix(paths)
    separators = tuple(s for s in (os.sep, os.altsep) if s)

    def IsUnderPrefix(path):
      remainder = path[len(common_prefix):]
      return (not remainder or
              remainder.startswith(separators) or
              common_prefix.endswith(separators))

    if (os.path.isdir(common_prefix) and
        all(IsUnderPrefix(path) for path in paths)):
      return common_prefix
    return os.path.dirname(common_prefix)

  def GetBackendStartupArgs(self):
    """Returns the args dict consumed by the backend's startup method."""
    # Port 0 is requested here; the backend reports the port actually bound.
    return {'base_dir': self._base_dir,
            'paths': self._paths,
            'host': self.host_ip,
            'port': 0}

  @property
  def paths(self):
    """The set of real paths (os.path.realpath) of the served paths."""
    return self._paths_as_set

  @property
  def url(self):
    """The URL reported by self.forwarder."""
    return self.forwarder.url

  def UrlOf(self, path):
    """Returns the URL for path, taken relative to the base directory."""
    relative_path = os.path.relpath(path, self._base_dir)
    # Preserve trailing slash or backslash.
    # It doesn't matter in a file path, but it does matter in a URL.
    if path.endswith(os.sep) or (os.altsep and path.endswith(os.altsep)):
      relative_path += '/'
    return urlparse.urljoin(self.url, relative_path.replace(os.sep, '/'))
277