server.py revision 2682c90860655f6c25c61f97c8d8db309d03087a
1#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
9HTTP server for our HTML rebaseline viewer.
10"""
11
12# System-level imports
13import argparse
14import BaseHTTPServer
15import json
16import logging
17import os
18import posixpath
19import re
20import shutil
21import socket
22import subprocess
23import sys
24import thread
25import threading
26import time
27import urlparse
28
29# Imports from within Skia
30#
31# We need to add the 'tools' directory, so that we can import svn.py within
32# that directory.
33# Make sure that the 'tools' dir is in the PYTHONPATH, but add it at the *end*
34# so any dirs that are already in the PYTHONPATH will be preferred.
# PARENT_DIRECTORY is the directory holding this script (with symlinks
# resolved); TRUNK_DIRECTORY is two directory levels above it.
PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(PARENT_DIRECTORY))
TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')
# Appending (rather than inserting at the front) keeps any existing sys.path
# entries ahead of the tools directory.
if TOOLS_DIRECTORY not in sys.path:
  sys.path.append(TOOLS_DIRECTORY)
# This import must come after the sys.path modification above.
import svn
41
42# Imports from local dir
43import results
44
# URL of the SVN repository that is checked out to obtain actual GM results.
ACTUALS_SVN_REPO = 'http://skia-autogen.googlecode.com/svn/gm-actual'
# Splits a request path of the form '/dispatcher/remainder' into two groups:
# the dispatcher name and everything after the second slash.
PATHSPLIT_RE = re.compile('/([^/]+)/(.+)')
# Directory within the checkout that holds the expected GM results.
EXPECTATIONS_DIR = os.path.join(TRUNK_DIRECTORY, 'expectations', 'gm')
# Passed to results.Results as generated_images_root; located under the
# 'static' directory that sits next to this script.
GENERATED_IMAGES_ROOT = os.path.join(PARENT_DIRECTORY, 'static',
                                     'generated-images')

# A simple dictionary of file name extensions to MIME types. The empty string
# entry is used as the default when no extension was given or if the extension
# has no entry in this dictionary.
MIME_TYPE_MAP = {'': 'application/octet-stream',
                 'html': 'text/html',
                 'css': 'text/css',
                 'png': 'image/png',
                 'js': 'application/javascript',
                 'json': 'application/json'
                 }

# Defaults for the --actuals-dir and --port command-line flags.
DEFAULT_ACTUALS_DIR = '.gm-actuals'
DEFAULT_PORT = 8888

# How often (in seconds) clients should reload while waiting for initial
# results to load.
RELOAD_INTERVAL_UNTIL_READY = 10

# Names of the HTTP request headers read when handling POSTed edits.
_HTTP_HEADER_CONTENT_LENGTH = 'Content-Length'
_HTTP_HEADER_CONTENT_TYPE = 'Content-Type'

# The single Server instance; request handlers reach it through this global.
_SERVER = None   # This gets filled in by main()
73
74
def _run_command(args, directory):
  """Runs a command and returns stdout as a single string.

  Args:
    args: the command to run, as a list of arguments
    directory: directory within which to run the command

  Returns: stdout, as a string

  Raises an Exception if the command failed (exited with nonzero return code).
  """
  logging.debug('_run_command: %s in directory %s', args, directory)
  proc = subprocess.Popen(args, cwd=directory,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
  # communicate() drains both pipes and waits for the process to exit, so
  # proc.returncode is set by the time it returns.
  (stdout, stderr) = proc.communicate()
  # Compare by value, not identity: "is not 0" only works by accident of the
  # interpreter's small-integer caching.
  if proc.returncode != 0:
    raise Exception('command "%s" failed in dir "%s": %s' %
                    (args, directory, stderr))
  return stdout
95
96
def _get_routable_ip_address():
  """Returns routable IP address of this host (the IP address of its network
     interface that would be used for most traffic, not its localhost
     interface).  See http://stackoverflow.com/a/166589

  Raises whatever socket error connect()/getsockname() raise (for example
  when the host has no route to the probe address).
  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
  try:
    # "Connecting" a datagram socket just selects the local interface that
    # would be used to reach the peer; we then read that interface's address.
    sock.connect(('8.8.8.8', 80))
    return sock.getsockname()[0]
  finally:
    # Always release the socket, even if the lookup above failed.
    sock.close()
106
107
def _create_svn_checkout(dir_path, repo_url):
  """Returns an svn.Svn object for the local checkout at dir_path, checking
  the repository out first if that directory does not exist yet.

  Args:
    dir_path: path to the local checkout; when this directory is missing it
              is created and repo_url is checked out into it
    repo_url: URL of the SVN repo to check out; ignored when dir_path
              already exists
  Returns: an svn.Svn object referring to the local checkout.
  """
  checkout = svn.Svn(dir_path)
  already_present = os.path.isdir(dir_path)
  if already_present:
    # Existing directory: assume it already holds the checkout.
    return checkout
  os.makedirs(dir_path)
  checkout.Checkout(repo_url, '.')
  return checkout
124
125
class Server(object):
  """ HTTP server for our HTML rebaseline viewer.

  Owns the local checkout of actual GM results, the most recently computed
  results.Results object, and the lock that guards both.
  """

  def __init__(self,
               actuals_dir=DEFAULT_ACTUALS_DIR,
               port=DEFAULT_PORT, export=False, editable=True,
               reload_seconds=0):
    """
    Args:
      actuals_dir: directory under which we will check out the latest actual
                   GM results
      port: which TCP port to listen on for HTTP requests
      export: whether to allow HTTP clients on other hosts to access this server
      editable: whether HTTP clients are allowed to submit new baselines
      reload_seconds: polling interval with which to check for new results;
                      if 0, don't check for new results at all
    """
    self._actuals_dir = actuals_dir
    self._port = port
    self._export = export
    self._editable = editable
    self._reload_seconds = reload_seconds
    # Base URL of this server; filled in by run() once the listening socket
    # has been bound.
    self._url = None
    self._actuals_repo = _create_svn_checkout(
        dir_path=actuals_dir, repo_url=ACTUALS_SVN_REPO)

    # Reentrant lock that must be held whenever updating EITHER of:
    # 1. self._results
    # 2. the expected or actual results on local disk
    self.results_rlock = threading.RLock()
    # self._results will be filled in by calls to update_results()
    self._results = None

  @property
  def results(self):
    """ Returns the most recently generated results, or None if update_results()
    has not been called yet. """
    return self._results

  @property
  def is_exported(self):
    """ Returns true iff HTTP clients on other hosts are allowed to access
    this server. """
    return self._export

  @property
  def is_editable(self):
    """ Returns true iff HTTP clients are allowed to submit new baselines. """
    return self._editable

  @property
  def reload_seconds(self):
    """ Returns the result reload period in seconds, or 0 if we don't reload
    results. """
    return self._reload_seconds

  def update_results(self):
    """ Create or update self._results, based on the expectations in
    EXPECTATIONS_DIR and the latest actuals from skia-autogen.

    We hold self.results_rlock while we do this, to guarantee that no other
    thread attempts to update either self._results or the underlying files at
    the same time.
    """
    with self.results_rlock:
      logging.info('Updating actual GM results in %s from SVN repo %s ...' % (
          self._actuals_dir, ACTUALS_SVN_REPO))
      self._actuals_repo.Update('.')

      # We only update the expectations dir if the server was run with a
      # nonzero --reload argument; otherwise, we expect the user to maintain
      # her own expectations as she sees fit.
      #
      # Because the Skia repo is moving from SVN to git, and git does not
      # support updating a single directory tree, we have to update the entire
      # repo checkout.
      #
      # Because Skia uses depot_tools, we have to update using "gclient sync"
      # instead of raw git (or SVN) update.  Happily, this will work whether
      # the checkout was created using git or SVN.
      if self._reload_seconds:
        logging.info(
            'Updating expected GM results in %s by syncing Skia repo ...' %
            EXPECTATIONS_DIR)
        _run_command(['gclient', 'sync'], TRUNK_DIRECTORY)

      logging.info(
          ('Parsing results from actuals in %s and expectations in %s, '
           + 'and generating pixel diffs (may take a while) ...') % (
               self._actuals_dir, EXPECTATIONS_DIR))
      self._results = results.Results(
          actuals_root=self._actuals_dir,
          expected_root=EXPECTATIONS_DIR,
          generated_images_root=GENERATED_IMAGES_ROOT)

  def _result_loader(self, reload_seconds=0):
    """ Call self.update_results(), either once or periodically.

    Params:
      reload_seconds: integer; if nonzero, reload results at this interval
          (in which case, this method will never return!)
    """
    self.update_results()
    logging.info('Initial results loaded. Ready for requests on %s' % self._url)
    if reload_seconds:
      while True:
        time.sleep(reload_seconds)
        self.update_results()

  def run(self):
    """ Binds the HTTP listening socket, starts the background thread that
    loads (and optionally reloads) results, and then serves requests forever.

    When exporting, listens on all interfaces and advertises this host's
    routable IP address; otherwise listens on 127.0.0.1 only.
    """
    if self._export:
      server_address = ('', self._port)
      host = _get_routable_ip_address()
      if self._editable:
        logging.warning('Running with combination of "export" and "editable" '
                        'flags.  Users on other machines will '
                        'be able to modify your GM expectations!')
    else:
      host = '127.0.0.1'
      server_address = (host, self._port)
    http_server = BaseHTTPServer.HTTPServer(server_address, HTTPRequestHandler)
    self._url = 'http://%s:%d' % (host, http_server.server_port)

    # Start the loader only after self._url has been assigned: the loader
    # thread logs self._url once the initial results are ready, so starting it
    # earlier would race with the assignment above.
    arg_tuple = (self._reload_seconds,)  # start_new_thread needs a tuple,
                                         # even though it holds just one param
    thread.start_new_thread(self._result_loader, arg_tuple)

    logging.info('Listening for requests on %s' % self._url)
    http_server.serve_forever()
253
254
class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
  """ HTTP request handlers for various types of queries this server knows
      how to handle (static HTML and Javascript, expected/actual results, etc.)

  Handlers reach the Server object through the module-level _SERVER global.
  """
  def do_GET(self):
    """ Handles all GET requests, forwarding them to the appropriate
        do_GET_* dispatcher.  Requests that do not name a known dispatcher
        receive a 404 response. """
    if self.path == '' or self.path == '/' or self.path == '/index.html' :
      self.redirect_to('/static/index.html')
      return
    if self.path == '/favicon.ico' :
      self.redirect_to('/static/favicon.ico')
      return

    # All requests must be of this form:
    #   /dispatcher/remainder
    # where 'dispatcher' indicates which do_GET_* dispatcher to run
    # and 'remainder' is the remaining path sent to that dispatcher.
    normpath = posixpath.normpath(self.path)
    match = PATHSPLIT_RE.match(normpath)
    if not match:
      # Not of the /dispatcher/remainder form (e.g. '/foo').
      self.send_error(404)
      return
    (dispatcher_name, remainder) = match.groups()
    dispatchers = {
      'results': self.do_GET_results,
      'static': self.do_GET_static,
    }
    dispatcher = dispatchers.get(dispatcher_name)
    if not dispatcher:
      self.send_error(404)
      return
    dispatcher(remainder)

  def do_GET_results(self, type):
    """ Handle a GET request for GM results.

    If the server has not finished loading results yet, responds with a
    header-only dictionary telling the client when to check back.

    Args:
      type: string indicating which set of results to return;
            must be one of the results.RESULTS_* constants
    """
    logging.debug('do_GET_results: sending results of type "%s"' % type)
    try:
      # Since we must make multiple calls to the Results object, grab a
      # reference to it in case it is updated to point at a new Results
      # object within another thread.
      #
      # TODO(epoger): Rather than using a global variable for the handler
      # to refer to the Server object, make Server a subclass of
      # HTTPServer, and then it could be available to the handler via
      # the handler's .server instance variable.
      results_obj = _SERVER.results
      if results_obj:
        response_dict = self.package_results(results_obj, type)
      else:
        now = int(time.time())
        response_dict = {
            'header': {
                'resultsStillLoading': True,
                'timeUpdated': now,
                'timeNextUpdateAvailable': now + RELOAD_INTERVAL_UNTIL_READY,
            },
        }
      self.send_json_dict(response_dict)
    except:
      # Report the failure to the client, then re-raise so that the error is
      # still surfaced on the server side.
      self.send_error(404)
      raise

  def package_results(self, results_obj, type):
    """ Given a nonempty "results" object, package it as a response_dict
    as needed within do_GET_results.

    Args:
      results_obj: nonempty "results" object
      type: string indicating which set of results to return;
            must be one of the results.RESULTS_* constants

    Returns: the dictionary from results_obj.get_results_of_type(type), with
        a 'header' entry added.
    """
    response_dict = results_obj.get_results_of_type(type)
    time_updated = results_obj.get_timestamp()
    response_dict['header'] = {
        # Timestamps:
        # 1. when this data was last updated
        # 2. when the caller should check back for new data (if ever)
        #
        # We only return these timestamps if the --reload argument was passed;
        # otherwise, we have no idea when the expectations were last updated
        # (we allow the user to maintain her own expectations as she sees fit).
        'timeUpdated': time_updated if _SERVER.reload_seconds else None,
        'timeNextUpdateAvailable': (
            (time_updated+_SERVER.reload_seconds) if _SERVER.reload_seconds
            else None),

        # The type we passed to get_results_of_type()
        'type': type,

        # Hash of testData, which the client must return with any edits--
        # this ensures that the edits were made to a particular dataset.
        'dataHash': str(hash(repr(response_dict['testData']))),

        # Whether the server will accept edits back.
        'isEditable': _SERVER.is_editable,

        # Whether the service is accessible from other hosts.
        'isExported': _SERVER.is_exported,
    }
    return response_dict

  def do_GET_static(self, path):
    """ Handle a GET request for a file under the 'static' directory.
    Only allow serving of files within the 'static' directory that is a
    filesystem sibling of this script.

    Args:
      path: path to file (under static directory) to retrieve
    """
    # Strip arguments ('?resultsToLoad=all') from the path
    path = urlparse.urlparse(path).path

    logging.debug('do_GET_static: sending file "%s"' % path)
    static_dir = os.path.realpath(os.path.join(PARENT_DIRECTORY, 'static'))
    full_path = os.path.realpath(os.path.join(static_dir, path))
    # Require a path-separator boundary after static_dir: a bare startswith()
    # would also accept sibling directories whose names merely begin with
    # 'static' (e.g. 'static-private').
    if (full_path == static_dir or
        full_path.startswith(static_dir + os.sep)):
      self.send_file(full_path)
    else:
      logging.error(
          'Attempted do_GET_static() of path [%s] outside of static dir [%s]'
          % (full_path, static_dir))
      self.send_error(404)

  def do_POST(self):
    """ Handles all POST requests, forwarding them to the appropriate
        do_POST_* dispatcher.  Responds 200 on success; on any failure
        (including an unknown path) responds 404 and re-raises. """
    # All requests must be of this form:
    #   /dispatcher
    # where 'dispatcher' indicates which do_POST_* dispatcher to run.
    normpath = posixpath.normpath(self.path)
    dispatchers = {
      '/edits': self.do_POST_edits,
    }
    try:
      dispatcher = dispatchers[normpath]
      dispatcher()
      self.send_response(200)
      # Terminate the header section; without this the response is never
      # completed.
      self.end_headers()
    except:
      self.send_error(404)
      raise

  def do_POST_edits(self):
    """ Handle a POST request with modifications to GM expectations, in this
    format:

    {
      'oldResultsType': 'all',    # type of results that the client loaded
                                  # and then made modifications to
      'oldResultsHash': 39850913, # hash of results when the client loaded them
                                  # (ensures that the client and server apply
                                  # modifications to the same base)
      'modifications': [
        {
          'builder': 'Test-Android-Nexus10-MaliT604-Arm7-Debug',
          'test': 'strokerect',
          'config': 'gpu',
          'expectedHashType': 'bitmap-64bitMD5',
          'expectedHashDigest': '1707359671708613629',
        },
        ...
      ],
    }

    Raises an Exception if there were any problems.
    """
    if not _SERVER.is_editable:
      raise Exception('this server is not running in --editable mode')

    content_type = self.headers[_HTTP_HEADER_CONTENT_TYPE]
    if content_type != 'application/json;charset=UTF-8':
      raise Exception('unsupported %s [%s]' % (
          _HTTP_HEADER_CONTENT_TYPE, content_type))

    content_length = int(self.headers[_HTTP_HEADER_CONTENT_LENGTH])
    json_data = self.rfile.read(content_length)
    data = json.loads(json_data)
    logging.debug('do_POST_edits: received new GM expectations data [%s]' %
                  data)

    # Update the results on disk with the information we received from the
    # client.
    # We must hold _SERVER.results_rlock while we do this, to guarantee that
    # no other thread updates expectations (from the Skia repo) while we are
    # updating them (using the info we received from the client).
    with _SERVER.results_rlock:
      results_obj = _SERVER.results
      if not results_obj:
        # Nothing has been loaded yet, so there is no base to edit against.
        raise Exception('results have not finished loading yet; '
                        'cannot apply modifications')
      oldResultsType = data['oldResultsType']
      oldResults = results_obj.get_results_of_type(oldResultsType)
      oldResultsHash = str(hash(repr(oldResults['testData'])))
      if oldResultsHash != data['oldResultsHash']:
        raise Exception('results of type "%s" changed while the client was '
                        'making modifications. The client should reload the '
                        'results and submit the modifications again.' %
                        oldResultsType)
      results_obj.edit_expectations(data['modifications'])
      # Read the updated results back from disk.
      _SERVER.update_results()

  def redirect_to(self, url):
    """ Redirect the HTTP client to a different url.

    Args:
      url: URL to redirect the HTTP client to
    """
    self.send_response(301)
    self.send_header('Location', url)
    self.end_headers()

  def send_file(self, path):
    """ Send the contents of the file at this path, with a mimetype based
        on the filename extension.  Responds 404 if path is not a regular
        file.

    Args:
      path: path of file whose contents to send to the HTTP client
    """
    # Grab the extension if there is one
    extension = os.path.splitext(path)[1]
    if len(extension) >= 1:
      # Drop the leading '.' so the extension matches the MIME_TYPE_MAP keys.
      extension = extension[1:]

    # Determine the MIME type of the file from its extension
    mime_type = MIME_TYPE_MAP.get(extension, MIME_TYPE_MAP[''])

    # Open the file and send it over HTTP
    if os.path.isfile(path):
      with open(path, 'rb') as sending_file:
        self.send_response(200)
        self.send_header('Content-type', mime_type)
        self.end_headers()
        self.wfile.write(sending_file.read())
    else:
      self.send_error(404)

  def send_json_dict(self, json_dict):
    """ Send the contents of this dictionary in JSON format, with a JSON
        mimetype.

    Args:
      json_dict: dictionary to send
    """
    self.send_response(200)
    self.send_header('Content-type', 'application/json')
    self.end_headers()
    json.dump(json_dict, self.wfile)
497
498
def main():
  """ Parses the command-line flags, stores a Server built from them in the
  module-level _SERVER global, and runs it. """
  global _SERVER
  logging.basicConfig(level=logging.INFO)

  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      '--actuals-dir',
      default=DEFAULT_ACTUALS_DIR,
      help=('Directory into which we will check out the latest '
            'actual GM results. If this directory does not '
            'exist, it will be created. Defaults to %(default)s'))
  arg_parser.add_argument(
      '--editable',
      action='store_true',
      help=('Allow HTTP clients to submit new baselines.'))
  arg_parser.add_argument(
      '--export',
      action='store_true',
      help=('Instead of only allowing access from HTTP clients '
            'on localhost, allow HTTP clients on other hosts '
            'to access this server.  WARNING: doing so will '
            'allow users on other hosts to modify your '
            'GM expectations, if combined with --editable.'))
  arg_parser.add_argument(
      '--port',
      type=int,
      default=DEFAULT_PORT,
      help=('Which TCP port to listen on for HTTP requests; '
            'defaults to %(default)s'))
  arg_parser.add_argument(
      '--reload',
      type=int,
      default=0,
      help=('How often (a period in seconds) to update the '
            'results.  If specified, both expected and actual '
            'results will be updated by running "gclient sync" '
            'on your Skia checkout as a whole.  '
            'By default, we do not reload at all, and you '
            'must restart the server to pick up new data.'))
  options = arg_parser.parse_args()

  # Request handlers look the server up through the _SERVER global, so it
  # must be assigned before run() starts serving.
  _SERVER = Server(actuals_dir=options.actuals_dir,
                   port=options.port,
                   export=options.export,
                   editable=options.editable,
                   reload_seconds=options.reload)
  _SERVER.run()
533
534
# Start the server only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()
537