server.py revision 50ad8e4d8efcd04f8a2c34cc32f6fecb3985a1a4
1#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
9HTTP server for our HTML rebaseline viewer.
10"""
11
12# System-level imports
13import argparse
14import BaseHTTPServer
15import json
16import logging
17import os
18import posixpath
19import re
20import shutil
21import socket
22import subprocess
23import sys
24import thread
25import threading
26import time
27import urlparse
28
29# Imports from within Skia
30#
31# We need to add the 'tools' directory, so that we can import svn.py within
32# that directory.
33# Make sure that the 'tools' dir is in the PYTHONPATH, but add it at the *end*
34# so any dirs that are already in the PYTHONPATH will be preferred.
35PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
36TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(PARENT_DIRECTORY))
37TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')
38if TOOLS_DIRECTORY not in sys.path:
39  sys.path.append(TOOLS_DIRECTORY)
40import svn
41
42# Imports from local dir
43import results
44
# URL of the SVN repository from which the actual GM results are checked out
# (see Server.__init__ and Server.update_results).
ACTUALS_SVN_REPO = 'http://skia-autogen.googlecode.com/svn/gm-actual'
# Splits a request path of the form '/dispatcher/remainder' into its
# 'dispatcher' and 'remainder' parts (see HTTPRequestHandler.do_GET).
PATHSPLIT_RE = re.compile('/([^/]+)/(.+)')
# Directory holding the expected GM results; passed to results.Results as
# expected_root.
EXPECTATIONS_DIR = os.path.join(TRUNK_DIRECTORY, 'expectations', 'gm')
# Directory under 'static' that is passed to results.Results as
# generated_images_root.
GENERATED_IMAGES_ROOT = os.path.join(PARENT_DIRECTORY, 'static',
                                     'generated-images')

# A simple dictionary of file name extensions to MIME types. The empty string
# entry is used as the default when no extension was given or if the extension
# has no entry in this dictionary.
MIME_TYPE_MAP = {'': 'application/octet-stream',
                 'html': 'text/html',
                 'css': 'text/css',
                 'png': 'image/png',
                 'js': 'application/javascript',
                 'json': 'application/json'
                 }

# Defaults for the --actuals-dir and --port command-line arguments (and for
# the corresponding Server constructor parameters).
DEFAULT_ACTUALS_DIR = '.gm-actuals'
DEFAULT_PORT = 8888

# How often (in seconds) clients should reload while waiting for initial
# results to load.
RELOAD_INTERVAL_UNTIL_READY = 10

# Names of the HTTP request headers read by HTTPRequestHandler.do_POST_edits.
_HTTP_HEADER_CONTENT_LENGTH = 'Content-Length'
_HTTP_HEADER_CONTENT_TYPE = 'Content-Type'

# The single Server instance; the request handlers reach it through this
# module-level global.
_SERVER = None   # This gets filled in by main()
73
74
def _run_command(args, directory):
  """Runs a command and returns stdout as a single string.

  Both stdout and stderr of the child process are captured; stderr is only
  surfaced (inside the exception message) if the command fails.

  Args:
    args: the command to run, as a list of arguments
    directory: directory within which to run the command

  Returns: stdout, as a string

  Raises an Exception if the command failed (exited with nonzero return code).
  """
  logging.debug('_run_command: %s in directory %s' % (args, directory))
  proc = subprocess.Popen(args, cwd=directory,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
  (stdout, stderr) = proc.communicate()
  # Compare by value, not identity: "is not 0" only happens to work because
  # CPython caches small integers.
  if proc.returncode != 0:
    raise Exception('command "%s" failed in dir "%s": %s' %
                    (args, directory, stderr))
  return stdout
95
96
def _get_routable_ip_address():
  """Returns routable IP address of this host (the IP address of its network
     interface that would be used for most traffic, not its localhost
     interface).  See http://stackoverflow.com/a/166589

  The address is discovered by connecting a datagram socket to a well-known
  external address and reading back the local address the OS chose for it.

  Returns: the local IP address, as a string

  Raises socket.error if the socket cannot be connected (e.g. no route).
  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
  try:
    sock.connect(('8.8.8.8', 80))
    return sock.getsockname()[0]
  finally:
    # Always release the socket, even if connect()/getsockname() raised.
    sock.close()
106
107
def _create_svn_checkout(dir_path, repo_url):
  """Returns an svn.Svn object for the local checkout at dir_path, creating
  the checkout first if the directory is not there yet.

  Args:
    dir_path: path to the local checkout; if this directory does not yet exist,
              it will be created and the repo will be checked out into it
    repo_url: URL of SVN repo to check out into dir_path (unless the local
              checkout already exists)
  Returns: an svn.Svn object referring to the local checkout.
  """
  checkout = svn.Svn(dir_path)
  if os.path.isdir(dir_path):
    # An existing directory is taken to be a ready-to-use checkout.
    return checkout
  os.makedirs(dir_path)
  checkout.Checkout(repo_url, '.')
  return checkout
124
125
class Server(object):
  """ HTTP server for our HTML rebaseline viewer.

  Holds the local checkout of actual GM results, the most recently computed
  results.Results object, and the lock that serializes updates to both.
  """

  def __init__(self,
               actuals_dir=DEFAULT_ACTUALS_DIR,
               port=DEFAULT_PORT, export=False, editable=True,
               reload_seconds=0):
    """
    Args:
      actuals_dir: directory under which we will check out the latest actual
                   GM results
      port: which TCP port to listen on for HTTP requests
      export: whether to allow HTTP clients on other hosts to access this server
      editable: whether HTTP clients are allowed to submit new baselines
      reload_seconds: polling interval with which to check for new results;
                      if 0, don't check for new results at all
    """
    self._actuals_dir = actuals_dir
    self._port = port
    self._export = export
    self._editable = editable
    self._reload_seconds = reload_seconds
    self._actuals_repo = _create_svn_checkout(
        dir_path=actuals_dir, repo_url=ACTUALS_SVN_REPO)

    # Base URL at which this server can be reached; filled in by run() once
    # the listening socket has been bound.
    self._url = None

    # Reentrant lock that must be held whenever updating EITHER of:
    # 1. self._results
    # 2. the expected or actual results on local disk
    self.results_rlock = threading.RLock()
    # self._results will be filled in by calls to update_results()
    self._results = None

  @property
  def results(self):
    """ Returns the most recently generated results, or None if we don't have
    any valid results (update_results() has not completed yet). """
    return self._results

  @property
  def is_exported(self):
    """ Returns true iff HTTP clients on other hosts are allowed to access
    this server. """
    return self._export

  @property
  def is_editable(self):
    """ Returns true iff HTTP clients are allowed to submit new baselines. """
    return self._editable

  @property
  def reload_seconds(self):
    """ Returns the result reload period in seconds, or 0 if we don't reload
    results. """
    return self._reload_seconds

  def update_results(self, invalidate=False):
    """ Create or update self._results, based on the expectations in
    EXPECTATIONS_DIR and the latest actuals from skia-autogen.

    We hold self.results_rlock while we do this, to guarantee that no other
    thread attempts to update either self._results or the underlying files at
    the same time.

    Args:
      invalidate: if True, invalidate self._results immediately upon entry;
                  otherwise, we will let readers see those results until we
                  replace them
    """
    with self.results_rlock:
      if invalidate:
        self._results = None
      logging.info('Updating actual GM results in %s from SVN repo %s ...' % (
          self._actuals_dir, ACTUALS_SVN_REPO))
      self._actuals_repo.Update('.')

      # We only update the expectations dir if the server was run with a
      # nonzero --reload argument; otherwise, we expect the user to maintain
      # her own expectations as she sees fit.
      #
      # Because the Skia repo is moving from SVN to git, and git does not
      # support updating a single directory tree, we have to update the entire
      # repo checkout.
      #
      # Because Skia uses depot_tools, we have to update using "gclient sync"
      # instead of raw git (or SVN) update.  Happily, this will work whether
      # the checkout was created using git or SVN.
      if self._reload_seconds:
        logging.info(
            'Updating expected GM results in %s by syncing Skia repo ...' %
            EXPECTATIONS_DIR)
        _run_command(['gclient', 'sync'], TRUNK_DIRECTORY)

      logging.info(
          ('Parsing results from actuals in %s and expectations in %s, '
           + 'and generating pixel diffs (may take a while) ...') % (
               self._actuals_dir, EXPECTATIONS_DIR))
      self._results = results.Results(
          actuals_root=self._actuals_dir,
          expected_root=EXPECTATIONS_DIR,
          generated_images_root=GENERATED_IMAGES_ROOT)

  def _result_loader(self, reload_seconds=0):
    """ Call self.update_results(), either once or periodically.

    Intended to run in its own thread (see run()); it reads self._url, so
    run() must have set that before starting the thread.

    Params:
      reload_seconds: integer; if nonzero, reload results at this interval
          (in which case, this method will never return!)
    """
    self.update_results()
    logging.info('Initial results loaded. Ready for requests on %s' % self._url)
    if reload_seconds:
      while True:
        time.sleep(reload_seconds)
        self.update_results()

  def run(self):
    """ Bind the HTTP server, start loading results in a background thread,
    and serve requests forever (this method does not return).

    If the server is exported, it listens on all interfaces and advertises the
    host's routable IP address; otherwise it listens on 127.0.0.1 only.
    """
    if self._export:
      server_address = ('', self._port)
      host = _get_routable_ip_address()
      if self._editable:
        logging.warning('Running with combination of "export" and "editable" '
                        'flags.  Users on other machines will '
                        'be able to modify your GM expectations!')
    else:
      host = '127.0.0.1'
      server_address = (host, self._port)
    http_server = BaseHTTPServer.HTTPServer(server_address, HTTPRequestHandler)
    self._url = 'http://%s:%d' % (host, http_server.server_port)
    logging.info('Listening for requests on %s' % self._url)

    # Start the loader thread only now that self._url is set: the loader
    # logs self._url once the initial results are in, and must never observe
    # it unset.
    arg_tuple = (self._reload_seconds,)  # start_new_thread needs a tuple,
                                         # even though it holds just one param
    thread.start_new_thread(self._result_loader, arg_tuple)

    http_server.serve_forever()
260
261
class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
  """ HTTP request handlers for various types of queries this server knows
      how to handle (static HTML and Javascript, expected/actual results, etc.)

  Handlers reach the running Server object through the module-level _SERVER
  global.
  """
  def do_GET(self):
    """ Handles all GET requests, forwarding them to the appropriate
        do_GET_* dispatcher.

    Requests whose path is not of the form /dispatcher/remainder, or that
    name an unknown dispatcher, are answered with a 404. """
    if self.path in ('', '/', '/index.html'):
      self.redirect_to('/static/index.html')
      return
    if self.path == '/favicon.ico':
      self.redirect_to('/static/favicon.ico')
      return

    # All requests must be of this form:
    #   /dispatcher/remainder
    # where 'dispatcher' indicates which do_GET_* dispatcher to run
    # and 'remainder' is the remaining path sent to that dispatcher.
    normpath = posixpath.normpath(self.path)
    match = PATHSPLIT_RE.match(normpath)
    if not match:
      logging.error('do_GET: malformed request path [%s]' % self.path)
      self.send_error(404)
      return
    (dispatcher_name, remainder) = match.groups()
    dispatchers = {
      'results': self.do_GET_results,
      'static': self.do_GET_static,
    }
    dispatcher = dispatchers.get(dispatcher_name)
    if dispatcher is None:
      logging.error('do_GET: unknown dispatcher [%s]' % dispatcher_name)
      self.send_error(404)
      return
    dispatcher(remainder)

  def do_GET_results(self, type):
    """ Handle a GET request for GM results.

    If no results are available yet, a placeholder response is sent whose
    header tells the client that results are still loading and when to retry.

    Args:
      type: string indicating which set of results to return;
            must be one of the results.RESULTS_* constants
    """
    logging.debug('do_GET_results: sending results of type "%s"' % type)
    try:
      # Since we must make multiple calls to the Results object, grab a
      # reference to it in case it is updated to point at a new Results
      # object within another thread.
      #
      # TODO(epoger): Rather than using a global variable for the handler
      # to refer to the Server object, make Server a subclass of
      # HTTPServer, and then it could be available to the handler via
      # the handler's .server instance variable.
      results_obj = _SERVER.results
      if results_obj:
        response_dict = self.package_results(results_obj, type)
      else:
        now = int(time.time())
        response_dict = {
            'header': {
                'resultsStillLoading': True,
                'timeUpdated': now,
                'timeNextUpdateAvailable': now + RELOAD_INTERVAL_UNTIL_READY,
            },
        }
      self.send_json_dict(response_dict)
    except Exception:
      self.send_error(404)
      raise

  def package_results(self, results_obj, type):
    """ Given a nonempty "results" object, package it as a response_dict
    as needed within do_GET_results.

    Args:
      results_obj: nonempty "results" object
      type: string indicating which set of results to return;
            must be one of the results.RESULTS_* constants

    Returns: the dictionary from results_obj.get_results_of_type(type), with
        a 'header' entry added.
    """
    response_dict = results_obj.get_results_of_type(type)
    time_updated = results_obj.get_timestamp()
    response_dict['header'] = {
        # Timestamps:
        # 1. when this data was last updated
        # 2. when the caller should check back for new data (if ever)
        #
        # We only return these timestamps if the --reload argument was passed;
        # otherwise, we have no idea when the expectations were last updated
        # (we allow the user to maintain her own expectations as she sees fit).
        'timeUpdated': time_updated if _SERVER.reload_seconds else None,
        'timeNextUpdateAvailable': (
            (time_updated+_SERVER.reload_seconds) if _SERVER.reload_seconds
            else None),

        # The type we passed to get_results_of_type()
        'type': type,

        # Hash of testData, which the client must return with any edits--
        # this ensures that the edits were made to a particular dataset.
        'dataHash': str(hash(repr(response_dict['testData']))),

        # Whether the server will accept edits back.
        'isEditable': _SERVER.is_editable,

        # Whether the service is accessible from other hosts.
        'isExported': _SERVER.is_exported,
    }
    return response_dict

  def do_GET_static(self, path):
    """ Handle a GET request for a file under the 'static' directory.
    Only allow serving of files within the 'static' directory that is a
    filesystem sibling of this script.

    Args:
      path: path to file (under static directory) to retrieve
    """
    # Strip arguments ('?resultsToLoad=all') from the path
    path = urlparse.urlparse(path).path

    logging.debug('do_GET_static: sending file "%s"' % path)
    static_dir = os.path.realpath(os.path.join(PARENT_DIRECTORY, 'static'))
    full_path = os.path.realpath(os.path.join(static_dir, path))
    # Require a path-separator boundary after static_dir: a bare prefix test
    # would also accept sibling directories such as 'static-private'.
    if full_path == static_dir or full_path.startswith(static_dir + os.sep):
      self.send_file(full_path)
    else:
      logging.error(
          'Attempted do_GET_static() of path [%s] outside of static dir [%s]'
          % (full_path, static_dir))
      self.send_error(404)

  def do_POST(self):
    """ Handles all POST requests, forwarding them to the appropriate
        do_POST_* dispatcher.

    Sends an empty 200 response if the dispatcher succeeds; sends a 404 and
    re-raises if the path is unknown or the dispatcher raises. """
    # All requests must be of this form:
    #   /dispatcher
    # where 'dispatcher' indicates which do_POST_* dispatcher to run.
    normpath = posixpath.normpath(self.path)
    dispatchers = {
      '/edits': self.do_POST_edits,
    }
    try:
      dispatcher = dispatchers[normpath]
      dispatcher()
      self.send_response(200)
      # Terminate the header block; without this the client never receives a
      # complete HTTP response.
      self.end_headers()
    except Exception:
      self.send_error(404)
      raise

  def do_POST_edits(self):
    """ Handle a POST request with modifications to GM expectations, in this
    format:

    {
      'oldResultsType': 'all',    # type of results that the client loaded
                                  # and then made modifications to
      'oldResultsHash': 39850913, # hash of results when the client loaded them
                                  # (ensures that the client and server apply
                                  # modifications to the same base)
      'modifications': [
        {
          'builder': 'Test-Android-Nexus10-MaliT604-Arm7-Debug',
          'test': 'strokerect',
          'config': 'gpu',
          'expectedHashType': 'bitmap-64bitMD5',
          'expectedHashDigest': '1707359671708613629',
        },
        ...
      ],
    }

    Raises an Exception if there were any problems.
    """
    if not _SERVER.is_editable:
      raise Exception('this server is not running in --editable mode')

    content_type = self.headers[_HTTP_HEADER_CONTENT_TYPE]
    if content_type != 'application/json;charset=UTF-8':
      raise Exception('unsupported %s [%s]' % (
          _HTTP_HEADER_CONTENT_TYPE, content_type))

    content_length = int(self.headers[_HTTP_HEADER_CONTENT_LENGTH])
    json_data = self.rfile.read(content_length)
    data = json.loads(json_data)
    logging.debug('do_POST_edits: received new GM expectations data [%s]' %
                  data)

    # Update the results on disk with the information we received from the
    # client.
    # We must hold _SERVER.results_rlock while we do this, to guarantee that
    # no other thread updates expectations (from the Skia repo) while we are
    # updating them (using the info we received from the client).
    with _SERVER.results_rlock:
      results_obj = _SERVER.results
      if results_obj is None:
        # Nothing to validate the client's hash against (results have not
        # been loaded, or the last reload did not complete).
        raise Exception('no results are currently loaded on the server. '
                        'The client should reload the results and submit '
                        'the modifications again.')
      old_results_type = data['oldResultsType']
      old_results = results_obj.get_results_of_type(old_results_type)
      old_results_hash = str(hash(repr(old_results['testData'])))
      if old_results_hash != data['oldResultsHash']:
        raise Exception('results of type "%s" changed while the client was '
                        'making modifications. The client should reload the '
                        'results and submit the modifications again.' %
                        old_results_type)
      results_obj.edit_expectations(data['modifications'])

    # Read the updated results back from disk.
    # We can do this in a separate thread; we should return our success message
    # to the UI as soon as possible.
    thread.start_new_thread(_SERVER.update_results, (True,))

  def redirect_to(self, url):
    """ Redirect the HTTP client to a different url.

    Args:
      url: URL to redirect the HTTP client to
    """
    self.send_response(301)
    self.send_header('Location', url)
    self.end_headers()

  def send_file(self, path):
    """ Send the contents of the file at this path, with a mimetype based
        on the filename extension.  Sends a 404 if path is not a regular file.

    Args:
      path: path of file whose contents to send to the HTTP client
    """
    # Grab the extension if there is one
    extension = os.path.splitext(path)[1]
    if len(extension) >= 1:
      extension = extension[1:]

    # Determine the MIME type of the file from its extension
    mime_type = MIME_TYPE_MAP.get(extension, MIME_TYPE_MAP[''])

    # Open the file and send it over HTTP
    if os.path.isfile(path):
      with open(path, 'rb') as sending_file:
        self.send_response(200)
        self.send_header('Content-type', mime_type)
        self.end_headers()
        self.wfile.write(sending_file.read())
    else:
      self.send_error(404)

  def send_json_dict(self, json_dict):
    """ Send the contents of this dictionary in JSON format, with a JSON
        mimetype.

    Args:
      json_dict: dictionary to send
    """
    self.send_response(200)
    self.send_header('Content-type', 'application/json')
    self.end_headers()
    json.dump(json_dict, self.wfile)
507
508
def main():
  """ Parse the command-line flags, create the global Server from them, and
  run it. """
  global _SERVER
  logging.basicConfig(level=logging.INFO)

  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      '--actuals-dir',
      default=DEFAULT_ACTUALS_DIR,
      help=('Directory into which we will check out the latest '
            'actual GM results. If this directory does not '
            'exist, it will be created. Defaults to %(default)s'))
  arg_parser.add_argument(
      '--editable',
      action='store_true',
      help=('Allow HTTP clients to submit new baselines.'))
  arg_parser.add_argument(
      '--export',
      action='store_true',
      help=('Instead of only allowing access from HTTP clients '
            'on localhost, allow HTTP clients on other hosts '
            'to access this server.  WARNING: doing so will '
            'allow users on other hosts to modify your '
            'GM expectations, if combined with --editable.'))
  arg_parser.add_argument(
      '--port',
      type=int,
      default=DEFAULT_PORT,
      help=('Which TCP port to listen on for HTTP requests; '
            'defaults to %(default)s'))
  arg_parser.add_argument(
      '--reload',
      type=int,
      default=0,
      help=('How often (a period in seconds) to update the '
            'results.  If specified, both expected and actual '
            'results will be updated by running "gclient sync" '
            'on your Skia checkout as a whole.  '
            'By default, we do not reload at all, and you '
            'must restart the server to pick up new data.'))
  options = arg_parser.parse_args()

  # The request handlers find the server through the module-level _SERVER.
  _SERVER = Server(actuals_dir=options.actuals_dir,
                   port=options.port,
                   export=options.export,
                   editable=options.editable,
                   reload_seconds=options.reload)
  _SERVER.run()


if __name__ == '__main__':
  main()
547