server.py revision d1c85d29204ad94950b23014c03e781409b9b682
1#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
9HTTP server for our HTML rebaseline viewer.
10"""
11
12# System-level imports
13import argparse
14import BaseHTTPServer
15import json
16import logging
17import os
18import posixpath
19import re
20import shutil
21import socket
22import subprocess
23import sys
24import thread
25import threading
26import time
27import urlparse
28
29# Imports from within Skia
30#
31# We need to add the 'tools' directory, so that we can import svn.py within
32# that directory.
33# Make sure that the 'tools' dir is in the PYTHONPATH, but add it at the *end*
34# so any dirs that are already in the PYTHONPATH will be preferred.
35PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
36TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(PARENT_DIRECTORY))
37TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')
38if TOOLS_DIRECTORY not in sys.path:
39  sys.path.append(TOOLS_DIRECTORY)
40import svn
41
42# Imports from local dir
43#
44# Note: we import results under a different name, to avoid confusion with the
45# Server.results() property. See discussion at
46# https://codereview.chromium.org/195943004/diff/1/gm/rebaseline_server/server.py#newcode44
47import imagepairset
48import results as results_mod
49
# Splits a request path of the form '/dispatcher/remainder' into its
# (dispatcher, remainder) parts.
PATHSPLIT_RE = re.compile('/([^/]+)/(.+)')

# Maps file name extensions (without the leading dot) to MIME types.
# The '' entry is the fallback used when a file has no extension, or when its
# extension is not listed here.
MIME_TYPE_MAP = {
    '': 'application/octet-stream',
    'css': 'text/css',
    'html': 'text/html',
    'js': 'application/javascript',
    'json': 'application/json',
    'png': 'image/png',
}
62
# Keys of the JSON payload that clients POST to '/edits'; see the docstring of
# HTTPRequestHandler.do_POST_edits() for the payload layout.
# NOTE: Keep these in sync with static/constants.js
KEY__EDITS__MODIFICATIONS = 'modifications'
KEY__EDITS__OLD_RESULTS_HASH = 'oldResultsHash'
KEY__EDITS__OLD_RESULTS_TYPE = 'oldResultsType'

# Defaults for the Server constructor and the matching command-line flags.
DEFAULT_PORT = 8888
DEFAULT_ACTUALS_DIR = results_mod.DEFAULT_ACTUALS_DIR
DEFAULT_ACTUALS_REPO_URL = 'http://skia-autogen.googlecode.com/svn/gm-actual'
DEFAULT_ACTUALS_REPO_REVISION = 'HEAD'

# Number of seconds after which a client should ask again for results, when
# the initial results have not finished loading yet.
RELOAD_INTERVAL_UNTIL_READY = 10

# Names of the HTTP request headers read by do_POST_edits().
_HTTP_HEADER_CONTENT_TYPE = 'Content-Type'
_HTTP_HEADER_CONTENT_LENGTH = 'Content-Length'

# The single Server instance; main() fills this in, and the request handlers
# read it.
_SERVER = None
82
83
def _run_command(args, directory):
  """Runs a command and returns stdout as a single string.

  Args:
    args: the command to run, as a list of arguments
    directory: directory within which to run the command

  Returns: stdout, as a string

  Raises an Exception if the command failed (exited with nonzero return code);
  the exception message includes whatever the command wrote to stderr.
  """
  logging.debug('_run_command: %s in directory %s', args, directory)
  proc = subprocess.Popen(args, cwd=directory,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
  # communicate() drains both pipes and waits for the process to exit, so
  # proc.returncode is set by the time it returns.
  (stdout, stderr) = proc.communicate()
  # Compare by value: an identity test ('is not 0') against an int literal
  # only works by accident of the interpreter's small-integer caching.
  if proc.returncode != 0:
    raise Exception('command "%s" failed in dir "%s": %s' %
                    (args, directory, stderr))
  return stdout
104
105
def _get_routable_ip_address():
  """Returns routable IP address of this host (the IP address of its network
     interface that would be used for most traffic, not its localhost
     interface).  See http://stackoverflow.com/a/166589

  Returns: the local address of a datagram socket connected to 8.8.8.8:80,
  as a string.

  Any socket error raised while connecting is propagated to the caller.
  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
  try:
    sock.connect(('8.8.8.8', 80))
    # getsockname() returns (host, port); we only want the host part.
    return sock.getsockname()[0]
  finally:
    # Always release the socket, even if connect() failed (e.g. because
    # there is no route to the address).
    sock.close()
115
116
def _create_svn_checkout(dir_path, repo_url):
  """Returns an svn.Svn object for the local checkout at dir_path, creating
  that checkout first if the directory does not exist yet.

  Args:
    dir_path: path to the local checkout; if this directory does not yet exist,
              it will be created and the repo will be checked out into it
    repo_url: URL of SVN repo to check out into dir_path; ignored if dir_path
              already exists
  Returns: an svn.Svn object referring to the local checkout.
  """
  checkout = svn.Svn(dir_path)
  if os.path.isdir(dir_path):
    # The directory is already there, so we use it as-is.
    return checkout
  os.makedirs(dir_path)
  checkout.Checkout(repo_url, '.')
  return checkout
133
134
class Server(object):
  """ HTTP server for our HTML rebaseline viewer.

  Holds the local checkout of actual GM results and the most recently
  generated results object, and runs the HTTP server whose request handlers
  read them.
  """

  def __init__(self,
               actuals_dir=DEFAULT_ACTUALS_DIR,
               actuals_repo_revision=DEFAULT_ACTUALS_REPO_REVISION,
               actuals_repo_url=DEFAULT_ACTUALS_REPO_URL,
               port=DEFAULT_PORT, export=False, editable=True,
               reload_seconds=0):
    """
    Args:
      actuals_dir: directory under which we will check out the latest actual
                   GM results
      actuals_repo_revision: revision of actual-results.json files to process
      actuals_repo_url: SVN repo to download actual-results.json files from
      port: which TCP port to listen on for HTTP requests
      export: whether to allow HTTP clients on other hosts to access this server
      editable: whether HTTP clients are allowed to submit new baselines
      reload_seconds: polling interval with which to check for new results;
                      if 0, don't check for new results at all
    """
    self._actuals_dir = actuals_dir
    self._actuals_repo_revision = actuals_repo_revision
    self._actuals_repo_url = actuals_repo_url
    self._port = port
    self._export = export
    self._editable = editable
    self._reload_seconds = reload_seconds
    self._actuals_repo = _create_svn_checkout(
        dir_path=actuals_dir, repo_url=actuals_repo_url)

    # Base URL of this server; run() fills this in once the listening socket
    # has been bound.
    self._url = None

    # Reentrant lock that must be held whenever updating EITHER of:
    # 1. self._results
    # 2. the expected or actual results on local disk
    self.results_rlock = threading.RLock()
    # self._results will be filled in by calls to update_results()
    self._results = None

  @property
  def results(self):
    """ Returns the most recently generated results, or None if we don't have
    any valid results (update_results() has not completed yet). """
    return self._results

  @property
  def is_exported(self):
    """ Returns true iff HTTP clients on other hosts are allowed to access
    this server. """
    return self._export

  @property
  def is_editable(self):
    """ Returns true iff HTTP clients are allowed to submit new baselines. """
    return self._editable

  @property
  def reload_seconds(self):
    """ Returns the result reload period in seconds, or 0 if we don't reload
    results. """
    return self._reload_seconds

  def update_results(self, invalidate=False):
    """ Create or update self._results, based on the latest expectations and
    actuals.

    We hold self.results_rlock while we do this, to guarantee that no other
    thread attempts to update either self._results or the underlying files at
    the same time.

    Args:
      invalidate: if True, invalidate self._results immediately upon entry;
                  otherwise, we will let readers see those results until we
                  replace them
    """
    with self.results_rlock:
      if invalidate:
        self._results = None
      logging.info(
          'Updating actual GM results in %s to revision %s from repo %s ...',
          self._actuals_dir, self._actuals_repo_revision,
          self._actuals_repo_url)
      self._actuals_repo.Update(path='.', revision=self._actuals_repo_revision)

      # We only update the expectations dir if the server was run with a
      # nonzero --reload argument; otherwise, we expect the user to maintain
      # their own expectations as they see fit.
      #
      # Because the Skia repo is moving from SVN to git, and git does not
      # support updating a single directory tree, we have to update the entire
      # repo checkout.
      #
      # Because Skia uses depot_tools, we have to update using "gclient sync"
      # instead of raw git (or SVN) update.  Happily, this will work whether
      # the checkout was created using git or SVN.
      if self._reload_seconds:
        logging.info(
            'Updating expected GM results in %s by syncing Skia repo ...',
            results_mod.DEFAULT_EXPECTATIONS_DIR)
        _run_command(['gclient', 'sync'], TRUNK_DIRECTORY)

      self._results = results_mod.Results(actuals_root=self._actuals_dir)

  def _result_loader(self, reload_seconds=0):
    """ Call self.update_results(), either once or periodically.

    Params:
      reload_seconds: integer; if nonzero, reload results at this interval
          (in which case, this method will never return!)
    """
    self.update_results()
    logging.info('Initial results loaded. Ready for requests on %s', self._url)
    if reload_seconds:
      while True:
        time.sleep(reload_seconds)
        self.update_results()

  def run(self):
    """ Bind the HTTP server, start loading results in a background thread,
    and serve HTTP requests until the process is stopped.

    This call blocks in serve_forever().
    """
    if self._export:
      # An empty host makes the HTTP server listen on all interfaces; the
      # routable address is only used to build the URL we log.
      server_address = ('', self._port)
      host = _get_routable_ip_address()
      if self._editable:
        logging.warning('Running with combination of "export" and "editable" '
                        'flags.  Users on other machines will '
                        'be able to modify your GM expectations!')
    else:
      host = '127.0.0.1'
      server_address = (host, self._port)
    http_server = BaseHTTPServer.HTTPServer(server_address, HTTPRequestHandler)
    self._url = 'http://%s:%d' % (host, http_server.server_port)
    logging.info('Listening for requests on %s', self._url)

    # Start the loader only now that self._url is set: _result_loader() logs
    # it as soon as the initial results are ready.
    arg_tuple = (self._reload_seconds,)  # start_new_thread needs a tuple,
                                         # even though it holds just one param
    thread.start_new_thread(self._result_loader, arg_tuple)

    http_server.serve_forever()
270
271
class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
  """ HTTP request handlers for various types of queries this server knows
      how to handle (static HTML and Javascript, expected/actual results, etc.)

  The handlers reach the Server object through the module-level _SERVER
  variable.
  """
  def do_GET(self):
    """
    Handles all GET requests, forwarding them to the appropriate
    do_GET_* dispatcher.

    If we see any Exceptions, return a 404.  This fixes http://skbug.com/2147
    The exception is re-raised after the 404 has been sent.
    """
    try:
      logging.debug('do_GET: path="%s"', self.path)
      if self.path in ('', '/', '/index.html'):
        self.redirect_to('/static/index.html')
        return
      if self.path == '/favicon.ico':
        self.redirect_to('/static/favicon.ico')
        return

      # All requests must be of this form:
      #   /dispatcher/remainder
      # where 'dispatcher' indicates which do_GET_* dispatcher to run
      # and 'remainder' is the remaining path sent to that dispatcher.
      #
      # A path of any other form makes match() return None, and an unknown
      # dispatcher name raises KeyError; both end up in the handler below.
      normpath = posixpath.normpath(self.path)
      (dispatcher_name, remainder) = PATHSPLIT_RE.match(normpath).groups()
      dispatchers = {
          'results': self.do_GET_results,
          'static': self.do_GET_static,
      }
      dispatcher = dispatchers[dispatcher_name]
      dispatcher(remainder)
    except Exception:
      self.send_error(404)
      raise

  def do_GET_results(self, results_type):
    """ Handle a GET request for GM results.

    If no results are available yet, sends a header-only response that tells
    the client the results are still loading and when to ask again.

    Args:
      results_type: string indicating which set of results to return;
            must be one of the results_mod.RESULTS_* constants
    """
    logging.debug('do_GET_results: sending results of type "%s"', results_type)
    # Since we must make multiple calls to the Results object, grab a
    # reference to it in case it is updated to point at a new Results
    # object within another thread.
    #
    # TODO(epoger): Rather than using a global variable for the handler
    # to refer to the Server object, make Server a subclass of
    # HTTPServer, and then it could be available to the handler via
    # the handler's .server instance variable.
    results_obj = _SERVER.results
    if results_obj:
      response_dict = results_obj.get_packaged_results_of_type(
          results_type=results_type, reload_seconds=_SERVER.reload_seconds,
          is_editable=_SERVER.is_editable, is_exported=_SERVER.is_exported)
    else:
      now = int(time.time())
      response_dict = {
          results_mod.KEY__HEADER: {
              results_mod.KEY__HEADER__SCHEMA_VERSION: (
                  results_mod.REBASELINE_SERVER_SCHEMA_VERSION_NUMBER),
              results_mod.KEY__HEADER__IS_STILL_LOADING: True,
              results_mod.KEY__HEADER__TIME_UPDATED: now,
              results_mod.KEY__HEADER__TIME_NEXT_UPDATE_AVAILABLE: (
                  now + RELOAD_INTERVAL_UNTIL_READY),
          },
      }
    self.send_json_dict(response_dict)

  def do_GET_static(self, path):
    """ Handle a GET request for a file under the 'static' directory.
    Only allow serving of files within the 'static' directory that is a
    filesystem sibling of this script.

    Args:
      path: path to file (under static directory) to retrieve
    """
    # Strip arguments ('?resultsToLoad=all') from the path
    path = urlparse.urlparse(path).path

    logging.debug('do_GET_static: sending file "%s"', path)
    static_dir = os.path.realpath(os.path.join(PARENT_DIRECTORY, 'static'))
    full_path = os.path.realpath(os.path.join(static_dir, path))
    # Require a path-separator boundary after static_dir: a bare prefix test
    # would also accept sibling paths such as '<...>/static_other/file'.
    if full_path == static_dir or full_path.startswith(static_dir + os.sep):
      self.send_file(full_path)
    else:
      logging.error(
          'Attempted do_GET_static() of path [%s] outside of static dir [%s]'
          % (full_path, static_dir))
      self.send_error(404)

  def do_POST(self):
    """ Handles all POST requests, forwarding them to the appropriate
        do_POST_* dispatcher.

    Sends an empty 200 response if the dispatcher succeeded.  If it raised
    any Exception (or the path is unknown), sends a 404 and re-raises.
    """
    # All requests must be of this form:
    #   /dispatcher
    # where 'dispatcher' indicates which do_POST_* dispatcher to run.
    logging.debug('do_POST: path="%s"', self.path)
    normpath = posixpath.normpath(self.path)
    dispatchers = {
      '/edits': self.do_POST_edits,
    }
    try:
      dispatcher = dispatchers[normpath]
      dispatcher()
    except Exception:
      self.send_error(404)
      raise
    self.send_response(200)
    # send_response() only writes the status line and standard headers;
    # end_headers() writes the blank line that completes the response.
    self.end_headers()

  def do_POST_edits(self):
    """ Handle a POST request with modifications to GM expectations, in this
    format:

    {
      KEY__EDITS__OLD_RESULTS_TYPE: 'all',  # type of results that the client
                                            # loaded and then made
                                            # modifications to
      KEY__EDITS__OLD_RESULTS_HASH: 39850913, # hash of results when the client
                                              # loaded them (ensures that the
                                              # client and server apply
                                              # modifications to the same base)
      KEY__EDITS__MODIFICATIONS: [
        # as needed by results_mod.edit_expectations()
        ...
      ],
    }

    Raises an Exception if there were any problems.
    """
    if not _SERVER.is_editable:
      raise Exception('this server is not running in --editable mode')

    content_type = self.headers[_HTTP_HEADER_CONTENT_TYPE]
    if content_type != 'application/json;charset=UTF-8':
      raise Exception('unsupported %s [%s]' % (
          _HTTP_HEADER_CONTENT_TYPE, content_type))

    content_length = int(self.headers[_HTTP_HEADER_CONTENT_LENGTH])
    json_data = self.rfile.read(content_length)
    data = json.loads(json_data)
    logging.debug('do_POST_edits: received new GM expectations data [%s]',
                  data)

    # Update the results on disk with the information we received from the
    # client.
    # We must hold _SERVER.results_rlock while we do this, to guarantee that
    # no other thread updates expectations (from the Skia repo) while we are
    # updating them (using the info we received from the client).
    with _SERVER.results_rlock:
      # Grab one reference, so that every call below sees the same object.
      results_obj = _SERVER.results
      if results_obj is None:
        # There is nothing to compare the client's hash against until
        # update_results() has completed.
        raise Exception('results are not loaded yet; the client should '
                        'reload the results and submit the modifications '
                        'again.')
      oldResultsType = data[KEY__EDITS__OLD_RESULTS_TYPE]
      oldResults = results_obj.get_results_of_type(oldResultsType)
      oldResultsHash = str(hash(repr(oldResults[imagepairset.KEY__IMAGEPAIRS])))
      if oldResultsHash != data[KEY__EDITS__OLD_RESULTS_HASH]:
        raise Exception('results of type "%s" changed while the client was '
                        'making modifications. The client should reload the '
                        'results and submit the modifications again.' %
                        oldResultsType)
      results_obj.edit_expectations(data[KEY__EDITS__MODIFICATIONS])

    # Read the updated results back from disk.
    # We can do this in a separate thread; we should return our success message
    # to the UI as soon as possible.
    # The (True,) argument is invalidate=True, so readers see no results at
    # all until the reload has finished.
    thread.start_new_thread(_SERVER.update_results, (True,))

  def redirect_to(self, url):
    """ Redirect the HTTP client to a different url, using a 301 response.

    Args:
      url: URL to redirect the HTTP client to
    """
    self.send_response(301)
    self.send_header('Location', url)
    self.end_headers()

  def send_file(self, path):
    """ Send the contents of the file at this path, with a mimetype based
        on the filename extension.  Sends a 404 if path is not a regular file.

    Args:
      path: path of file whose contents to send to the HTTP client
    """
    # Grab the extension if there is one
    extension = os.path.splitext(path)[1]
    if len(extension) >= 1:
      # Drop the leading '.'
      extension = extension[1:]

    # Determine the MIME type of the file from its extension
    mime_type = MIME_TYPE_MAP.get(extension, MIME_TYPE_MAP[''])

    # Open the file and send it over HTTP
    if os.path.isfile(path):
      with open(path, 'rb') as sending_file:
        self.send_response(200)
        self.send_header('Content-type', mime_type)
        self.end_headers()
        # Copy in chunks, rather than reading the whole file into memory.
        shutil.copyfileobj(sending_file, self.wfile)
    else:
      self.send_error(404)

  def send_json_dict(self, json_dict):
    """ Send the contents of this dictionary in JSON format, with a JSON
        mimetype.

    Args:
      json_dict: dictionary to send
    """
    self.send_response(200)
    self.send_header('Content-type', 'application/json')
    self.end_headers()
    json.dump(json_dict, self.wfile)
485
486
def main():
  """Parses the command line, creates the global Server, and runs it.

  The Server is stored in the module-level _SERVER variable, where the HTTP
  request handlers look it up.
  """
  global _SERVER

  logging.basicConfig(level=logging.INFO,
                      datefmt='%m/%d/%Y %H:%M:%S',
                      format='%(asctime)s %(levelname)s %(message)s')

  # (flag, add_argument keyword args) pairs, in the order in which they are
  # registered with the parser.
  option_specs = [
      ('--actuals-dir', dict(
          default=DEFAULT_ACTUALS_DIR,
          help=('Directory into which we will check out the latest '
                'actual GM results. If this directory does not '
                'exist, it will be created. Defaults to %(default)s'))),
      ('--actuals-repo', dict(
          default=DEFAULT_ACTUALS_REPO_URL,
          help=('URL of SVN repo to download actual-results.json '
                'files from. Defaults to %(default)s'))),
      ('--actuals-revision', dict(
          default=DEFAULT_ACTUALS_REPO_REVISION,
          help=('revision of actual-results.json files to process. '
                'Defaults to %(default)s .  Beware of setting this '
                'argument in conjunction with --editable; you '
                'probably only want to edit results at HEAD.'))),
      ('--editable', dict(
          action='store_true',
          help=('Allow HTTP clients to submit new baselines.'))),
      ('--export', dict(
          action='store_true',
          help=('Instead of only allowing access from HTTP clients '
                'on localhost, allow HTTP clients on other hosts '
                'to access this server.  WARNING: doing so will '
                'allow users on other hosts to modify your '
                'GM expectations, if combined with --editable.'))),
      ('--port', dict(
          type=int,
          default=DEFAULT_PORT,
          help=('Which TCP port to listen on for HTTP requests; '
                'defaults to %(default)s'))),
      ('--reload', dict(
          type=int,
          default=0,
          help=('How often (a period in seconds) to update the '
                'results.  If specified, both expected and actual '
                'results will be updated by running "gclient sync" '
                'on your Skia checkout as a whole.  '
                'By default, we do not reload at all, and you '
                'must restart the server to pick up new data.'))),
  ]
  parser = argparse.ArgumentParser()
  for flag, settings in option_specs:
    parser.add_argument(flag, **settings)
  options = parser.parse_args()

  _SERVER = Server(actuals_dir=options.actuals_dir,
                   actuals_repo_revision=options.actuals_revision,
                   actuals_repo_url=options.actuals_repo,
                   port=options.port,
                   export=options.export,
                   editable=options.editable,
                   reload_seconds=options.reload)
  _SERVER.run()
535
536
537if __name__ == '__main__':
538  main()
539