server.py revision 16f418080ff6751e15e0193263149412de9c848a
1#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
9HTTP server for our HTML rebaseline viewer.
10"""
11
12# System-level imports
13import argparse
14import BaseHTTPServer
15import json
16import logging
17import os
18import posixpath
19import re
20import shutil
21import socket
22import subprocess
23import sys
24import thread
25import threading
26import time
27import urlparse
28
29# Imports from within Skia
30#
31# We need to add the 'tools' directory, so that we can import svn.py within
32# that directory.
33# Make sure that the 'tools' dir is in the PYTHONPATH, but add it at the *end*
34# so any dirs that are already in the PYTHONPATH will be preferred.
35PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
36TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(PARENT_DIRECTORY))
37TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')
38if TOOLS_DIRECTORY not in sys.path:
39  sys.path.append(TOOLS_DIRECTORY)
40import svn
41
42# Imports from local dir
43import imagepairset
44import results
45
# SVN repository from which the actual GM results are checked out and updated.
ACTUALS_SVN_REPO = 'http://skia-autogen.googlecode.com/svn/gm-actual'
# Splits a normalized request path of the form '/dispatcher/remainder' into
# its (dispatcher, remainder) parts; see HTTPRequestHandler.do_GET().
PATHSPLIT_RE = re.compile('/([^/]+)/(.+)')
# Directory (within the Skia checkout) holding the expected GM results.
EXPECTATIONS_DIR = os.path.join(TRUNK_DIRECTORY, 'expectations', 'gm')
# Directory passed to results.Results() as generated_images_root; it lives
# under the 'static' directory that do_GET_static() serves files from.
GENERATED_IMAGES_ROOT = os.path.join(PARENT_DIRECTORY, 'static',
                                     'generated-images')

# A simple dictionary of file name extensions to MIME types. The empty string
# entry is used as the default when no extension was given or if the extension
# has no entry in this dictionary.
MIME_TYPE_MAP = {'': 'application/octet-stream',
                 'html': 'text/html',
                 'css': 'text/css',
                 'png': 'image/png',
                 'js': 'application/javascript',
                 'json': 'application/json'
                 }

# Keys that server.py uses to create the toplevel content header.
# NOTE: Keep these in sync with static/constants.js
#
# KEY__EDITS__* are the keys of the JSON dictionary a client POSTs to /edits;
# KEY__HEADER__* are the keys of the header dictionary (stored under
# KEY__HEADER) that the server attaches to every results response.
KEY__EDITS__MODIFICATIONS = 'modifications'
KEY__EDITS__OLD_RESULTS_HASH = 'oldResultsHash'
KEY__EDITS__OLD_RESULTS_TYPE = 'oldResultsType'
KEY__HEADER = 'header'
KEY__HEADER__DATAHASH = 'dataHash'
KEY__HEADER__IS_EDITABLE = 'isEditable'
KEY__HEADER__IS_EXPORTED = 'isExported'
KEY__HEADER__IS_STILL_LOADING = 'resultsStillLoading'
KEY__HEADER__TIME_NEXT_UPDATE_AVAILABLE = 'timeNextUpdateAvailable'
KEY__HEADER__TIME_UPDATED = 'timeUpdated'
KEY__HEADER__TYPE = 'type'

# Defaults for the --actuals-dir and --port command-line flags.
DEFAULT_ACTUALS_DIR = '.gm-actuals'
DEFAULT_PORT = 8888

# How often (in seconds) clients should reload while waiting for initial
# results to load.
RELOAD_INTERVAL_UNTIL_READY = 10

# Names of the HTTP request headers that do_POST_edits() inspects.
_HTTP_HEADER_CONTENT_LENGTH = 'Content-Length'
_HTTP_HEADER_CONTENT_TYPE = 'Content-Type'

# The single Server instance; HTTPRequestHandler reads it as a global.
_SERVER = None   # This gets filled in by main()
88
89
90def _run_command(args, directory):
91  """Runs a command and returns stdout as a single string.
92
93  Args:
94    args: the command to run, as a list of arguments
95    directory: directory within which to run the command
96
97  Returns: stdout, as a string
98
99  Raises an Exception if the command failed (exited with nonzero return code).
100  """
101  logging.debug('_run_command: %s in directory %s' % (args, directory))
102  proc = subprocess.Popen(args, cwd=directory,
103                          stdout=subprocess.PIPE,
104                          stderr=subprocess.PIPE)
105  (stdout, stderr) = proc.communicate()
106  if proc.returncode is not 0:
107    raise Exception('command "%s" failed in dir "%s": %s' %
108                    (args, directory, stderr))
109  return stdout
110
111
def _get_routable_ip_address():
  """Returns routable IP address of this host (the IP address of its network
     interface that would be used for most traffic, not its localhost
     interface).  See http://stackoverflow.com/a/166589

  Returns: the local IP address, as a string

  Raises socket.error if the socket cannot be connected (for example, when
  the host has no route to the probe address).
  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
  try:
    # connect() on a datagram socket only records the peer address; we use it
    # to learn which local address the OS would pick for outbound traffic.
    sock.connect(('8.8.8.8', 80))
    return sock.getsockname()[0]
  finally:
    # Always release the socket, even if connect()/getsockname() raised.
    sock.close()
121
122
def _create_svn_checkout(dir_path, repo_url):
  """Returns an svn.Svn object for the local checkout at dir_path, creating
  that checkout first if the directory does not exist yet.

  Args:
    dir_path: path to the local checkout; when this directory is missing, it
              is created and repo_url is checked out into it
    repo_url: URL of the SVN repo to check out into dir_path; ignored when
              dir_path already exists
  Returns: an svn.Svn object referring to the local checkout.
  """
  checkout = svn.Svn(dir_path)
  if os.path.isdir(dir_path):
    # Directory is already there: assume it holds a checkout and reuse it.
    return checkout
  os.makedirs(dir_path)
  checkout.Checkout(repo_url, '.')
  return checkout
139
140
class Server(object):
  """ HTTP server for our HTML rebaseline viewer.

  Owns the local checkout of actual GM results, the most recently computed
  results.Results object, and the lock that serializes updates to both.
  """

  def __init__(self,
               actuals_dir=DEFAULT_ACTUALS_DIR,
               port=DEFAULT_PORT, export=False, editable=True,
               reload_seconds=0):
    """
    Args:
      actuals_dir: directory under which we will check out the latest actual
                   GM results
      port: which TCP port to listen on for HTTP requests
      export: whether to allow HTTP clients on other hosts to access this server
      editable: whether HTTP clients are allowed to submit new baselines
      reload_seconds: polling interval with which to check for new results;
                      if 0, don't check for new results at all
    """
    self._actuals_dir = actuals_dir
    self._port = port
    self._export = export
    self._editable = editable
    self._reload_seconds = reload_seconds
    # Base URL of this server; filled in by run() once the socket is bound.
    self._url = None
    self._actuals_repo = _create_svn_checkout(
        dir_path=actuals_dir, repo_url=ACTUALS_SVN_REPO)

    # Reentrant lock that must be held whenever updating EITHER of:
    # 1. self._results
    # 2. the expected or actual results on local disk
    self.results_rlock = threading.RLock()
    # self._results will be filled in by calls to update_results()
    self._results = None

  @property
  def results(self):
    """ Returns the most recently generated results, or None if we don't have
    any valid results (update_results() has not completed yet). """
    return self._results

  @property
  def is_exported(self):
    """ Returns true iff HTTP clients on other hosts are allowed to access
    this server. """
    return self._export

  @property
  def is_editable(self):
    """ Returns true iff HTTP clients are allowed to submit new baselines. """
    return self._editable

  @property
  def reload_seconds(self):
    """ Returns the result reload period in seconds, or 0 if we don't reload
    results. """
    return self._reload_seconds

  def update_results(self, invalidate=False):
    """ Create or update self._results, based on the expectations in
    EXPECTATIONS_DIR and the latest actuals from skia-autogen.

    We hold self.results_rlock while we do this, to guarantee that no other
    thread attempts to update either self._results or the underlying files at
    the same time.

    Args:
      invalidate: if True, invalidate self._results immediately upon entry;
                  otherwise, we will let readers see those results until we
                  replace them
    """
    with self.results_rlock:
      if invalidate:
        self._results = None
      logging.info('Updating actual GM results in %s from SVN repo %s ...',
                   self._actuals_dir, ACTUALS_SVN_REPO)
      self._actuals_repo.Update('.')

      # We only update the expectations dir if the server was run with a
      # nonzero --reload argument; otherwise, we expect the user to maintain
      # her own expectations as she sees fit.
      #
      # Because the Skia repo is moving from SVN to git, and git does not
      # support updating a single directory tree, we have to update the entire
      # repo checkout.
      #
      # Because Skia uses depot_tools, we have to update using "gclient sync"
      # instead of raw git (or SVN) update.  Happily, this will work whether
      # the checkout was created using git or SVN.
      if self._reload_seconds:
        logging.info(
            'Updating expected GM results in %s by syncing Skia repo ...',
            EXPECTATIONS_DIR)
        _run_command(['gclient', 'sync'], TRUNK_DIRECTORY)

      self._results = results.Results(
          actuals_root=self._actuals_dir,
          expected_root=EXPECTATIONS_DIR,
          generated_images_root=GENERATED_IMAGES_ROOT)

  def _result_loader(self, reload_seconds=0):
    """ Call self.update_results(), either once or periodically.

    Params:
      reload_seconds: integer; if nonzero, reload results at this interval
          (in which case, this method will never return!)
    """
    self.update_results()
    logging.info('Initial results loaded. Ready for requests on %s',
                 self._url)
    if reload_seconds:
      while True:
        time.sleep(reload_seconds)
        try:
          self.update_results()
        except Exception:
          # A single failed reload must not kill this thread; otherwise the
          # server would silently stop refreshing. Keep serving whatever
          # results we have and try again after the next interval.
          logging.exception(
              'Failed to reload results; will retry in %s seconds',
              reload_seconds)

  def run(self):
    """ Bind the HTTP socket, start loading results in a background thread,
    and serve HTTP requests forever (this method does not return). """
    if self._export:
      # An empty host binds to all interfaces; we advertise the routable one.
      server_address = ('', self._port)
      host = _get_routable_ip_address()
      if self._editable:
        logging.warning('Running with combination of "export" and "editable" '
                        'flags.  Users on other machines will '
                        'be able to modify your GM expectations!')
    else:
      host = '127.0.0.1'
      server_address = (host, self._port)
    http_server = BaseHTTPServer.HTTPServer(server_address, HTTPRequestHandler)
    self._url = 'http://%s:%d' % (host, http_server.server_port)
    logging.info('Listening for requests on %s', self._url)

    # Start the loader only now: _result_loader() reads self._url, and there
    # is no point loading results if binding the socket failed above.
    arg_tuple = (self._reload_seconds,)  # start_new_thread needs a tuple,
                                         # even though it holds just one param
    thread.start_new_thread(self._result_loader, arg_tuple)

    http_server.serve_forever()
271
272
class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
  """ HTTP request handlers for various types of queries this server knows
      how to handle (static HTML and Javascript, expected/actual results, etc.)

      Handlers reach the Server object through the module-level _SERVER.
  """
  def do_GET(self):
    """
    Handles all GET requests, forwarding them to the appropriate
    do_GET_* dispatcher.

    If we see any Exceptions, return a 404.  This fixes http://skbug.com/2147
    """
    try:
      logging.debug('do_GET: path="%s"', self.path)
      if self.path == '' or self.path == '/' or self.path == '/index.html':
        self.redirect_to('/static/index.html')
        return
      if self.path == '/favicon.ico':
        self.redirect_to('/static/favicon.ico')
        return

      # All requests must be of this form:
      #   /dispatcher/remainder
      # where 'dispatcher' indicates which do_GET_* dispatcher to run
      # and 'remainder' is the remaining path sent to that dispatcher.
      #
      # A path that does not match (match() returns None) or that names an
      # unknown dispatcher raises here and is reported as a 404 below.
      normpath = posixpath.normpath(self.path)
      (dispatcher_name, remainder) = PATHSPLIT_RE.match(normpath).groups()
      dispatchers = {
          'results': self.do_GET_results,
          'static': self.do_GET_static,
      }
      dispatcher = dispatchers[dispatcher_name]
      dispatcher(remainder)
    except Exception:
      self.send_error(404)
      raise

  def do_GET_results(self, type):
    """ Handle a GET request for GM results.

    Args:
      type: string indicating which set of results to return;
            must be one of the results.RESULTS_* constants
    """
    logging.debug('do_GET_results: sending results of type "%s"', type)
    # Since we must make multiple calls to the Results object, grab a
    # reference to it in case it is updated to point at a new Results
    # object within another thread.
    #
    # TODO(epoger): Rather than using a global variable for the handler
    # to refer to the Server object, make Server a subclass of
    # HTTPServer, and then it could be available to the handler via
    # the handler's .server instance variable.
    results_obj = _SERVER.results
    if results_obj:
      response_dict = self.package_results(results_obj, type)
    else:
      # No results yet: tell the client to check back shortly.
      now = int(time.time())
      response_dict = {
          KEY__HEADER: {
              KEY__HEADER__IS_STILL_LOADING: True,
              KEY__HEADER__TIME_UPDATED: now,
              KEY__HEADER__TIME_NEXT_UPDATE_AVAILABLE:
                  now + RELOAD_INTERVAL_UNTIL_READY,
          },
      }
    self.send_json_dict(response_dict)

  @staticmethod
  def _compute_datahash(results_dict):
    """ Returns the hash string identifying the image pairs in results_dict.

    package_results() sends this value to the client and do_POST_edits()
    recomputes it to validate incoming edits, so both must use this helper.

    Args:
      results_dict: dictionary as returned by get_results_of_type()
    """
    return str(hash(repr(results_dict[imagepairset.KEY__IMAGEPAIRS])))

  def package_results(self, results_obj, type):
    """ Given a nonempty "results" object, package it as a response_dict
    as needed within do_GET_results.

    Args:
      results_obj: nonempty "results" object
      type: string indicating which set of results to return;
            must be one of the results.RESULTS_* constants

    Returns: the dictionary from results_obj.get_results_of_type(type), with
        a KEY__HEADER entry added.
    """
    response_dict = results_obj.get_results_of_type(type)
    time_updated = results_obj.get_timestamp()
    response_dict[KEY__HEADER] = {
        # Timestamps:
        # 1. when this data was last updated
        # 2. when the caller should check back for new data (if ever)
        #
        # We only return these timestamps if the --reload argument was passed;
        # otherwise, we have no idea when the expectations were last updated
        # (we allow the user to maintain her own expectations as she sees fit).
        KEY__HEADER__TIME_UPDATED:
            time_updated if _SERVER.reload_seconds else None,
        KEY__HEADER__TIME_NEXT_UPDATE_AVAILABLE:
            (time_updated+_SERVER.reload_seconds) if _SERVER.reload_seconds
            else None,

        # The type we passed to get_results_of_type()
        KEY__HEADER__TYPE: type,

        # Hash of dataset, which the client must return with any edits--
        # this ensures that the edits were made to a particular dataset.
        KEY__HEADER__DATAHASH: self._compute_datahash(response_dict),

        # Whether the server will accept edits back.
        KEY__HEADER__IS_EDITABLE: _SERVER.is_editable,

        # Whether the service is accessible from other hosts.
        KEY__HEADER__IS_EXPORTED: _SERVER.is_exported,
    }
    return response_dict

  def do_GET_static(self, path):
    """ Handle a GET request for a file under the 'static' directory.
    Only allow serving of files within the 'static' directory that is a
    filesystem sibling of this script.

    Args:
      path: path to file (under static directory) to retrieve
    """
    # Strip arguments ('?resultsToLoad=all') from the path
    path = urlparse.urlparse(path).path

    logging.debug('do_GET_static: sending file "%s"', path)
    static_dir = os.path.realpath(os.path.join(PARENT_DIRECTORY, 'static'))
    full_path = os.path.realpath(os.path.join(static_dir, path))
    # Require a path-separator boundary after static_dir: a bare
    # startswith(static_dir) would also accept sibling paths such as
    # '<parent>/static-other/...' (reachable e.g. through a symlink).
    if full_path == static_dir or full_path.startswith(static_dir + os.sep):
      self.send_file(full_path)
    else:
      logging.error(
          'Attempted do_GET_static() of path [%s] outside of static dir [%s]',
          full_path, static_dir)
      self.send_error(404)

  def do_POST(self):
    """ Handles all POST requests, forwarding them to the appropriate
        do_POST_* dispatcher.

        Any exception (unknown path, or a failure inside the dispatcher) is
        reported to the client as a 404 and then re-raised. """
    # All requests must be of this form:
    #   /dispatcher
    # where 'dispatcher' indicates which do_POST_* dispatcher to run.
    logging.debug('do_POST: path="%s"', self.path)
    normpath = posixpath.normpath(self.path)
    dispatchers = {
      '/edits': self.do_POST_edits,
    }
    try:
      dispatcher = dispatchers[normpath]
      dispatcher()
      self.send_response(200)
      # Terminate the header block; without this the client never receives
      # the blank line that marks the end of the response headers.
      self.end_headers()
    except Exception:
      self.send_error(404)
      raise

  def do_POST_edits(self):
    """ Handle a POST request with modifications to GM expectations, in this
    format:

    {
      KEY__EDITS__OLD_RESULTS_TYPE: 'all',  # type of results that the client
                                            # loaded and then made
                                            # modifications to
      KEY__EDITS__OLD_RESULTS_HASH: 39850913, # hash of results when the client
                                              # loaded them (ensures that the
                                              # client and server apply
                                              # modifications to the same base)
      KEY__EDITS__MODIFICATIONS: [
        # as needed by results.edit_expectations()
        ...
      ],
    }

    Raises an Exception if there were any problems.
    """
    if not _SERVER.is_editable:
      raise Exception('this server is not running in --editable mode')

    content_type = self.headers[_HTTP_HEADER_CONTENT_TYPE]
    if content_type != 'application/json;charset=UTF-8':
      raise Exception('unsupported %s [%s]' % (
          _HTTP_HEADER_CONTENT_TYPE, content_type))

    content_length = int(self.headers[_HTTP_HEADER_CONTENT_LENGTH])
    json_data = self.rfile.read(content_length)
    data = json.loads(json_data)
    logging.debug('do_POST_edits: received new GM expectations data [%s]',
                  data)

    # Update the results on disk with the information we received from the
    # client.
    # We must hold _SERVER.results_rlock while we do this, to guarantee that
    # no other thread updates expectations (from the Skia repo) while we are
    # updating them (using the info we received from the client).
    with _SERVER.results_rlock:
      results_obj = _SERVER.results
      if results_obj is None:
        # Results are still loading (or were invalidated by an earlier edit);
        # fail with a clear message rather than an AttributeError on None.
        raise Exception('results are not loaded yet. The client should '
                        'reload the results and submit the modifications '
                        'again.')
      oldResultsType = data[KEY__EDITS__OLD_RESULTS_TYPE]
      oldResults = results_obj.get_results_of_type(oldResultsType)
      oldResultsHash = self._compute_datahash(oldResults)
      if oldResultsHash != data[KEY__EDITS__OLD_RESULTS_HASH]:
        raise Exception('results of type "%s" changed while the client was '
                        'making modifications. The client should reload the '
                        'results and submit the modifications again.' %
                        oldResultsType)
      results_obj.edit_expectations(data[KEY__EDITS__MODIFICATIONS])

    # Read the updated results back from disk.
    # We can do this in a separate thread; we should return our success message
    # to the UI as soon as possible.
    # (True,) is invalidate=True: readers see "still loading" until it's done.
    thread.start_new_thread(_SERVER.update_results, (True,))

  def redirect_to(self, url):
    """ Redirect the HTTP client to a different url.

    Args:
      url: URL to redirect the HTTP client to
    """
    self.send_response(301)
    self.send_header('Location', url)
    self.end_headers()

  def send_file(self, path):
    """ Send the contents of the file at this path, with a mimetype based
        on the filename extension.  Sends a 404 if path is not a regular file.

    Args:
      path: path of file whose contents to send to the HTTP client
    """
    # Grab the extension if there is one
    extension = os.path.splitext(path)[1]
    if len(extension) >= 1:
      # Drop the leading '.' so the extension matches MIME_TYPE_MAP's keys.
      extension = extension[1:]

    # Determine the MIME type of the file from its extension
    mime_type = MIME_TYPE_MAP.get(extension, MIME_TYPE_MAP[''])

    # Open the file and send it over HTTP
    if os.path.isfile(path):
      with open(path, 'rb') as sending_file:
        self.send_response(200)
        self.send_header('Content-type', mime_type)
        self.end_headers()
        self.wfile.write(sending_file.read())
    else:
      self.send_error(404)

  def send_json_dict(self, json_dict):
    """ Send the contents of this dictionary in JSON format, with a JSON
        mimetype.

    Args:
      json_dict: dictionary to send
    """
    self.send_response(200)
    self.send_header('Content-type', 'application/json')
    self.end_headers()
    json.dump(json_dict, self.wfile)
523
524
def main():
  """ Configure logging, parse the command-line flags, then create the global
  Server instance and run it (which serves requests until interrupted). """
  global _SERVER

  logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                      datefmt='%m/%d/%Y %H:%M:%S',
                      level=logging.INFO)

  # (flag, add_argument keyword arguments), in the order they are registered.
  flag_specs = [
      ('--actuals-dir', {
          'help': ('Directory into which we will check out the latest '
                   'actual GM results. If this directory does not '
                   'exist, it will be created. Defaults to %(default)s'),
          'default': DEFAULT_ACTUALS_DIR,
      }),
      ('--editable', {
          'action': 'store_true',
          'help': ('Allow HTTP clients to submit new baselines.'),
      }),
      ('--export', {
          'action': 'store_true',
          'help': ('Instead of only allowing access from HTTP clients '
                   'on localhost, allow HTTP clients on other hosts '
                   'to access this server.  WARNING: doing so will '
                   'allow users on other hosts to modify your '
                   'GM expectations, if combined with --editable.'),
      }),
      ('--port', {
          'type': int,
          'help': ('Which TCP port to listen on for HTTP requests; '
                   'defaults to %(default)s'),
          'default': DEFAULT_PORT,
      }),
      ('--reload', {
          'type': int,
          'help': ('How often (a period in seconds) to update the '
                   'results.  If specified, both expected and actual '
                   'results will be updated by running "gclient sync" '
                   'on your Skia checkout as a whole.  '
                   'By default, we do not reload at all, and you '
                   'must restart the server to pick up new data.'),
          'default': 0,
      }),
  ]
  parser = argparse.ArgumentParser()
  for flag, options in flag_specs:
    parser.add_argument(flag, **options)
  parsed = parser.parse_args()

  # HTTPRequestHandler looks the server up through this module-level global.
  _SERVER = Server(actuals_dir=parsed.actuals_dir,
                   port=parsed.port,
                   export=parsed.export,
                   editable=parsed.editable,
                   reload_seconds=parsed.reload)
  _SERVER.run()


if __name__ == '__main__':
  main()
565