1c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# Copyright 2013 The Chromium Authors. All rights reserved.
2c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# Use of this source code is governed by a BSD-style license that can be
3c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# found in the LICENSE file.
4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
51320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciimport time
6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)import traceback
7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
8a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)from app_yaml_helper import AppYamlHelper
91320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccifrom appengine_wrappers import IsDeadlineExceededError, logservice, taskqueue
10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)from branch_utility import BranchUtility
11a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)from compiled_file_system import CompiledFileSystem
121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccifrom custom_logger import CustomLogger
13424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)from data_source_registry import CreateDataSources
141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccifrom environment import GetAppVersion
155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)from gcs_file_system_provider import CloudStorageFileSystemProvider
161e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)from github_file_system_provider import GithubFileSystemProvider
174e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)from host_file_system_provider import HostFileSystemProvider
18b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)from object_store_creator import ObjectStoreCreator
191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccifrom render_refresher import RenderRefresher
20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)from server_instance import ServerInstance
21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)from servlet import Servlet, Request, Response
221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccifrom timer import Timer
23f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
24b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
# Module-wide logger for cron runs, tagged 'cron'. Per the note in
# CronServlet.Get, it is expected to flush the log on every use so that
# messages survive the process being killed on a timeout.
# NOTE(review): the flush-on-use behaviour lives in CustomLogger, which is not
# visible from this file - confirm there.
_log = CustomLogger('cron')
26b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
273551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
class CronServlet(Servlet):
  '''Servlet which runs a cron job.

  A cron run purges the default task queue and then enqueues one
  '/_refresh/<target>/<path>' task per refresh path of every data source, the
  content providers and the platform bundle, all pinned to a single commit.
  '''
  def __init__(self, request, delegate_for_test=None):
    '''|request| is forwarded to Servlet. |delegate_for_test| replaces the
    default CronServlet.Delegate when given; tests use it to stub out the
    servlet's runtime dependencies.
    '''
    Servlet.__init__(self, request)
    self._delegate = delegate_for_test or CronServlet.Delegate()

  class Delegate(object):
    '''CronServlet's runtime dependencies. Override for testing.
    '''
    def CreateBranchUtility(self, object_store_creator):
      '''Returns a BranchUtility backed by |object_store_creator|.
      '''
      return BranchUtility.Create(object_store_creator)

    def CreateHostFileSystemProvider(self,
                                     object_store_creator,
                                     pinned_commit=None):
      '''Returns a HostFileSystemProvider backed by |object_store_creator|;
      |pinned_commit| is passed straight through to it.
      '''
      return HostFileSystemProvider(object_store_creator,
                                    pinned_commit=pinned_commit)

    def CreateGithubFileSystemProvider(self, object_store_creator):
      '''Returns a GithubFileSystemProvider backed by |object_store_creator|.
      '''
      return GithubFileSystemProvider(object_store_creator)

    def CreateGCSFileSystemProvider(self, object_store_creator):
      '''Returns a CloudStorageFileSystemProvider backed by
      |object_store_creator|.
      '''
      return CloudStorageFileSystemProvider(object_store_creator)

    def GetAppVersion(self):
      '''Returns the version of the currently running app.
      '''
      return GetAppVersion()

  def Get(self):
    '''Runs the cron job and returns a Response.

    Never raises for errors inside the cron run: anything that escapes
    _GetImpl is logged and reported as Response.InternalError('Failure').
    '''
    # Refreshes may time out, and if they do we need to make sure to flush the
    # logs before the process gets killed (Python gives us a couple of
    # seconds).
    #
    # So, manually flush logs at the end of the cron run. However, sometimes
    # even that isn't enough, which is why in this file we use _log and
    # make it flush the log every time it's used.
    logservice.AUTOFLUSH_ENABLED = False
    try:
      return self._GetImpl()
    except BaseException:
      _log.error('Caught top-level exception! %s', traceback.format_exc())
      # Hand the caller a real error Response rather than falling off the end
      # of the method and implicitly returning None.
      return Response.InternalError('Failure')
    finally:
      logservice.flush()

  def _GetImpl(self):
    '''Purges the default task queue and enqueues one refresh task per refresh
    path of every target.

    Returns Response.Ok('Success') if every task was enqueued, otherwise
    Response.InternalError('Failure'). Errors raised before enqueueing starts
    (creating the server instance, purging the queue) propagate to the caller.
    '''
    # Cron strategy:
    #
    # Collect all DataSources, the PlatformBundle, the ContentProviders, and
    # any other statically rendered contents (e.g. examples content),
    # and spin up taskqueue tasks which will refresh any cached data relevant
    # to these assets.
    #
    # TODO(rockot/kalman): At the moment examples are not actually refreshed
    # because they're too slow.

    _log.info('starting')

    server_instance = self._GetSafeServerInstance()
    master_fs = server_instance.host_file_system_provider.GetMaster()
    master_commit = master_fs.GetCommitID().Get()

    # This is the guy that would be responsible for refreshing the cache of
    # examples. Here for posterity, hopefully it will be added to the targets
    # below someday.
    render_refresher = RenderRefresher(server_instance, self._request)

    # Get the default taskqueue
    queue = taskqueue.Queue()

    # GAE documentation specifies that it's bad to add tasks to a queue
    # within one second of purging. We wait 2 seconds, because we like
    # to go the extra mile.
    queue.purge()
    time.sleep(2)

    success = True
    try:
      data_sources = CreateDataSources(server_instance)
      # list() keeps the concatenation valid even if items() returns a view
      # rather than a list.
      targets = (list(data_sources.items()) +
                 [('content_providers', server_instance.content_providers),
                  ('platform_bundle', server_instance.platform_bundle)])
      title = 'initializing %s parallel targets' % len(targets)
      _log.info(title)
      timer = Timer()
      for name, target in targets:
        for path in target.GetRefreshPaths():
          # Every task carries the same commit so that all refreshes of this
          # cron run agree on which commit they operate on.
          queue.add(taskqueue.Task(url='/_refresh/%s/%s' % (name, path),
                                   params={'commit': master_commit}))
      _log.info('%s took %s', title, timer.Stop().FormatElapsed())
    except BaseException:
      # This should never actually happen (each cron step does its own
      # conservative error checking). Whatever it is, it is logged and
      # reported as a failed run rather than re-raised. Mark the failure
      # before logging so that a failing logger can never yield 'Success'.
      success = False
      _log.error('uncaught error: %s', traceback.format_exc())

    _log.info('finished (%s)', 'success' if success else 'FAILED')
    if success:
      return Response.Ok('Success')
    return Response.InternalError('Failure')

  def _GetSafeServerInstance(self):
    '''Returns a ServerInstance with a host file system at a safe commit,
    meaning the last commit that the current running version of the server
    existed.
    '''
    delegate = self._delegate

    # IMPORTANT: Get a ServerInstance pinned to the most recent commit, not
    # HEAD. These cron jobs take a while and run very frequently such that
    # there is usually one running at any given time, and eventually a file
    # that we're dealing with will change underneath it, putting the server in
    # an undefined state.
    server_instance_near_head = self._CreateServerInstance(
        self._GetMostRecentCommit())

    app_yaml_handler = AppYamlHelper(
        server_instance_near_head.object_store_creator,
        server_instance_near_head.host_file_system_provider)

    app_version = delegate.GetAppVersion()
    if app_yaml_handler.IsUpToDate(app_version):
      return server_instance_near_head

    # The version in app.yaml is greater than the currently running app's.
    # The safe version is the one before it changed.
    # NOTE(review): the "- 1" assumes GetFirstRevisionGreaterThan returns a
    # numeric revision; confirm this still holds if commits are identified by
    # hashes.
    safe_revision = (
        app_yaml_handler.GetFirstRevisionGreaterThan(app_version) - 1)

    _log.info('app version %s is out of date, safe is %s',
        app_version, safe_revision)

    return self._CreateServerInstance(safe_revision)

  def _GetMostRecentCommit(self):
    '''Gets the commit of the most recent patch submitted to the host file
    system. This is similar to HEAD but it's a concrete commit so won't
    change as the cron runs.
    '''
    head_instance = self._CreateServerInstance(None)
    head_fs = head_instance.host_file_system_provider.GetMaster()
    return head_fs.GetCommitID().Get()

  def _CreateServerInstance(self, commit):
    '''Creates a ServerInstance pinned to |commit|, or HEAD if None.
    NOTE: If passed None it's likely that during the cron run patches will be
    submitted at HEAD, which may change data underneath the cron run.
    '''
    delegate = self._delegate
    # Every collaborator below is built on this one object store creator.
    object_store_creator = ObjectStoreCreator(start_empty=True)
    branch_utility = delegate.CreateBranchUtility(object_store_creator)
    host_file_system_provider = delegate.CreateHostFileSystemProvider(
        object_store_creator, pinned_commit=commit)
    github_file_system_provider = delegate.CreateGithubFileSystemProvider(
        object_store_creator)
    gcs_file_system_provider = delegate.CreateGCSFileSystemProvider(
        object_store_creator)
    return ServerInstance(object_store_creator,
                          CompiledFileSystem.Factory(object_store_creator),
                          branch_utility,
                          host_file_system_provider,
                          github_file_system_provider,
                          gcs_file_system_provider)
189