# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import time
import traceback

from app_yaml_helper import AppYamlHelper
from appengine_wrappers import IsDeadlineExceededError, logservice, taskqueue
from branch_utility import BranchUtility
from compiled_file_system import CompiledFileSystem
from custom_logger import CustomLogger
from data_source_registry import CreateDataSources
from environment import GetAppVersion
from gcs_file_system_provider import CloudStorageFileSystemProvider
from github_file_system_provider import GithubFileSystemProvider
from host_file_system_provider import HostFileSystemProvider
from object_store_creator import ObjectStoreCreator
from render_refresher import RenderRefresher
from server_instance import ServerInstance
from servlet import Servlet, Request, Response
from timer import Timer


_log = CustomLogger('cron')


class CronServlet(Servlet):
  '''Servlet which runs a cron job.

  The job purges the default task queue and then enqueues one refresh task
  per refresh path of every data source, the content providers and the
  platform bundle, all pinned to a single "safe" commit.
  '''
  def __init__(self, request, delegate_for_test=None):
    '''Stores |request| via Servlet.__init__ and picks the delegate.

    |delegate_for_test| replaces the default CronServlet.Delegate when it is
    truthy.
    '''
    Servlet.__init__(self, request)
    self._delegate = delegate_for_test or CronServlet.Delegate()

  class Delegate(object):
    '''CronServlet's runtime dependencies. Override for testing.
    '''
    def CreateBranchUtility(self, object_store_creator):
      '''Returns BranchUtility.Create(|object_store_creator|).
      '''
      return BranchUtility.Create(object_store_creator)

    def CreateHostFileSystemProvider(self,
                                     object_store_creator,
                                     pinned_commit=None):
      '''Returns a HostFileSystemProvider built from |object_store_creator|
      and pinned to |pinned_commit| (passed through unchanged, may be None).
      '''
      return HostFileSystemProvider(object_store_creator,
                                    pinned_commit=pinned_commit)

    def CreateGithubFileSystemProvider(self, object_store_creator):
      '''Returns a GithubFileSystemProvider built from |object_store_creator|.
      '''
      return GithubFileSystemProvider(object_store_creator)

    def CreateGCSFileSystemProvider(self, object_store_creator):
      '''Returns a CloudStorageFileSystemProvider built from
      |object_store_creator|.
      '''
      return CloudStorageFileSystemProvider(object_store_creator)

    def GetAppVersion(self):
      '''Returns the result of environment.GetAppVersion().
      '''
      return GetAppVersion()

  def Get(self):
    '''Runs the cron job and returns its Response.

    Any exception escaping _GetImpl (BaseException included) is logged and
    turned into Response.InternalError('Failure') so that the caller always
    receives a Response rather than None. Logs are flushed on every path.
    '''
    # Refreshes may time out, and if they do we need to make sure to flush the
    # logs before the process gets killed (Python gives us a couple of
    # seconds).
    #
    # So, manually flush logs at the end of the cron run. However, sometimes
    # even that isn't enough, which is why in this file we use _log and
    # make it flush the log every time it's used.
    logservice.AUTOFLUSH_ENABLED = False
    try:
      return self._GetImpl()
    except BaseException:
      # Deliberately broad: this is the top-level boundary of the cron run and
      # the failure must be logged before the process goes away.
      _log.error('Caught top-level exception! %s', traceback.format_exc())
      return Response.InternalError('Failure')
    finally:
      logservice.flush()

  def _GetImpl(self):
    '''Purges the default task queue and enqueues the refresh tasks.

    One task is added per refresh path of every target, with URL
    '/_refresh/<target name>/<path>' and the master commit ID as its 'commit'
    param. Returns Response.Ok('Success'); any error raised while enqueueing
    is logged and re-raised.
    '''
    # Cron strategy:
    #
    # Collect all DataSources, the PlatformBundle, the ContentProviders, and
    # any other statically rendered contents (e.g. examples content),
    # and spin up taskqueue tasks which will refresh any cached data relevant
    # to these assets.
    #
    # TODO(rockot/kalman): At the moment examples are not actually refreshed
    # because they're too slow.

    _log.info('starting')

    server_instance = self._GetSafeServerInstance()
    master_fs = server_instance.host_file_system_provider.GetMaster()
    master_commit = master_fs.GetCommitID().Get()

    # This is the guy that would be responsible for refreshing the cache of
    # examples. Here for posterity, hopefully it will be added to the targets
    # below someday. (Intentionally unused for now.)
    render_refresher = RenderRefresher(server_instance, self._request)

    # Get the default taskqueue
    queue = taskqueue.Queue()

    # GAE documentation specifies that it's bad to add tasks to a queue
    # within one second of purging. We wait 2 seconds, because we like
    # to go the extra mile.
    queue.purge()
    time.sleep(2)

    success = True
    try:
      data_sources = CreateDataSources(server_instance)
      # list() keeps the concatenation valid even where items() returns a
      # view rather than a list.
      targets = (list(data_sources.items()) +
                 [('content_providers', server_instance.content_providers),
                  ('platform_bundle', server_instance.platform_bundle)])
      title = 'initializing %s parallel targets' % len(targets)
      _log.info(title)
      timer = Timer()
      for name, target in targets:
        for path in target.GetRefreshPaths():
          queue.add(taskqueue.Task(url='/_refresh/%s/%s' % (name, path),
                                   params={'commit': master_commit}))
      _log.info('%s took %s', title, timer.Stop().FormatElapsed())
    except:
      # This should never actually happen (each cron step does its own
      # conservative error checking), so re-raise no matter what it is.
      _log.error('uncaught error: %s', traceback.format_exc())
      success = False
      raise
    finally:
      _log.info('finished (%s)', 'success' if success else 'FAILED')
    # Only reachable on success: the failure path above always re-raises.
    return Response.Ok('Success')

  def _GetSafeServerInstance(self):
    '''Returns a ServerInstance with a host file system at a safe commit,
    meaning the last commit that the current running version of the server
    existed.
    '''
    delegate = self._delegate

    # IMPORTANT: Get a ServerInstance pinned to the most recent commit, not
    # HEAD. These cron jobs take a while and run very frequently such that
    # there is usually one running at any given time, and eventually a file
    # that we're dealing with will change underneath it, putting the server in
    # an undefined state.
    server_instance_near_head = self._CreateServerInstance(
        self._GetMostRecentCommit())

    app_yaml_handler = AppYamlHelper(
        server_instance_near_head.object_store_creator,
        server_instance_near_head.host_file_system_provider)

    # Query the running app version once and reuse it below.
    app_version = delegate.GetAppVersion()

    if app_yaml_handler.IsUpToDate(app_version):
      return server_instance_near_head

    # The version in app.yaml is greater than the currently running app's.
    # The safe version is the one before it changed.
    safe_revision = app_yaml_handler.GetFirstRevisionGreaterThan(
        app_version) - 1

    _log.info('app version %s is out of date, safe is %s',
              app_version, safe_revision)

    return self._CreateServerInstance(safe_revision)

  def _GetMostRecentCommit(self):
    '''Gets the commit of the most recent patch submitted to the host file
    system. This is similar to HEAD but it's a concrete commit so won't
    change as the cron runs.
    '''
    head_fs = (
        self._CreateServerInstance(None).host_file_system_provider.GetMaster())
    return head_fs.GetCommitID().Get()

  def _CreateServerInstance(self, commit):
    '''Creates a ServerInstance pinned to |commit|, or HEAD if None.
    NOTE: If passed None it's likely that during the cron run patches will be
    submitted at HEAD, which may change data underneath the cron run.
    '''
    # Every provider is built by the delegate from the same ObjectStoreCreator
    # so that tests can substitute their own implementations.
    object_store_creator = ObjectStoreCreator(start_empty=True)
    branch_utility = self._delegate.CreateBranchUtility(object_store_creator)
    host_file_system_provider = self._delegate.CreateHostFileSystemProvider(
        object_store_creator, pinned_commit=commit)
    github_file_system_provider = self._delegate.CreateGithubFileSystemProvider(
        object_store_creator)
    gcs_file_system_provider = self._delegate.CreateGCSFileSystemProvider(
        object_store_creator)
    return ServerInstance(object_store_creator,
                          CompiledFileSystem.Factory(object_store_creator),
                          branch_utility,
                          host_file_system_provider,
                          github_file_system_provider,
                          gcs_file_system_provider)