# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import posixpath
import sys

from file_system import FileSystem, StatInfo, FileNotFoundError
from future import All, Future
from path_util import AssertIsDirectory, IsDirectory, ToDirectory
from third_party.json_schema_compiler.memoize import memoize


class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories, never files.

  Three object stores back the cache:
    - the stat cache maps a directory path to that directory's stat,
    - the read cache maps a path to a (data, version) pair, where
      (None, None) records a file known not to exist,
    - the walk cache maps a root to a (dirs, files, version) triple.
  '''
  def __init__(self, file_system, object_store_creator):
    '''Wraps |file_system|, creating the caches via |object_store_creator|.
    Each cache's category is namespaced by |file_system|'s identity.
    '''
    self._file_system = file_system
    def create_object_store(category, **optargs):
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)
    self._stat_cache = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_cache = create_object_store('read', start_empty=False)
    self._walk_cache = create_object_store('walk', start_empty=False)

  def Refresh(self):
    '''Delegates to the wrapped file system's Refresh().'''
    return self._file_system.Refresh()

  def StatAsync(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Returns a Future. For a directory |path| it resolves to the directory's
    stat; for a file it resolves to a StatInfo holding only that file's
    version. Resolving raises FileNotFoundError if the parent directory's stat
    has no entry for the file.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    dir_path = ToDirectory(dir_path)

    def make_stat_info(dir_stat):
      '''Converts a dir stat into the correct resulting StatInfo; if the Stat
      was for a file, the StatInfo should just contain that file.
      '''
      if path == dir_path:
        return dir_stat
      # Was a file stat. Extract that file.
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s (found %s)' %
                                (path, dir_path, dir_stat.child_versions))
      return StatInfo(file_version)

    cached_dir_stat = self._stat_cache.Get(dir_path).Get()
    if cached_dir_stat is not None:
      # Built with a callback rather than a value so that a FileNotFoundError
      # from make_stat_info is raised from the Future, not from this call.
      return Future(callback=lambda: make_stat_info(cached_dir_stat))

    def cache_and_convert(dir_stat):
      assert dir_stat is not None  # should have raised a FileNotFoundError
      # We only ever need to cache the dir stat.
      self._stat_cache.Set(dir_path, dir_stat)
      return make_stat_info(dir_stat)
    return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(
        cache_and_convert)

  @memoize
  def _MemoizedStatAsyncFromFileSystem(self, dir_path):
    '''This is a simple wrapper to memoize Futures to directory stats, since
    StatAsync makes heavy use of it. Only cache directories so that the
    memoized cache doesn't blow up.
    '''
    assert IsDirectory(dir_path)
    return self._file_system.StatAsync(dir_path)

  def Read(self, paths, skip_not_found=False):
    '''Reads a list of files. If a file is cached and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file system.

    Returns a Future resolving to a path -> data mapping. When |skip_not_found|
    is set, a FileNotFoundError from a stat is converted into a None stat
    instead of being raised, and paths already cached as non-existent are not
    read again.
    '''
    # Files which aren't found are cached in the read object store as
    # (path, None, None). This is to prevent re-reads of files we know
    # do not exist.
    cached_read_values = self._read_cache.GetMulti(paths).Get()
    cached_stat_values = self._stat_cache.GetMulti(paths).Get()

    # Populate a map of paths to Futures to their stat. They may have already
    # been cached in which case their Future will already have been constructed
    # with a value.
    stat_futures = {}

    def handle(error):
      # Only "not found" is tolerated; any other error is re-raised.
      if isinstance(error, FileNotFoundError):
        return None
      raise error

    for path in paths:
      stat_value = cached_stat_values.get(path)
      if stat_value is None:
        stat_future = self.StatAsync(path)
        if skip_not_found:
          stat_future = stat_future.Then(lambda x: x, handle)
      else:
        stat_future = Future(value=stat_value)
      stat_futures[path] = stat_future

    def is_up_to_date(path, version):
      '''Returns True if the cached |version| of |path| matches its latest stat.
      '''
      # |version| == None implies a non-existent file, so skip it.
      if version is None:
        return False
      stat_info = stat_futures[path].Get()
      # With |skip_not_found| the stat of a file that has since disappeared is
      # None (see |handle|). Treat the cached data as stale rather than
      # dereferencing None, so the not-found bookkeeping below records it.
      return stat_info is not None and stat_info.version == version

    # Filter only the cached data which is up to date by comparing to the latest
    # stat. The cached read data includes the cached version. Remove it for
    # the result returned to callers.
    up_to_date_data = dict(
        (path, data) for path, (data, version) in cached_read_values.iteritems()
        if is_up_to_date(path, version))

    if skip_not_found:
      # Filter out paths which we know do not exist, i.e. if |path| is in
      # |cached_read_values| *and* has a None version, then it doesn't exist.
      # See the above declaration of |cached_read_values| for more information.
      paths = [path for path in paths
               if cached_read_values.get(path, (None, True))[1]]

    if len(up_to_date_data) == len(paths):
      # Everything was cached and up-to-date.
      return Future(value=up_to_date_data)

    def update_caches(new_results):
      # Update the cache. This is a path -> (data, version) mapping.
      self._read_cache.SetMulti(
          dict((path, (new_result, stat_futures[path].Get().version))
               for path, new_result in new_results.iteritems()))
      # Update the read cache to include files that weren't found, to prevent
      # constantly trying to read a file we now know doesn't exist.
      self._read_cache.SetMulti(
          dict((path, (None, None)) for path in paths
               if stat_futures[path].Get() is None))
      new_results.update(up_to_date_data)
      return new_results
    # Read in the values that were uncached or old.
    return self._file_system.Read(set(paths) - set(up_to_date_data),
                                  skip_not_found=skip_not_found).Then(
                                      update_caches)

  def GetCommitID(self):
    '''Delegates to the wrapped file system's GetCommitID().'''
    return self._file_system.GetCommitID()

  def GetPreviousCommitID(self):
    '''Delegates to the wrapped file system's GetPreviousCommitID().'''
    return self._file_system.GetPreviousCommitID()

  def Walk(self, root, depth=-1):
    '''Overrides FileSystem.Walk() to provide caching functionality.

    The walk itself is delegated to the wrapped file system; only the
    per-directory listing is served from, and written to, the walk cache.
    '''
    def file_lister(root):
      res, root_stat = All((self._walk_cache.Get(root),
                            self.StatAsync(root))).Get()

      if res and res[2] == root_stat.version:
        dirs, files = res[0], res[1]
      else:
        # Wasn't cached, or not up to date.
        dirs, files = [], []
        for f in self.ReadSingle(root).Get():
          if IsDirectory(f):
            dirs.append(f)
          else:
            files.append(f)
        # Update the cache. This is a root -> (dirs, files, version) mapping.
        self._walk_cache.Set(root, (dirs, files, root_stat.version))
      return dirs, files
    return self._file_system.Walk(root, depth=depth, file_lister=file_lister)

  def GetIdentity(self):
    '''Delegates to the wrapped file system's GetIdentity().'''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '%s of <%s>' % (type(self).__name__, repr(self._file_system))