caching_file_system.py revision 4e180b6a0b4720a9b8e9e959a882386f690f08ff
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import sys

from file_system import FileSystem, StatInfo, FileNotFoundError
from future import Future
from object_store_creator import ObjectStoreCreator


class _AsyncUncachedFuture(object):
  '''Future delegate used by CachingFileSystem.Read() for paths that could not
  be served from the read cache. Resolving it fetches the pending reads, writes
  them back to the object store, and merges in the already-cached results.
  '''
  def __init__(self,
               uncached_read_futures,
               stats_for_uncached,
               current_results,
               file_system,
               object_store):
    # Future returned by the underlying file system's Read() for the uncached
    # paths; its Get() is expected to yield a path -> data dict.
    self._uncached_read_futures = uncached_read_futures
    # Maps each uncached path to its stat value; only |version| is read here.
    self._stats_for_uncached = stats_for_uncached
    # Maps path -> data for the entries that were served from the cache.
    self._current_results = current_results
    # Stored but not used by Get().
    self._file_system = file_system
    # Object store that receives the freshly read (data, version) tuples.
    self._object_store = object_store

  def Get(self):
    '''Resolves the pending reads, caches them, and returns a dict mapping
    every requested path (freshly read and previously cached) to its data.
    '''
    new_results = self._uncached_read_futures.Get()
    # Update the cached data in the object store. This is a path -> (read,
    # version) mapping.
    self._object_store.SetMulti(dict(
        (path, (new_result, self._stats_for_uncached[path].version))
        for path, new_result in new_results.iteritems()))
    # Merge the cached results into the dict that the delegate returned; this
    # updates that dict in place, and cached entries win on a key collision.
    new_results.update(self._current_results)
    return new_results


class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories never files.
  '''
  def __init__(self, file_system, object_store_creator):
    self._file_system = file_system
    def create_object_store(category, **optargs):
      # Prefix the category with the wrapped file system's identity so that
      # each wrapped file system gets its own set of stores.
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)
    self._stat_object_store = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_object_store = create_object_store('read', start_empty=False)
    self._read_binary_object_store = create_object_store('read-binary',
                                                         start_empty=False)

  def Refresh(self):
    '''Delegates to the wrapped file system's Refresh() and returns its result.
    '''
    return self._file_system.Refresh()

  def Stat(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Returns the directory's stat for a path ending in '/', otherwise a StatInfo
    built from the file's entry in the parent directory's |child_versions|.
    Raises FileNotFoundError if the parent's stat has no entry for the file.

    NOTE(review): a path without any '/' (and not ending in one) makes the
    two-name unpacking of rsplit() below raise ValueError; confirm that callers
    never pass such paths.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    if path.endswith('/'):
      dir_path = path
    else:
      dir_path, file_path = path.rsplit('/', 1)
      dir_path += '/'

    # ... and we only ever need to cache the dir stat, too.
    dir_stat = self._stat_object_store.Get(dir_path).Get()
    if dir_stat is None:
      dir_stat = self._file_system.Stat(dir_path)
      assert dir_stat is not None  # should raise a FileNotFoundError
      self._stat_object_store.Set(dir_path, dir_stat)

    if path == dir_path:
      stat_info = dir_stat
    else:
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s' % (path, dir_path))
      stat_info = StatInfo(file_version)

    return stat_info

  def Read(self, paths, binary=False):
    '''Reads a list of files. If a file is in the cache and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file system.

    Returns a Future. When every path is served from the cache its value is a
    dict mapping path to data; otherwise it delegates to an
    _AsyncUncachedFuture which reads the remaining paths when resolved. If
    Stat() fails for any path, a Future carrying that exception's exc_info is
    returned instead and nothing is read.
    '''
    read_object_store = (self._read_binary_object_store if binary else
                         self._read_object_store)
    read_values = read_object_store.GetMulti(paths).Get()
    # Within this class the stat store is only written with directory keys (see
    # Stat()), so file paths are expected to miss here and fall back to Stat().
    stat_values = self._stat_object_store.GetMulti(paths).Get()
    results = {}  # maps path to read value
    uncached = {}  # maps path to stat value
    for path in paths:
      stat_value = stat_values.get(path)
      if stat_value is None:
        # TODO(cduvall): do a concurrent Stat with the missing stat values.
        try:
          stat_value = self.Stat(path)
        except Exception:
          # Defer the failure to the returned Future rather than raising here.
          # Only Exception subclasses are deferred, so that KeyboardInterrupt
          # and SystemExit propagate immediately instead of being captured.
          return Future(exc_info=sys.exc_info())
      read_value = read_values.get(path)
      if read_value is None:
        # Never cached: needs a real read.
        uncached[path] = stat_value
        continue
      read_data, read_version = read_value
      if stat_value.version != read_version:
        # Cached, but at a different version from the current stat: re-read.
        uncached[path] = stat_value
        continue
      results[path] = read_data

    if not uncached:
      return Future(value=results)

    return Future(delegate=_AsyncUncachedFuture(
        self._file_system.Read(uncached.keys(), binary=binary),
        uncached,
        results,
        self,
        read_object_store))

  def GetIdentity(self):
    '''Returns the identity of the wrapped file system.
    '''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '<%s of %s>' % (type(self).__name__,
                           type(self._file_system).__name__)