# caching_file_system.py revision f2477e01787aa58f445919b809d89e252beef54f
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import posixpath
import sys

from file_system import FileSystem, StatInfo, FileNotFoundError
from future import Future
from object_store_creator import ObjectStoreCreator


class _AsyncUncachedFuture(object):
  '''Future delegate which resolves the reads that missed the cache, writes
  them back to |object_store|, and merges them with the results that were
  already served from the cache.
  '''
  def __init__(self,
               uncached_read_futures,
               stats_for_uncached,
               current_results,
               file_system,
               object_store):
    '''|uncached_read_futures| is the pending read for the uncached paths (its
    Get() yields a path -> data mapping), |stats_for_uncached| maps each of
    those paths to its stat value, |current_results| maps the already-cached
    paths to their data, and |object_store| receives the freshly read data.
    |file_system| is stored but not otherwise used by this class.
    '''
    self._uncached_read_futures = uncached_read_futures
    self._stats_for_uncached = stats_for_uncached
    self._current_results = current_results
    self._file_system = file_system
    self._object_store = object_store

  def Get(self):
    '''Resolves the pending read, caches what it returned, and returns a
    path -> data mapping covering both the fresh and the cached results.
    '''
    new_results = self._uncached_read_futures.Get()
    # Update the cached data in the object store. This is a path -> (read,
    # version) mapping.
    # items() rather than iteritems() so this works on both Python 2 and 3.
    self._object_store.SetMulti(dict(
        (path, (new_result, self._stats_for_uncached[path].version))
        for path, new_result in new_results.items()))
    new_results.update(self._current_results)
    return new_results


class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories, never files.
  '''
  def __init__(self, file_system, object_store_creator):
    '''Wraps |file_system|, creating the stat and read caches through
    |object_store_creator|. Each cache category is prefixed with the identity
    of |file_system|.
    '''
    self._file_system = file_system
    def create_object_store(category, **optargs):
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)
    self._stat_object_store = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_object_store = create_object_store('read', start_empty=False)
    self._read_binary_object_store = create_object_store('read-binary',
                                                         start_empty=False)

  def Refresh(self):
    '''Delegates to the underlying file system's Refresh().'''
    return self._file_system.Refresh()

  def Stat(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Raises FileNotFoundError if |path| is a file which is not listed in its
    parent directory's child versions.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    if dir_path and not dir_path.endswith('/'):
      dir_path += '/'

    # ... and we only ever need to cache the dir stat, too.
    dir_stat = self._stat_object_store.Get(dir_path).Get()
    if dir_stat is None:
      dir_stat = self._file_system.Stat(dir_path)
      assert dir_stat is not None  # should raise a FileNotFoundError
      self._stat_object_store.Set(dir_path, dir_stat)

    # |path| was itself the directory; its own stat is the answer.
    if path == dir_path:
      return dir_stat

    file_version = dir_stat.child_versions.get(file_path)
    if file_version is None:
      raise FileNotFoundError('No stat found for %s in %s (found %s)' %
                              (path, dir_path, dir_stat.child_versions))
    return StatInfo(file_version)

  def Read(self, paths, binary=False):
    '''Reads a list of files. If a file is in memcache and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file system.

    Returns a Future of a path -> data mapping. If Stat()ing any path fails,
    the failure is not raised from here; instead a Future constructed from the
    exception info is returned.
    '''
    read_object_store = (self._read_binary_object_store if binary else
                         self._read_object_store)
    read_values = read_object_store.GetMulti(paths).Get()
    # Stat() only ever stores directory stats, so file paths are expected to
    # miss here and fall through to Stat() below.
    stat_values = self._stat_object_store.GetMulti(paths).Get()
    results = {}  # maps path to read value
    uncached = {}  # maps path to stat value
    for path in paths:
      stat_value = stat_values.get(path)
      if stat_value is None:
        # TODO(cduvall): do a concurrent Stat with the missing stat values.
        try:
          stat_value = self.Stat(path)
        except Exception:
          # Deliberately not a bare except: KeyboardInterrupt and SystemExit
          # should propagate immediately rather than be deferred in a Future.
          return Future(exc_info=sys.exc_info())
      read_value = read_values.get(path)
      if read_value is None:
        uncached[path] = stat_value
        continue
      read_data, read_version = read_value
      if stat_value.version != read_version:
        # The cached data is stale; read it again.
        uncached[path] = stat_value
        continue
      results[path] = read_data

    if not uncached:
      return Future(value=results)

    # list(...) so the delegate always receives a real list, on Python 2 and 3.
    return Future(delegate=_AsyncUncachedFuture(
        self._file_system.Read(list(uncached), binary=binary),
        uncached,
        results,
        self,
        read_object_store))

  def GetIdentity(self):
    '''Returns the identity of the underlying file system.'''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '%s of <%s>' % (type(self).__name__, repr(self._file_system))