# caching_file_system.py revision c2e0dbddbe15c98d52c4786dac06cb8952a8ae6d
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from file_system import FileSystem, StatInfo, FileNotFoundError
from future import Future

class _AsyncUncachedFuture(object):
  '''Future delegate which completes a Read() that was only partially served
  from the cache.

  Get() resolves the pending reads for the uncached paths, writes each result
  to |object_store| alongside the version recorded for it in
  |stats_for_uncached|, and returns those results merged with
  |current_results| (the values which were already cached).

  |file_system| is stored but not used by this class.
  '''
  def __init__(self,
               uncached_read_futures,
               stats_for_uncached,
               current_results,
               file_system,
               object_store):
    self._uncached_read_futures = uncached_read_futures
    self._stats_for_uncached = stats_for_uncached
    self._current_results = current_results
    self._file_system = file_system
    self._object_store = object_store

  def Get(self):
    '''Returns a path -> data mapping covering both the freshly read and the
    previously cached paths. Note that the mapping returned by the underlying
    future is updated in place and returned.
    '''
    new_results = self._uncached_read_futures.Get()
    # Update the cached data in the object store. This is a path -> (read,
    # version) mapping.
    self._object_store.SetMulti(dict(
        (path, (new_result, self._stats_for_uncached[path].version))
        for path, new_result in new_results.iteritems()))
    new_results.update(self._current_results)
    return new_results

class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories, never files.

  Specify |use_existing_values| to continue using whatever has been cached in
  the object stores. By default, the data in the stores is assumed to be stale
  (although consistent). Using existing values is useful for live instances
  that don't want to touch the file system; not using them is good for the
  cron jobs, where we want to refresh the data.
  '''
  def __init__(self,
               file_system,
               object_store_creator_factory,
               use_existing_values=False):
    '''Creates three object stores ('stat', 'read' and 'read-binary'), each in
    a category prefixed with |file_system|'s name.
    '''
    self._file_system = file_system
    def create_object_store(category):
      # By Stat()ing from scratch we'll end up not using the existing values,
      # but also not doing unnecessary Read()s if the files haven't changed
      # from last time. Only the 'stat' store is ever started empty.
      start_empty = not use_existing_values and category == 'stat'
      creator = object_store_creator_factory.Create(CachingFileSystem)
      return creator.Create(
          category='%s/%s' % (file_system.GetName(), category),
          start_empty=start_empty)
    self._stat_object_store = create_object_store('stat')
    self._read_object_store = create_object_store('read')
    self._read_binary_object_store = create_object_store('read-binary')

  def Stat(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Returns the directory's own stat for a path ending in '/', otherwise a
    StatInfo built from the file's entry in the parent's |child_versions|.
    Raises FileNotFoundError if the parent directory has no entry for the file.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    if path.endswith('/'):
      dir_path = path
    else:
      # NOTE(review): a |path| containing no '/' makes this unpack raise
      # ValueError; presumably callers always pass a path with a directory
      # component - confirm.
      dir_path, file_path = path.rsplit('/', 1)
      dir_path += '/'

    # ... and we only ever need to cache the dir stat, too.
    dir_stat = self._stat_object_store.Get(dir_path).Get()
    if dir_stat is None:
      dir_stat = self._file_system.Stat(dir_path)
      assert dir_stat is not None  # should raise a FileNotFoundError
      self._stat_object_store.Set(dir_path, dir_stat)

    if path == dir_path:
      stat_info = dir_stat
    else:
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s' % (path, dir_path))
      stat_info = StatInfo(file_version)

    return stat_info

  def Read(self, paths, binary=False):
    '''Reads a list of files. If a file is cached in the object store and it is
    not out of date, it is returned. Otherwise, the file is retrieved from the
    file system.

    Returns a Future whose value maps each path to its data. |binary| selects
    which read store is consulted and is forwarded to the underlying Read().
    '''
    read_object_store = (self._read_binary_object_store if binary else
                         self._read_object_store)
    read_values = read_object_store.GetMulti(paths).Get()
    stat_values = self._stat_object_store.GetMulti(paths).Get()
    results = {}  # maps path to read value
    uncached = {}  # maps path to stat value
    for path in paths:
      stat_value = stat_values.get(path)
      if stat_value is None:
        # TODO(cduvall): do a concurrent Stat with the missing stat values.
        stat_value = self.Stat(path)
      read_value = read_values.get(path)
      if read_value is None:
        # Never read before (or not in the store): needs a real Read().
        uncached[path] = stat_value
        continue
      # Cached entries are (data, version) pairs; the data is only usable if
      # its version still matches the current stat.
      read_data, read_version = read_value
      if stat_value.version != read_version:
        uncached[path] = stat_value
        continue
      results[path] = read_data

    if not uncached:
      return Future(value=results)

    # Defer the underlying Read(); the delegate caches the new values and
    # merges them with |results| when the Future is resolved.
    return Future(delegate=_AsyncUncachedFuture(
        self._file_system.Read(uncached.keys(), binary=binary),
        uncached,
        results,
        self,
        read_object_store))