caching_file_system.py revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import posixpath
6import sys
7
8from file_system import FileSystem, StatInfo, FileNotFoundError
9from future import Future
10
11
class _AsyncUncachedFuture(object):
  '''Future delegate which completes a partially cached read.

  Get() resolves the outstanding read of the uncached paths, records each
  freshly read value in |object_store| together with the version taken from
  its stat, and returns those values combined with the results that were
  already available.
  '''
  def __init__(self,
               uncached_read_futures,
               stats_for_uncached,
               current_results,
               file_system,
               object_store):
    '''
    |uncached_read_futures| object whose Get() returns a path -> data dict for
        the paths that still had to be read.
    |stats_for_uncached|    maps each of those paths to an object exposing a
        |version| attribute.
    |current_results|       path -> data dict of results already available.
    |file_system|           stored on the instance; not used by Get().
    |object_store|          receives the newly read values via SetMulti().
    '''
    self._uncached_read_futures = uncached_read_futures
    self._stats_for_uncached = stats_for_uncached
    self._current_results = current_results
    self._file_system = file_system
    self._object_store = object_store

  def Get(self):
    '''Returns a path -> data dict holding both the newly read values and the
    results supplied at construction. On a key clash the supplied results win.
    '''
    new_results = self._uncached_read_futures.Get()
    # Update the cached data in the object store. This is a path -> (read,
    # version) mapping. items() rather than iteritems() so that this also runs
    # on Python 3.
    self._object_store.SetMulti(dict(
        (path, (new_result, self._stats_for_uncached[path].version))
        for path, new_result in new_results.items()))
    # Merge into a fresh dict: |new_results| belongs to the delegate future and
    # must not be modified behind its back.
    all_results = dict(new_results)
    all_results.update(self._current_results)
    return all_results
34
class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories never files.
  '''
  def __init__(self, file_system, object_store_creator):
    '''|file_system| is the FileSystem being wrapped. |object_store_creator|
    is asked (through Create()) for two stores, 'stat' and 'read', whose
    categories are prefixed with |file_system|'s identity.
    '''
    self._file_system = file_system
    def create_object_store(category, **optargs):
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)
    self._stat_object_store = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_object_store = create_object_store('read', start_empty=False)

  def Refresh(self):
    '''Delegates to the wrapped file system's Refresh().
    '''
    return self._file_system.Refresh()

  def Stat(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Returns the directory's stat when |path| is the directory itself, otherwise
    a StatInfo built from the file's entry in the directory's |child_versions|.
    Raises FileNotFoundError when the directory has no entry for the file.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    if dir_path and not dir_path.endswith('/'):
      dir_path += '/'

    # ... and we only ever need to cache the dir stat, too.
    dir_stat = self._stat_object_store.Get(dir_path).Get()
    if dir_stat is None:
      dir_stat = self._file_system.Stat(dir_path)
      assert dir_stat is not None  # should raise a FileNotFoundError
      self._stat_object_store.Set(dir_path, dir_stat)

    if path == dir_path:
      # |path| named the directory itself (or the root, '').
      stat_info = dir_stat
    else:
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s (found %s)' %
                                (path, dir_path, dir_stat.child_versions))
      stat_info = StatInfo(file_version)

    return stat_info

  def Read(self, paths):
    '''Reads a list of files. If a file is in the read object store and its
    stored version matches the current stat version, the stored data is
    returned. Otherwise, the file is retrieved from the file system.

    Returns a Future of a path -> data dict. If stat'ing any path fails, the
    returned Future carries that exception's exc_info instead of a value.
    '''
    read_values = self._read_object_store.GetMulti(paths).Get()
    stat_values = self._stat_object_store.GetMulti(paths).Get()
    results = {}  # maps path to read value
    uncached = {}  # maps path to stat value
    for path in paths:
      stat_value = stat_values.get(path)
      if stat_value is None:
        # TODO(cduvall): do a concurrent Stat with the missing stat values.
        try:
          stat_value = self.Stat(path)
        except Exception:
          # Only capture real errors; KeyboardInterrupt and SystemExit must
          # propagate straight away rather than be parked in a Future.
          return Future(exc_info=sys.exc_info())
      read_value = read_values.get(path)
      if read_value is None:
        # Never read before.
        uncached[path] = stat_value
        continue
      read_data, read_version = read_value
      if stat_value.version != read_version:
        # Stored copy is stale.
        uncached[path] = stat_value
        continue
      results[path] = read_data

    if not uncached:
      return Future(value=results)

    # list() so that the wrapped file system receives a real list of paths on
    # Python 3 as well, where keys() is only a view.
    return Future(delegate=_AsyncUncachedFuture(
        self._file_system.Read(list(uncached)),
        uncached,
        results,
        self,
        self._read_object_store))

  def GetIdentity(self):
    '''Returns the identity of the wrapped file system.
    '''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '%s of <%s>' % (type(self).__name__, repr(self._file_system))
126