caching_file_system.py revision 1e9bf3e0803691d0a228da41fc608347b6db4340
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
5import posixpath
6import sys
7
8from file_system import FileSystem, StatInfo, FileNotFoundError
9from future import Future
10from object_store_creator import ObjectStoreCreator
11
12
class _AsyncUncachedFuture(object):
  '''Future delegate which completes a partially-cached read: it resolves the
  reads which missed the cache, writes their results to the object store, and
  returns them merged with the results which were already available.
  '''
  def __init__(self,
               uncached_read_futures,
               stats_for_uncached,
               current_results,
               file_system,
               object_store):
    '''|uncached_read_futures| has a Get() method returning a path -> data
    mapping. |stats_for_uncached| maps each of those paths to an object with a
    |version| attribute. |current_results| is a path -> data mapping which is
    merged into the value returned from Get(). |object_store| has a SetMulti()
    method which receives the path -> (data, version) mapping to cache.
    |file_system| is stored but not used by Get().
    '''
    self._uncached_read_futures = uncached_read_futures
    self._stats_for_uncached = stats_for_uncached
    self._current_results = current_results
    self._file_system = file_system
    self._object_store = object_store

  def Get(self):
    '''Resolves the uncached reads, caches them alongside their versions, and
    returns a new dict holding both the newly read and the current results.
    '''
    new_results = self._uncached_read_futures.Get()
    # Update the cached data in the object store. This is a path -> (read,
    # version) mapping. items() is used rather than iteritems() so that this
    # works on both Python 2 and Python 3.
    self._object_store.SetMulti(dict(
        (path, (new_result, self._stats_for_uncached[path].version))
        for path, new_result in new_results.items()))
    # Merge into a copy so that the dict owned by the underlying future isn't
    # modified behind its back.
    all_results = dict(new_results)
    all_results.update(self._current_results)
    return all_results
35
class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories, never files.
  '''
  def __init__(self, file_system, object_store_creator):
    '''|file_system| is the file system to wrap. |object_store_creator| is used
    to create the stat and read caches; each cache's category is prefixed with
    the identity of |file_system|.
    '''
    self._file_system = file_system
    def create_object_store(category, **optargs):
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)
    self._stat_object_store = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_object_store = create_object_store('read', start_empty=False)
    self._read_binary_object_store = create_object_store('read-binary',
                                                         start_empty=False)

  def Refresh(self):
    '''Delegates to the underlying file system's Refresh().
    '''
    return self._file_system.Refresh()

  def Stat(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Raises a FileNotFoundError if |path| is a file which isn't listed in the
    child versions of its parent directory's stat.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    # posixpath.split drops the trailing slash; directories are keyed with one.
    if dir_path and not dir_path.endswith('/'):
      dir_path += '/'

    # ... and we only ever need to cache the dir stat, too.
    dir_stat = self._stat_object_store.Get(dir_path).Get()
    if dir_stat is None:
      dir_stat = self._file_system.Stat(dir_path)
      assert dir_stat is not None  # should raise a FileNotFoundError
      self._stat_object_store.Set(dir_path, dir_stat)

    # |path| was itself the directory, so the directory's stat is the answer.
    if path == dir_path:
      return dir_stat

    file_version = dir_stat.child_versions.get(file_path)
    if file_version is None:
      raise FileNotFoundError('No stat found for %s in %s' % (path, dir_path))
    return StatInfo(file_version)

  def Read(self, paths, binary=False):
    '''Reads a list of files. If a file is in memcache and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file system.

    Returns a Future of a path -> data mapping. If stat'ing any path fails, the
    returned Future is constructed from that failure's exc_info instead.
    '''
    read_object_store = (self._read_binary_object_store if binary else
                         self._read_object_store)
    read_values = read_object_store.GetMulti(paths).Get()
    stat_values = self._stat_object_store.GetMulti(paths).Get()
    results = {}  # maps path to read value
    uncached = {}  # maps path to stat value
    for path in paths:
      stat_value = stat_values.get(path)
      if stat_value is None:
        # TODO(cduvall): do a concurrent Stat with the missing stat values.
        try:
          stat_value = self.Stat(path)
        except Exception:
          # Report the failure through the returned Future rather than raising
          # here. This is deliberately not a bare except: KeyboardInterrupt
          # and SystemExit should propagate immediately, not be deferred.
          return Future(exc_info=sys.exc_info())
      read_value = read_values.get(path)
      if read_value is None:
        # Never read before; needs to be fetched.
        uncached[path] = stat_value
        continue
      read_data, read_version = read_value
      if stat_value.version != read_version:
        # The cached read is out of date; needs to be fetched again.
        uncached[path] = stat_value
        continue
      results[path] = read_data

    if not uncached:
      return Future(value=results)

    # list(uncached) rather than uncached.keys() so that a real list of paths
    # is passed on both Python 2 and Python 3.
    return Future(delegate=_AsyncUncachedFuture(
        self._file_system.Read(list(uncached), binary=binary),
        uncached,
        results,
        self,
        read_object_store))

  def GetIdentity(self):
    '''Returns the identity of the underlying file system.
    '''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '<%s of %s>' % (type(self).__name__,
                           type(self._file_system).__name__)
131