caching_file_system.py revision 4e180b6a0b4720a9b8e9e959a882386f690f08ff
1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import sys
6
7from file_system import FileSystem, StatInfo, FileNotFoundError
8from future import Future
9from object_store_creator import ObjectStoreCreator
10
11
12class _AsyncUncachedFuture(object):
13  def __init__(self,
14               uncached_read_futures,
15               stats_for_uncached,
16               current_results,
17               file_system,
18               object_store):
19    self._uncached_read_futures = uncached_read_futures
20    self._stats_for_uncached = stats_for_uncached
21    self._current_results = current_results
22    self._file_system = file_system
23    self._object_store = object_store
24
25  def Get(self):
26    new_results = self._uncached_read_futures.Get()
27    # Update the cached data in the object store. This is a path -> (read,
28    # version) mapping.
29    self._object_store.SetMulti(dict(
30        (path, (new_result, self._stats_for_uncached[path].version))
31        for path, new_result in new_results.iteritems()))
32    new_results.update(self._current_results)
33    return new_results
34
class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories never files.
  '''
  def __init__(self, file_system, object_store_creator):
    '''Wraps |file_system| and creates the stat, read and binary-read caches
    through |object_store_creator|.
    '''
    self._file_system = file_system
    def create_object_store(category, **optargs):
      # Each store's category is prefixed with the wrapped file system's
      # identity.
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)
    self._stat_object_store = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_object_store = create_object_store('read', start_empty=False)
    self._read_binary_object_store = create_object_store('read-binary',
                                                         start_empty=False)

  def Refresh(self):
    '''Delegates to the wrapped file system's Refresh() and returns its result.
    '''
    return self._file_system.Refresh()

  def Stat(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Returns the directory's stat for a directory path (one ending in '/'), or
    a StatInfo built from the file's entry in the parent's |child_versions|.
    Raises FileNotFoundError if the parent directory has no entry for the file.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    if path.endswith('/'):
      dir_path = path
    else:
      # NOTE(review): a path containing no '/' at all makes this unpacking
      # raise ValueError; confirm whether such paths can reach here.
      dir_path, file_path = path.rsplit('/', 1)
      dir_path += '/'

    # ... and we only ever need to cache the dir stat, too.
    dir_stat = self._stat_object_store.Get(dir_path).Get()
    if dir_stat is None:
      dir_stat = self._file_system.Stat(dir_path)
      assert dir_stat is not None  # should raise a FileNotFoundError
      self._stat_object_store.Set(dir_path, dir_stat)

    if path == dir_path:
      return dir_stat

    file_version = dir_stat.child_versions.get(file_path)
    if file_version is None:
      raise FileNotFoundError('No stat found for %s in %s' % (path, dir_path))
    return StatInfo(file_version)

  def Read(self, paths, binary=False):
    '''Reads a list of files. If a file is cached in the read object store and
    its cached version matches its stat, the cached data is returned. Otherwise,
    the file is retrieved from the file system.

    Returns a Future of a path -> data mapping. If Stat()ing any path raises an
    Exception, the returned Future carries that exception's exc_info instead.
    '''
    read_object_store = (self._read_binary_object_store if binary else
                         self._read_object_store)
    read_values = read_object_store.GetMulti(paths).Get()
    stat_values = self._stat_object_store.GetMulti(paths).Get()
    results = {}  # maps path to read value
    uncached = {}  # maps path to stat value
    for path in paths:
      stat_value = stat_values.get(path)
      if stat_value is None:
        # TODO(cduvall): do a concurrent Stat with the missing stat values.
        try:
          stat_value = self.Stat(path)
        except Exception:
          # Defer ordinary errors to Future.Get(). This is deliberately not a
          # bare except, so that KeyboardInterrupt, SystemExit and the like
          # propagate immediately rather than being captured in the Future.
          return Future(exc_info=sys.exc_info())
      read_value = read_values.get(path)
      # A path needs a fresh read when nothing is cached for it, or when the
      # cached version differs from the stat's version.
      if read_value is None or stat_value.version != read_value[1]:
        uncached[path] = stat_value
        continue
      results[path] = read_value[0]

    if not uncached:
      return Future(value=results)

    return Future(delegate=_AsyncUncachedFuture(
        self._file_system.Read(uncached.keys(), binary=binary),
        uncached,
        results,
        self,
        read_object_store))

  def GetIdentity(self):
    '''Returns the identity of the wrapped file system.
    '''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '<%s of %s>' % (type(self).__name__,
                           type(self._file_system).__name__)
132