caching_file_system.py revision c2e0dbddbe15c98d52c4786dac06cb8952a8ae6d
1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from file_system import FileSystem, StatInfo, FileNotFoundError
6from future import Future
7
class _AsyncUncachedFuture(object):
  '''Future delegate which completes a read for the paths that could not be
  served from the cache.

  |uncached_read_futures| is an object whose Get() returns a path -> data dict
  for the uncached paths. |stats_for_uncached| maps each of those paths to an
  object with a |version| attribute. |current_results| is the path -> data dict
  of values that were already available. |object_store| receives the freshly
  read values through SetMulti(). |file_system| is stored but not otherwise
  used by this class.
  '''
  def __init__(self,
               uncached_read_futures,
               stats_for_uncached,
               current_results,
               file_system,
               object_store):
    self._uncached_read_futures = uncached_read_futures
    self._stats_for_uncached = stats_for_uncached
    self._current_results = current_results
    self._file_system = file_system
    self._object_store = object_store

  def Get(self):
    '''Resolves the pending read, caches what it returned, and returns a
    path -> data dict holding both the fresh and the already-available values.

    The dict returned by the underlying Get() is updated in place with
    |current_results| and is the object handed back to the caller.
    '''
    new_results = self._uncached_read_futures.Get()
    # Update the cached data in the object store. This is a path -> (read,
    # version) mapping.
    # items() rather than iteritems(): the latter only exists on Python 2.
    self._object_store.SetMulti(dict(
        (path, (new_result, self._stats_for_uncached[path].version))
        for path, new_result in new_results.items()))
    new_results.update(self._current_results)
    return new_results
30
class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories, never files.

  Specify |use_existing_values| to continue using whatever has been cached in
  the object stores. By default, the data in the stores is assumed to be stale
  (although consistent). Using existing values is useful for live instances
  that don't want to touch the file system; not using them is good for the
  cron jobs, where we want to refresh the data.
  '''
  def __init__(self,
               file_system,
               object_store_creator_factory,
               use_existing_values=False):
    '''Wraps |file_system| and creates the three object stores ('stat', 'read'
    and 'read-binary') through |object_store_creator_factory|. Each store's
    category is prefixed with |file_system|'s GetName().
    '''
    self._file_system = file_system
    def create_object_store(category):
      return (object_store_creator_factory.Create(CachingFileSystem)
          .Create(category='%s/%s' % (file_system.GetName(), category),
                  # By Stat()ing from scratch we'll end up not using the
                  # existing values, but also not doing unnecessary Read()s if
                  # the files haven't changed from last time.
                  start_empty=(not use_existing_values and category == 'stat')))
    self._stat_object_store = create_object_store('stat')
    self._read_object_store = create_object_store('read')
    self._read_binary_object_store = create_object_store('read-binary')

  def Stat(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    For a directory (a path ending in '/') the directory's own stat object is
    returned. For a file, a StatInfo built from the file's entry in the parent
    stat's |child_versions| is returned.

    Raises FileNotFoundError if the parent directory has no version recorded
    for the file.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    if path.endswith('/'):
      dir_path = path
      file_path = None
    else:
      # rpartition() always yields three parts, so a path without any '/' no
      # longer blows up with a ValueError on unpacking; it is looked up in '/',
      # the same parent that '/name' resolves to.
      # NOTE(review): confirm the wrapped file system accepts '/' as its root.
      parent_path, _, file_path = path.rpartition('/')
      dir_path = parent_path + '/'

    # ... and we only ever need to cache the dir stat, too.
    dir_stat = self._stat_object_store.Get(dir_path).Get()
    if dir_stat is None:
      dir_stat = self._file_system.Stat(dir_path)
      assert dir_stat is not None  # should raise a FileNotFoundError
      self._stat_object_store.Set(dir_path, dir_stat)

    if path == dir_path:
      return dir_stat

    file_version = dir_stat.child_versions.get(file_path)
    if file_version is None:
      raise FileNotFoundError('No stat found for %s in %s' % (path, dir_path))
    return StatInfo(file_version)

  def Read(self, paths, binary=False):
    '''Reads a list of files. If a file is in the object store and it is not
    out of date, it is returned. Otherwise, the file is retrieved from the file
    system.

    Returns a Future whose value is a path -> data dict. |binary| selects which
    of the two read stores is consulted and is forwarded to the wrapped file
    system's Read().
    '''
    read_object_store = (self._read_binary_object_store if binary else
                         self._read_object_store)
    read_values = read_object_store.GetMulti(paths).Get()
    # Stat() in this class only ever stores directory paths, so for a file path
    # this lookup is expected to miss and fall through to Stat() below.
    stat_values = self._stat_object_store.GetMulti(paths).Get()
    results = {}  # maps path to read value
    uncached = {}  # maps path to stat value
    for path in paths:
      stat_value = stat_values.get(path)
      if stat_value is None:
        # TODO(cduvall): do a concurrent Stat with the missing stat values.
        stat_value = self.Stat(path)
      read_value = read_values.get(path)
      if read_value is None:
        # Never read before.
        uncached[path] = stat_value
        continue
      read_data, read_version = read_value
      if stat_value.version != read_version:
        # Cached, but for a different version than the current one.
        uncached[path] = stat_value
        continue
      results[path] = read_data

    if not uncached:
      return Future(value=results)

    # list() rather than keys(): hands the wrapped file system a real list on
    # Python 3 as well as Python 2.
    return Future(delegate=_AsyncUncachedFuture(
        self._file_system.Read(list(uncached), binary=binary),
        uncached,
        results,
        self,
        read_object_store))
121