caching_file_system.py revision a93a17c8d99d686bd4a1511e5504e5e6cc9fcadf
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)from file_system import FileSystem, StatInfo, FileNotFoundError
6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)from future import Future
7b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)from object_store_creator import ObjectStoreCreator
8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
class _AsyncUncachedFuture(object):
  '''Future delegate which completes a partially cached read.

  Get() resolves the outstanding read of the uncached paths, writes each
  freshly read value to |object_store| together with the version recorded for
  it in |stats_for_uncached|, and returns the fresh values merged with the
  values which had already been served from the cache.
  '''
  def __init__(self,
               uncached_read_futures,
               stats_for_uncached,
               current_results,
               file_system,
               object_store):
    '''|uncached_read_futures| must have a Get() method returning a
    path -> data dict. |stats_for_uncached| maps each of those paths to an
    object with a |version| attribute. |current_results| is the path -> data
    dict of values that did not need to be re-read. |file_system| is stored
    but not otherwise used by this class. |object_store| must have a
    SetMulti() method accepting a path -> (data, version) dict.
    '''
    self._uncached_read_futures = uncached_read_futures
    self._stats_for_uncached = stats_for_uncached
    self._current_results = current_results
    self._file_system = file_system
    self._object_store = object_store

  def Get(self):
    '''Returns a path -> data dict holding both the newly read values and the
    previously cached ones. Raises KeyError if a newly read path has no entry
    in the stats given at construction.
    '''
    new_results = self._uncached_read_futures.Get()
    # Update the cached data in the object store. This is a path -> (read,
    # version) mapping. items() rather than iteritems() so that this works
    # with both Python 2 and Python 3 dicts.
    self._object_store.SetMulti(dict(
        (path, (new_result, self._stats_for_uncached[path].version))
        for path, new_result in new_results.items()))
    # Merge into a copy rather than into |new_results| itself. Otherwise, if
    # the underlying future hands back the same dict on a later Get(), the
    # cached paths would be in it and would have no entry in
    # |_stats_for_uncached|.
    all_results = dict(new_results)
    all_results.update(self._current_results)
    return all_results
31c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
class CachingFileSystem(FileSystem):
  '''FileSystem which adds a caching layer on top of |file_system|. Stat() is
  used to decide whether a Read() from |file_system| can be skipped, and only
  directories are ever Stat()ed on |file_system|, never individual files.
  '''
  def __init__(self, file_system, object_store_creator):
    '''Wraps |file_system|, creating the stat and read caches through
    |object_store_creator|. Every cache category is prefixed with the identity
    of |file_system|.
    '''
    self._file_system = file_system

    def new_store(name, **store_options):
      namespaced = '%s/%s' % (file_system.GetIdentity(), name)
      return object_store_creator.Create(
          CachingFileSystem, category=namespaced, **store_options)

    self._stat_object_store = new_store('stat')
    # The read caches can both (a) start populated and (b) be shared with all
    # other app versions, because the data changing is detected by the stat.
    # Without this optimisation, bumping app version is extremely slow.
    shared_read_options = {'start_empty': False, 'app_version': None}
    self._read_object_store = new_store('read', **shared_read_options)
    self._read_binary_object_store = new_store('read-binary',
                                               **shared_read_options)

  def Stat(self, path):
    '''Returns the stat info for |path|. A directory (a path ending in '/') is
    statted directly; for a file, the parent directory is statted and the
    file's version is looked up among that directory's child versions.

    Raises FileNotFoundError if the parent directory stat has no entry for the
    file.
    '''
    # Always work from the parent directory's stat: it carries the versions of
    # its children, so one stat describes the entire directory.
    is_directory = path.endswith('/')
    if is_directory:
      parent_dir = path
      child_name = None
    else:
      # NOTE(review): a |path| containing no '/' cannot be unpacked here and
      # raises ValueError - confirm whether callers can ever pass one.
      parent, child_name = path.rsplit('/', 1)
      parent_dir = parent + '/'

    # Only directory stats are ever written to the stat cache.
    cached_stat = self._stat_object_store.Get(parent_dir).Get()
    if cached_stat is not None:
      parent_stat = cached_stat
    else:
      parent_stat = self._file_system.Stat(parent_dir)
      assert parent_stat is not None  # should raise a FileNotFoundError
      self._stat_object_store.Set(parent_dir, parent_stat)

    if is_directory:
      return parent_stat

    version = parent_stat.child_versions.get(child_name)
    if version is None:
      raise FileNotFoundError(
          'No stat found for %s in %s' % (path, parent_dir))
    return StatInfo(version)

  def Read(self, paths, binary=False):
    '''Returns a Future resolving to a path -> data dict for |paths|. A value
    found in the read cache is used when its stored version matches the
    current stat version of its path; every other path is read from the
    wrapped file system.
    '''
    if binary:
      store = self._read_binary_object_store
    else:
      store = self._read_object_store
    cached_reads = store.GetMulti(paths).Get()
    cached_stats = self._stat_object_store.GetMulti(paths).Get()

    fresh = {}  # path -> data which is cached and up to date
    stale = {}  # path -> stat value for paths which must be (re-)read
    for path in paths:
      current_stat = cached_stats.get(path)
      if current_stat is None:
        # TODO(cduvall): do a concurrent Stat with the missing stat values.
        current_stat = self.Stat(path)

      cached_read = cached_reads.get(path)
      if cached_read is None:
        needs_read = True
      else:
        data, cached_version = cached_read
        needs_read = current_stat.version != cached_version

      if needs_read:
        stale[path] = current_stat
      else:
        fresh[path] = data

    if stale:
      pending_reads = self._file_system.Read(stale.keys(), binary=binary)
      return Future(delegate=_AsyncUncachedFuture(
          pending_reads, stale, fresh, self, store))
    return Future(value=fresh)

  def GetIdentity(self):
    '''Returns the identity of the wrapped file system.'''
    identity = self._file_system.GetIdentity()
    return identity
119