# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
51e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)import posixpath
64e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)import sys
74e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)from file_system import FileSystem, StatInfo, FileNotFoundError
91320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccifrom future import All, Future
101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccifrom path_util import AssertIsDirectory, IsDirectory, ToDirectory
11e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdochfrom third_party.json_schema_compiler.memoize import memoize
12c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
134e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories never files.
  '''
  def __init__(self, file_system, object_store_creator):
    '''Wraps |file_system| and creates the stat, read and walk caches through
    |object_store_creator|. Each cache category is prefixed with the identity
    of |file_system|.
    '''
    self._file_system = file_system
    def create_object_store(category, **optargs):
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)
    self._stat_cache = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_cache = create_object_store('read', start_empty=False)
    self._walk_cache = create_object_store('walk', start_empty=False)

  def Refresh(self):
    '''Delegates to the underlying file system's Refresh().
    '''
    return self._file_system.Refresh()

  def StatAsync(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Returns a Future. For a directory |path| it resolves to the directory's
    stat; for a file it resolves to a StatInfo holding only that file's
    version. Resolving the Future raises FileNotFoundError if the parent
    directory's stat has no entry for the file.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    dir_path = ToDirectory(dir_path)

    def make_stat_info(dir_stat):
      '''Converts a dir stat into the correct resulting StatInfo; if the Stat
      was for a file, the StatInfo should just contain that file.
      '''
      if path == dir_path:
        return dir_stat
      # Was a file stat. Extract that file.
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s (found %s)' %
                                (path, dir_path, dir_stat.child_versions))
      return StatInfo(file_version)

    cached_dir_stat = self._stat_cache.Get(dir_path).Get()
    if cached_dir_stat is not None:
      # Wrap the conversion in a callback so that a FileNotFoundError surfaces
      # when the Future is resolved rather than when StatAsync() is called.
      return Future(callback=lambda: make_stat_info(cached_dir_stat))

    def cache_and_convert(dir_stat):
      assert dir_stat is not None  # should have raised a FileNotFoundError
      # We only ever need to cache the dir stat.
      self._stat_cache.Set(dir_path, dir_stat)
      return make_stat_info(dir_stat)
    return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(
        cache_and_convert)

  @memoize
  def _MemoizedStatAsyncFromFileSystem(self, dir_path):
    '''This is a simple wrapper to memoize Futures to directory stats, since
    StatAsync makes heavy use of it. Only cache directories so that the
    memoized cache doesn't blow up.
    '''
    assert IsDirectory(dir_path)
    return self._file_system.StatAsync(dir_path)

  def Read(self, paths, skip_not_found=False):
    '''Reads a list of files. If a file is cached and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file system.

    Returns a Future resolving to a mapping of path to data. If
    |skip_not_found| is set, a FileNotFoundError raised while stat'ing a path
    is swallowed and the flag is forwarded to the underlying Read().
    '''
    # Files which aren't found are cached in the read object store as
    # (path, None, None). This is to prevent re-reads of files we know
    # do not exist.
    cached_read_values = self._read_cache.GetMulti(paths).Get()
    cached_stat_values = self._stat_cache.GetMulti(paths).Get()

    def handle(error):
      # Turn "not found" into a None stat; anything else is a real failure.
      if isinstance(error, FileNotFoundError):
        return None
      raise error

    # Populate a map of paths to Futures to their stat. They may have already
    # been cached in which case their Future will already have been constructed
    # with a value.
    stat_futures = {}
    for path in paths:
      stat_value = cached_stat_values.get(path)
      if stat_value is None:
        stat_future = self.StatAsync(path)
        if skip_not_found:
          stat_future = stat_future.Then(lambda x: x, handle)
      else:
        stat_future = Future(value=stat_value)
      stat_futures[path] = stat_future

    def is_up_to_date(path, version):
      '''Returns True if the cached |version| of |path| matches its latest stat.
      '''
      # |version| == None implies a non-existent file, so skip it.
      if version is None:
        return False
      stat_info = stat_futures[path].Get()
      # With |skip_not_found| the stat resolves to None for a file that has
      # disappeared since it was cached; treat that entry as stale rather than
      # dereferencing None.
      return stat_info is not None and stat_info.version == version

    # Filter only the cached data which is up to date by comparing to the latest
    # stat. The cached read data includes the cached version. Remove it for
    # the result returned to callers.
    up_to_date_data = dict(
        (path, data) for path, (data, version) in cached_read_values.iteritems()
        if is_up_to_date(path, version))

    if skip_not_found:
      # Filter out paths which we know do not exist, i.e. if |path| is in
      # |cached_read_values| *and* has a None version, then it doesn't exist.
      # See the above declaration of |cached_read_values| for more information.
      paths = [path for path in paths
               if cached_read_values.get(path, (None, True))[1]]

    if len(up_to_date_data) == len(paths):
      # Everything was cached and up-to-date.
      return Future(value=up_to_date_data)

    def update_caches(new_results):
      # Update the cache. This is a path -> (data, version) mapping.
      self._read_cache.SetMulti(
          dict((path, (new_result, stat_futures[path].Get().version))
               for path, new_result in new_results.iteritems()))
      # Update the read cache to include files that weren't found, to prevent
      # constantly trying to read a file we now know doesn't exist.
      self._read_cache.SetMulti(
          dict((path, (None, None)) for path in paths
               if stat_futures[path].Get() is None))
      new_results.update(up_to_date_data)
      return new_results
    # Read in the values that were uncached or old.
    return self._file_system.Read(set(paths) - set(up_to_date_data),
                                  skip_not_found=skip_not_found).Then(
                                      update_caches)

  def GetCommitID(self):
    '''Delegates to the underlying file system's GetCommitID().
    '''
    return self._file_system.GetCommitID()

  def GetPreviousCommitID(self):
    '''Delegates to the underlying file system's GetPreviousCommitID().
    '''
    return self._file_system.GetPreviousCommitID()

  def Walk(self, root, depth=-1):
    '''Overrides FileSystem.Walk() to provide caching functionality.

    The walk itself is delegated to the underlying file system; only the
    per-directory listing is served from (and stored into) the walk cache.
    '''
    def file_lister(root):
      '''Returns a (dirs, files) tuple for |root|, using the walk cache when
      the cached listing's version matches the current stat of |root|.
      '''
      res, root_stat = All((self._walk_cache.Get(root),
                            self.StatAsync(root))).Get()

      if res and res[2] == root_stat.version:
        dirs, files = res[0], res[1]
      else:
        # Wasn't cached, or not up to date.
        dirs, files = [], []
        for f in self.ReadSingle(root).Get():
          if IsDirectory(f):
            dirs.append(f)
          else:
            files.append(f)
        # Update the cache. This is a root -> (dirs, files, version) mapping.
        self._walk_cache.Set(root, (dirs, files, root_stat.version))
      return dirs, files
    return self._file_system.Walk(root, depth=depth, file_lister=file_lister)

  def GetIdentity(self):
    '''Delegates to the underlying file system's GetIdentity().
    '''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '%s of <%s>' % (type(self).__name__, repr(self._file_system))
183