simple_index_file.cc revision 7d4cd473f85ac64c3747c96c277f9e506a0d2246
1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/disk_cache/simple/simple_index_file.h"
6
7#include <vector>
8
9#include "base/file_util.h"
10#include "base/files/file_enumerator.h"
11#include "base/hash.h"
12#include "base/logging.h"
13#include "base/metrics/histogram.h"
14#include "base/pickle.h"
15#include "base/single_thread_task_runner.h"
16#include "base/threading/thread_restrictions.h"
17#include "net/disk_cache/simple/simple_entry_format.h"
18#include "net/disk_cache/simple/simple_index.h"
19#include "net/disk_cache/simple/simple_synchronous_entry.h"
20#include "net/disk_cache/simple/simple_util.h"
21#include "third_party/zlib/zlib.h"
22
23
24namespace {
25
26const uint64 kMaxEntiresInIndex = 100000000;
27
28uint32 CalculatePickleCRC(const Pickle& pickle) {
29  return crc32(crc32(0, Z_NULL, 0),
30               reinterpret_cast<const Bytef*>(pickle.payload()),
31               pickle.payload_size());
32}
33
34void DoomEntrySetReply(scoped_ptr<int> result,
35                       const base::Callback<void(int)>& reply_callback) {
36  reply_callback.Run(*result.get());
37}
38
39void WriteToDiskInternal(const base::FilePath& index_filename,
40                         scoped_ptr<Pickle> pickle,
41                         const base::TimeTicks& start_time,
42                         bool app_on_background) {
43  const base::FilePath temp_filename =
44      index_filename.DirName().AppendASCII("index_temp");
45  int bytes_written = file_util::WriteFile(
46      temp_filename,
47      reinterpret_cast<const char*>(pickle->data()),
48      pickle->size());
49  DCHECK_EQ(bytes_written, implicit_cast<int>(pickle->size()));
50  if (bytes_written != static_cast<int>(pickle->size())) {
51    // TODO(felipeg): Add better error handling.
52    LOG(ERROR) << "Could not write Simple Cache index to temporary file: "
53               << temp_filename.value();
54    file_util::Delete(temp_filename, /* recursive = */ false);
55  } else {
56    // Swap temp and index_file.
57    bool result = file_util::ReplaceFile(temp_filename, index_filename);
58    DCHECK(result);
59  }
60  if (app_on_background) {
61    UMA_HISTOGRAM_TIMES("SimpleCache.IndexWriteToDiskTime.Background",
62                        (base::TimeTicks::Now() - start_time));
63  } else {
64    UMA_HISTOGRAM_TIMES("SimpleCache.IndexWriteToDiskTime.Foreground",
65                        (base::TimeTicks::Now() - start_time));
66  }
67}
68
69}  // namespace
70
71namespace disk_cache {
72
73SimpleIndexFile::IndexMetadata::IndexMetadata() :
74    magic_number_(kSimpleIndexMagicNumber),
75    version_(kSimpleVersion),
76    number_of_entries_(0),
77    cache_size_(0) {}
78
79SimpleIndexFile::IndexMetadata::IndexMetadata(
80    uint64 number_of_entries, uint64 cache_size) :
81    magic_number_(kSimpleIndexMagicNumber),
82    version_(kSimpleVersion),
83    number_of_entries_(number_of_entries),
84    cache_size_(cache_size) {}
85
86void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const {
87  DCHECK(pickle);
88  pickle->WriteUInt64(magic_number_);
89  pickle->WriteUInt32(version_);
90  pickle->WriteUInt64(number_of_entries_);
91  pickle->WriteUInt64(cache_size_);
92}
93
94bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) {
95  DCHECK(it);
96  return it->ReadUInt64(&magic_number_) &&
97      it->ReadUInt32(&version_) &&
98      it->ReadUInt64(&number_of_entries_)&&
99      it->ReadUInt64(&cache_size_);
100}
101
102bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
103  return number_of_entries_ <= kMaxEntiresInIndex &&
104      magic_number_ == disk_cache::kSimpleIndexMagicNumber &&
105      version_ == disk_cache::kSimpleVersion;
106}
107
108SimpleIndexFile::SimpleIndexFile(
109    base::SingleThreadTaskRunner* cache_thread,
110    base::TaskRunner* worker_pool,
111    const base::FilePath& index_file_directory)
112    : cache_thread_(cache_thread),
113      worker_pool_(worker_pool),
114      index_file_path_(index_file_directory.AppendASCII("the-real-index")) {}
115
116SimpleIndexFile::~SimpleIndexFile() {}
117
118void SimpleIndexFile::LoadIndexEntries(
119    scoped_refptr<base::SingleThreadTaskRunner> response_thread,
120    const IndexCompletionCallback& completion_callback) {
121  worker_pool_->PostTask(
122      FROM_HERE,
123      base::Bind(&SimpleIndexFile::LoadIndexEntriesInternal,
124                 index_file_path_, response_thread, completion_callback));
125}
126
127void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set,
128                                  uint64 cache_size,
129                                  const base::TimeTicks& start,
130                                  bool app_on_background) {
131  IndexMetadata index_metadata(entry_set.size(), cache_size);
132  scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set);
133  cache_thread_->PostTask(FROM_HERE, base::Bind(
134      &WriteToDiskInternal,
135      index_file_path_,
136      base::Passed(&pickle),
137      base::TimeTicks::Now(),
138      app_on_background));
139}
140
141void SimpleIndexFile::DoomEntrySet(
142    scoped_ptr<std::vector<uint64> > entry_hashes,
143    const base::Callback<void(int)>& reply_callback) {
144  scoped_ptr<int> result(new int());
145  int* result_p(result.get());
146
147  worker_pool_->PostTaskAndReply(
148      FROM_HERE,
149      base::Bind(&SimpleSynchronousEntry::DoomEntrySet,
150                 base::Passed(entry_hashes.Pass()), index_file_path_.DirName(),
151                 result_p),
152      base::Bind(&DoomEntrySetReply, base::Passed(result.Pass()),
153                 reply_callback));
154}
155
156// static
157bool SimpleIndexFile::IsIndexFileStale(const base::FilePath& index_filename) {
158  base::Time index_mtime;
159  base::Time dir_mtime;
160  if (!simple_util::GetMTime(index_filename.DirName(), &dir_mtime))
161    return true;
162  if (!simple_util::GetMTime(index_filename, &index_mtime))
163    return true;
164  // Index file last_modified must be equal to the directory last_modified since
165  // the last operation we do is ReplaceFile in the
166  // SimpleIndexFile::WriteToDisk().
167  // If not true, we need to restore the index.
168  return index_mtime < dir_mtime;
169}
170
171// static
172scoped_ptr<SimpleIndex::EntrySet> SimpleIndexFile::LoadFromDisk(
173    const base::FilePath& index_filename) {
174  std::string contents;
175  if (!file_util::ReadFileToString(index_filename, &contents)) {
176    LOG(WARNING) << "Could not read Simple Index file.";
177    return scoped_ptr<SimpleIndex::EntrySet>();
178  }
179
180  return SimpleIndexFile::Deserialize(contents.data(), contents.size());
181}
182
183// static
184scoped_ptr<SimpleIndex::EntrySet> SimpleIndexFile::Deserialize(const char* data,
185                                                               int data_len) {
186  DCHECK(data);
187  Pickle pickle(data, data_len);
188  if (!pickle.data()) {
189    LOG(WARNING) << "Corrupt Simple Index File.";
190    return scoped_ptr<SimpleIndex::EntrySet>();
191  }
192
193  PickleIterator pickle_it(pickle);
194
195  SimpleIndexFile::PickleHeader* header_p =
196      pickle.headerT<SimpleIndexFile::PickleHeader>();
197  const uint32 crc_read = header_p->crc;
198  const uint32 crc_calculated = CalculatePickleCRC(pickle);
199
200  if (crc_read != crc_calculated) {
201    LOG(WARNING) << "Invalid CRC in Simple Index file.";
202    return scoped_ptr<SimpleIndex::EntrySet>();
203  }
204
205  SimpleIndexFile::IndexMetadata index_metadata;
206  if (!index_metadata.Deserialize(&pickle_it)) {
207    LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
208    return scoped_ptr<SimpleIndex::EntrySet>();
209  }
210
211  if (!index_metadata.CheckIndexMetadata()) {
212    LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
213    return scoped_ptr<SimpleIndex::EntrySet>();
214  }
215
216  scoped_ptr<SimpleIndex::EntrySet> index_file_entries(
217      new SimpleIndex::EntrySet());
218  while (index_file_entries->size() < index_metadata.GetNumberOfEntries()) {
219    uint64 hash_key;
220    EntryMetadata entry_metadata;
221    if (!pickle_it.ReadUInt64(&hash_key) ||
222        !entry_metadata.Deserialize(&pickle_it)) {
223      LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
224      return scoped_ptr<SimpleIndex::EntrySet>();
225    }
226    SimpleIndex::InsertInEntrySet(
227        hash_key, entry_metadata, index_file_entries.get());
228  }
229
230  return index_file_entries.Pass();
231}
232
233// static
234scoped_ptr<Pickle> SimpleIndexFile::Serialize(
235    const SimpleIndexFile::IndexMetadata& index_metadata,
236    const SimpleIndex::EntrySet& entries) {
237  scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader)));
238
239  index_metadata.Serialize(pickle.get());
240  for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
241       it != entries.end(); ++it) {
242    pickle->WriteUInt64(it->first);
243    it->second.Serialize(pickle.get());
244  }
245  SimpleIndexFile::PickleHeader* header_p =
246      pickle->headerT<SimpleIndexFile::PickleHeader>();
247  header_p->crc = CalculatePickleCRC(*pickle);
248  return pickle.Pass();
249}
250
251// static
252void SimpleIndexFile::LoadIndexEntriesInternal(
253    const base::FilePath& index_file_path,
254    scoped_refptr<base::SingleThreadTaskRunner> response_thread,
255    const IndexCompletionCallback& completion_callback) {
256  // TODO(felipeg): probably could load a stale index and use it for something.
257  scoped_ptr<SimpleIndex::EntrySet> index_file_entries;
258
259  const bool index_file_exists = file_util::PathExists(index_file_path);
260
261  // Only load if the index is not stale.
262  const bool index_stale = IsIndexFileStale(index_file_path);
263  if (!index_stale) {
264    const base::TimeTicks start = base::TimeTicks::Now();
265    index_file_entries = LoadFromDisk(index_file_path);
266    UMA_HISTOGRAM_TIMES("SimpleCache.IndexLoadTime",
267                        base::TimeTicks::Now() - start);
268    UMA_HISTOGRAM_COUNTS("SimpleCache.IndexEntriesLoaded",
269                         index_file_entries->size());
270  }
271
272  UMA_HISTOGRAM_BOOLEAN("SimpleCache.IndexStale", index_stale);
273
274  bool force_index_flush = false;
275  if (!index_file_entries) {
276    const base::TimeTicks start = base::TimeTicks::Now();
277    index_file_entries = RestoreFromDisk(index_file_path);
278    UMA_HISTOGRAM_MEDIUM_TIMES("SimpleCache.IndexRestoreTime",
279                        base::TimeTicks::Now() - start);
280    UMA_HISTOGRAM_COUNTS("SimpleCache.IndexEntriesRestored",
281                         index_file_entries->size());
282
283    // When we restore from disk we write the merged index file to disk right
284    // away, this might save us from having to restore again next time.
285    force_index_flush = true;
286  }
287  UMA_HISTOGRAM_BOOLEAN("SimpleCache.IndexCorrupt",
288                        (!index_stale && force_index_flush));
289
290  // Used in histograms. Please only add new values at the end.
291  enum {
292    INITIALIZE_METHOD_RECOVERED = 0,
293    INITIALIZE_METHOD_LOADED = 1,
294    INITIALIZE_METHOD_NEWCACHE = 2,
295    INITIALIZE_METHOD_MAX = 3,
296  };
297  int initialize_method;
298  if (index_file_exists) {
299    if (force_index_flush)
300      initialize_method = INITIALIZE_METHOD_RECOVERED;
301    else
302      initialize_method = INITIALIZE_METHOD_LOADED;
303  } else {
304    UMA_HISTOGRAM_COUNTS("SimpleCache.IndexCreatedEntryCount",
305                         index_file_entries->size());
306    initialize_method = INITIALIZE_METHOD_NEWCACHE;
307  }
308
309  UMA_HISTOGRAM_ENUMERATION("SimpleCache.IndexInitializeMethod",
310                            initialize_method, INITIALIZE_METHOD_MAX);
311  response_thread->PostTask(FROM_HERE,
312                            base::Bind(completion_callback,
313                                       base::Passed(&index_file_entries),
314                                       force_index_flush));
315}
316
317// static
318scoped_ptr<SimpleIndex::EntrySet> SimpleIndexFile::RestoreFromDisk(
319    const base::FilePath& index_file_path) {
320  LOG(INFO) << "Simple Cache Index is being restored from disk.";
321
322  file_util::Delete(index_file_path, /* recursive = */ false);
323  scoped_ptr<SimpleIndex::EntrySet> index_file_entries(
324      new SimpleIndex::EntrySet());
325
326  // TODO(felipeg,gavinp): Fix this once we have a one-file per entry format.
327  COMPILE_ASSERT(kSimpleEntryFileCount == 3,
328                 file_pattern_must_match_file_count);
329
330  const int kFileSuffixLength = sizeof("_0") - 1;
331  const base::FilePath::StringType file_pattern = FILE_PATH_LITERAL("*_[0-2]");
332  base::FileEnumerator enumerator(index_file_path.DirName(),
333                                  false /* recursive */,
334                                  base::FileEnumerator::FILES,
335                                  file_pattern);
336  for (base::FilePath file_path = enumerator.Next(); !file_path.empty();
337       file_path = enumerator.Next()) {
338    const base::FilePath::StringType base_name = file_path.BaseName().value();
339    // Converting to std::string is OK since we never use UTF8 wide chars in our
340    // file names.
341    const std::string hash_key_string(base_name.begin(),
342                                      base_name.end() - kFileSuffixLength);
343    uint64 hash_key = 0;
344    if (!simple_util::GetEntryHashKeyFromHexString(
345            hash_key_string, &hash_key)) {
346      LOG(WARNING) << "Invalid Entry Hash Key filename while restoring "
347                   << "Simple Index from disk: " << base_name;
348      // TODO(felipeg): Should we delete the invalid file here ?
349      continue;
350    }
351
352    base::FileEnumerator::FileInfo info = enumerator.GetInfo();
353    base::Time last_used_time;
354#if defined(OS_POSIX)
355    // For POSIX systems, a last access time is available. However, it's not
356    // guaranteed to be more accurate than mtime. It is no worse though.
357    last_used_time = base::Time::FromTimeT(info.stat().st_atime);
358#endif
359    if (last_used_time.is_null())
360      last_used_time = info.GetLastModifiedTime();
361
362    int64 file_size = info.GetSize();
363    SimpleIndex::EntrySet::iterator it = index_file_entries->find(hash_key);
364    if (it == index_file_entries->end()) {
365      SimpleIndex::InsertInEntrySet(
366          hash_key,
367          EntryMetadata(last_used_time, file_size),
368          index_file_entries.get());
369    } else {
370      // Summing up the total size of the entry through all the *_[0-2] files
371      it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
372    }
373  }
374  return index_file_entries.Pass();
375}
376
377}  // namespace disk_cache
378