1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/disk_cache/simple/simple_index_file.h"
6
7#include <vector>
8
9#include "base/file_util.h"
10#include "base/files/memory_mapped_file.h"
11#include "base/hash.h"
12#include "base/logging.h"
13#include "base/metrics/histogram.h"
14#include "base/pickle.h"
15#include "base/single_thread_task_runner.h"
16#include "base/task_runner_util.h"
17#include "base/threading/thread_restrictions.h"
18#include "net/disk_cache/simple/simple_entry_format.h"
19#include "net/disk_cache/simple/simple_index.h"
20#include "net/disk_cache/simple/simple_synchronous_entry.h"
21#include "net/disk_cache/simple/simple_util.h"
22#include "third_party/zlib/zlib.h"
23
24namespace disk_cache {
25namespace {
26
27const int kEntryFilesHashLength = 16;
28const int kEntryFilesSuffixLength = 2;
29
30const uint64 kMaxEntiresInIndex = 100000000;
31
32const char kIndexFileName[] = "the-real-index";
33const char kTempIndexFileName[] = "temp-index";
34
35uint32 CalculatePickleCRC(const Pickle& pickle) {
36  return crc32(crc32(0, Z_NULL, 0),
37               reinterpret_cast<const Bytef*>(pickle.payload()),
38               pickle.payload_size());
39}
40
41void DoomEntrySetReply(const net::CompletionCallback& reply_callback,
42                       int result) {
43  reply_callback.Run(result);
44}
45
46void WriteToDiskInternal(const base::FilePath& index_filename,
47                         const base::FilePath& temp_index_filename,
48                         scoped_ptr<Pickle> pickle,
49                         const base::TimeTicks& start_time,
50                         bool app_on_background) {
51  int bytes_written = file_util::WriteFile(
52      temp_index_filename,
53      reinterpret_cast<const char*>(pickle->data()),
54      pickle->size());
55  DCHECK_EQ(bytes_written, implicit_cast<int>(pickle->size()));
56  if (bytes_written != static_cast<int>(pickle->size())) {
57    // TODO(felipeg): Add better error handling.
58    LOG(ERROR) << "Could not write Simple Cache index to temporary file: "
59               << temp_index_filename.value();
60    base::DeleteFile(temp_index_filename, /* recursive = */ false);
61  } else {
62    // Swap temp and index_file.
63    bool result = base::ReplaceFile(temp_index_filename, index_filename, NULL);
64    DCHECK(result);
65  }
66  if (app_on_background) {
67    UMA_HISTOGRAM_TIMES("SimpleCache.IndexWriteToDiskTime.Background",
68                        (base::TimeTicks::Now() - start_time));
69  } else {
70    UMA_HISTOGRAM_TIMES("SimpleCache.IndexWriteToDiskTime.Foreground",
71                        (base::TimeTicks::Now() - start_time));
72  }
73}
74
75// Called for each cache directory traversal iteration.
76void ProcessEntryFile(SimpleIndex::EntrySet* entries,
77                      const base::FilePath& file_path) {
78  static const size_t kEntryFilesLength =
79      kEntryFilesHashLength + kEntryFilesSuffixLength;
80  // Converting to std::string is OK since we never use UTF8 wide chars in our
81  // file names.
82  const base::FilePath::StringType base_name = file_path.BaseName().value();
83  const std::string file_name(base_name.begin(), base_name.end());
84  if (file_name.size() != kEntryFilesLength)
85    return;
86  const base::StringPiece hash_string(
87      file_name.begin(), file_name.begin() + kEntryFilesHashLength);
88  uint64 hash_key = 0;
89  if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) {
90    LOG(WARNING) << "Invalid entry hash key filename while restoring index from"
91                 << " disk: " << file_name;
92    return;
93  }
94
95  base::PlatformFileInfo file_info;
96  if (!file_util::GetFileInfo(file_path, &file_info)) {
97    LOG(ERROR) << "Could not get file info for " << file_path.value();
98    return;
99  }
100  base::Time last_used_time;
101#if defined(OS_POSIX)
102  // For POSIX systems, a last access time is available. However, it's not
103  // guaranteed to be more accurate than mtime. It is no worse though.
104  last_used_time = file_info.last_accessed;
105#endif
106  if (last_used_time.is_null())
107    last_used_time = file_info.last_modified;
108
109  int64 file_size = file_info.size;
110  SimpleIndex::EntrySet::iterator it = entries->find(hash_key);
111  if (it == entries->end()) {
112    SimpleIndex::InsertInEntrySet(
113        hash_key,
114        EntryMetadata(last_used_time, file_size),
115        entries);
116  } else {
117    // Summing up the total size of the entry through all the *_[0-2] files
118    it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
119  }
120}
121
122}  // namespace
123
124SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
125                                                 flush_required(false) {
126}
127
128SimpleIndexLoadResult::~SimpleIndexLoadResult() {
129}
130
131void SimpleIndexLoadResult::Reset() {
132  did_load = false;
133  flush_required = false;
134  entries.clear();
135}
136
137SimpleIndexFile::IndexMetadata::IndexMetadata() :
138    magic_number_(kSimpleIndexMagicNumber),
139    version_(kSimpleVersion),
140    number_of_entries_(0),
141    cache_size_(0) {}
142
143SimpleIndexFile::IndexMetadata::IndexMetadata(
144    uint64 number_of_entries, uint64 cache_size) :
145    magic_number_(kSimpleIndexMagicNumber),
146    version_(kSimpleVersion),
147    number_of_entries_(number_of_entries),
148    cache_size_(cache_size) {}
149
150void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const {
151  DCHECK(pickle);
152  pickle->WriteUInt64(magic_number_);
153  pickle->WriteUInt32(version_);
154  pickle->WriteUInt64(number_of_entries_);
155  pickle->WriteUInt64(cache_size_);
156}
157
158bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) {
159  DCHECK(it);
160  return it->ReadUInt64(&magic_number_) &&
161      it->ReadUInt32(&version_) &&
162      it->ReadUInt64(&number_of_entries_)&&
163      it->ReadUInt64(&cache_size_);
164}
165
166bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
167  return number_of_entries_ <= kMaxEntiresInIndex &&
168      magic_number_ == disk_cache::kSimpleIndexMagicNumber &&
169      version_ == disk_cache::kSimpleVersion;
170}
171
172SimpleIndexFile::SimpleIndexFile(
173    base::SingleThreadTaskRunner* cache_thread,
174    base::TaskRunner* worker_pool,
175    const base::FilePath& cache_directory)
176    : cache_thread_(cache_thread),
177      worker_pool_(worker_pool),
178      cache_directory_(cache_directory),
179      index_file_(cache_directory_.AppendASCII(kIndexFileName)),
180      temp_index_file_(cache_directory_.AppendASCII(kTempIndexFileName)) {
181}
182
183SimpleIndexFile::~SimpleIndexFile() {}
184
185void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified,
186                                       const base::Closure& callback,
187                                       SimpleIndexLoadResult* out_result) {
188  base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries,
189                                  cache_last_modified, cache_directory_,
190                                  index_file_, out_result);
191  worker_pool_->PostTaskAndReply(FROM_HERE, task, callback);
192}
193
194void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set,
195                                  uint64 cache_size,
196                                  const base::TimeTicks& start,
197                                  bool app_on_background) {
198  IndexMetadata index_metadata(entry_set.size(), cache_size);
199  scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set);
200  cache_thread_->PostTask(FROM_HERE, base::Bind(
201      &WriteToDiskInternal,
202      index_file_,
203      temp_index_file_,
204      base::Passed(&pickle),
205      base::TimeTicks::Now(),
206      app_on_background));
207}
208
209void SimpleIndexFile::DoomEntrySet(
210    scoped_ptr<std::vector<uint64> > entry_hashes,
211    const net::CompletionCallback& reply_callback) {
212  PostTaskAndReplyWithResult(
213      worker_pool_,
214      FROM_HERE,
215      base::Bind(&SimpleSynchronousEntry::DoomEntrySet,
216                 base::Passed(entry_hashes.Pass()), cache_directory_),
217      base::Bind(&DoomEntrySetReply, reply_callback));
218}
219
220// static
221void SimpleIndexFile::SyncLoadIndexEntries(
222    base::Time cache_last_modified,
223    const base::FilePath& cache_directory,
224    const base::FilePath& index_file_path,
225    SimpleIndexLoadResult* out_result) {
226  // TODO(felipeg): probably could load a stale index and use it for something.
227  const SimpleIndex::EntrySet& entries = out_result->entries;
228
229  const bool index_file_exists = base::PathExists(index_file_path);
230
231  // Used in histograms. Please only add new values at the end.
232  enum {
233    INDEX_STATE_CORRUPT = 0,
234    INDEX_STATE_STALE = 1,
235    INDEX_STATE_FRESH = 2,
236    INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3,
237    INDEX_STATE_MAX = 4,
238  } index_file_state;
239
240  // Only load if the index is not stale.
241  if (IsIndexFileStale(cache_last_modified, index_file_path)) {
242    index_file_state = INDEX_STATE_STALE;
243  } else {
244    index_file_state = INDEX_STATE_FRESH;
245    base::Time latest_dir_mtime;
246    if (simple_util::GetMTime(cache_directory, &latest_dir_mtime) &&
247        IsIndexFileStale(latest_dir_mtime, index_file_path)) {
248      // A file operation has updated the directory since we last looked at it
249      // during backend initialization.
250      index_file_state = INDEX_STATE_FRESH_CONCURRENT_UPDATES;
251    }
252
253    const base::TimeTicks start = base::TimeTicks::Now();
254    SyncLoadFromDisk(index_file_path, out_result);
255    UMA_HISTOGRAM_TIMES("SimpleCache.IndexLoadTime",
256                        base::TimeTicks::Now() - start);
257    UMA_HISTOGRAM_COUNTS("SimpleCache.IndexEntriesLoaded",
258                         out_result->did_load ? entries.size() : 0);
259    if (!out_result->did_load)
260      index_file_state = INDEX_STATE_CORRUPT;
261  }
262  UMA_HISTOGRAM_ENUMERATION("SimpleCache.IndexFileStateOnLoad",
263                            index_file_state,
264                            INDEX_STATE_MAX);
265
266  if (!out_result->did_load) {
267    const base::TimeTicks start = base::TimeTicks::Now();
268    SyncRestoreFromDisk(cache_directory, index_file_path, out_result);
269    UMA_HISTOGRAM_MEDIUM_TIMES("SimpleCache.IndexRestoreTime",
270                        base::TimeTicks::Now() - start);
271    UMA_HISTOGRAM_COUNTS("SimpleCache.IndexEntriesRestored",
272                         entries.size());
273  }
274
275  // Used in histograms. Please only add new values at the end.
276  enum {
277    INITIALIZE_METHOD_RECOVERED = 0,
278    INITIALIZE_METHOD_LOADED = 1,
279    INITIALIZE_METHOD_NEWCACHE = 2,
280    INITIALIZE_METHOD_MAX = 3,
281  };
282  int initialize_method;
283  if (index_file_exists) {
284    if (out_result->flush_required)
285      initialize_method = INITIALIZE_METHOD_RECOVERED;
286    else
287      initialize_method = INITIALIZE_METHOD_LOADED;
288  } else {
289    UMA_HISTOGRAM_COUNTS("SimpleCache.IndexCreatedEntryCount",
290                         entries.size());
291    initialize_method = INITIALIZE_METHOD_NEWCACHE;
292  }
293
294  UMA_HISTOGRAM_ENUMERATION("SimpleCache.IndexInitializeMethod",
295                            initialize_method, INITIALIZE_METHOD_MAX);
296}
297
298// static
299void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename,
300                                       SimpleIndexLoadResult* out_result) {
301  out_result->Reset();
302
303  base::MemoryMappedFile index_file_map;
304  if (!index_file_map.Initialize(index_filename)) {
305    LOG(WARNING) << "Could not map Simple Index file.";
306    base::DeleteFile(index_filename, false);
307    return;
308  }
309
310  SimpleIndexFile::Deserialize(
311      reinterpret_cast<const char*>(index_file_map.data()),
312      index_file_map.length(), out_result);
313
314  if (!out_result->did_load)
315    base::DeleteFile(index_filename, false);
316}
317
318// static
319scoped_ptr<Pickle> SimpleIndexFile::Serialize(
320    const SimpleIndexFile::IndexMetadata& index_metadata,
321    const SimpleIndex::EntrySet& entries) {
322  scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader)));
323
324  index_metadata.Serialize(pickle.get());
325  for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
326       it != entries.end(); ++it) {
327    pickle->WriteUInt64(it->first);
328    it->second.Serialize(pickle.get());
329  }
330  SimpleIndexFile::PickleHeader* header_p =
331      pickle->headerT<SimpleIndexFile::PickleHeader>();
332  header_p->crc = CalculatePickleCRC(*pickle);
333  return pickle.Pass();
334}
335
336// static
337void SimpleIndexFile::Deserialize(const char* data, int data_len,
338                                  SimpleIndexLoadResult* out_result) {
339  DCHECK(data);
340
341  out_result->Reset();
342  SimpleIndex::EntrySet* entries = &out_result->entries;
343
344  Pickle pickle(data, data_len);
345  if (!pickle.data()) {
346    LOG(WARNING) << "Corrupt Simple Index File.";
347    return;
348  }
349
350  PickleIterator pickle_it(pickle);
351
352  SimpleIndexFile::PickleHeader* header_p =
353      pickle.headerT<SimpleIndexFile::PickleHeader>();
354  const uint32 crc_read = header_p->crc;
355  const uint32 crc_calculated = CalculatePickleCRC(pickle);
356
357  if (crc_read != crc_calculated) {
358    LOG(WARNING) << "Invalid CRC in Simple Index file.";
359    return;
360  }
361
362  SimpleIndexFile::IndexMetadata index_metadata;
363  if (!index_metadata.Deserialize(&pickle_it)) {
364    LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
365    return;
366  }
367
368  if (!index_metadata.CheckIndexMetadata()) {
369    LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
370    return;
371  }
372
373#if !defined(OS_WIN)
374  // TODO(gavinp): Consider using std::unordered_map.
375  entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge);
376#endif
377  while (entries->size() < index_metadata.GetNumberOfEntries()) {
378    uint64 hash_key;
379    EntryMetadata entry_metadata;
380    if (!pickle_it.ReadUInt64(&hash_key) ||
381        !entry_metadata.Deserialize(&pickle_it)) {
382      LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
383      entries->clear();
384      return;
385    }
386    SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries);
387  }
388
389  out_result->did_load = true;
390}
391
392// static
393void SimpleIndexFile::SyncRestoreFromDisk(
394    const base::FilePath& cache_directory,
395    const base::FilePath& index_file_path,
396    SimpleIndexLoadResult* out_result) {
397  LOG(INFO) << "Simple Cache Index is being restored from disk.";
398  base::DeleteFile(index_file_path, /* recursive = */ false);
399  out_result->Reset();
400  SimpleIndex::EntrySet* entries = &out_result->entries;
401
402  // TODO(felipeg,gavinp): Fix this once we have a one-file per entry format.
403  COMPILE_ASSERT(kSimpleEntryFileCount == 3,
404                 file_pattern_must_match_file_count);
405
406  const bool did_succeed = TraverseCacheDirectory(
407      cache_directory, base::Bind(&ProcessEntryFile, entries));
408  if (!did_succeed) {
409    LOG(ERROR) << "Could not reconstruct index from disk";
410    return;
411  }
412  out_result->did_load = true;
413  // When we restore from disk we write the merged index file to disk right
414  // away, this might save us from having to restore again next time.
415  out_result->flush_required = true;
416}
417
418// static
419bool SimpleIndexFile::IsIndexFileStale(base::Time cache_last_modified,
420                                       const base::FilePath& index_file_path) {
421  base::Time index_mtime;
422  if (!simple_util::GetMTime(index_file_path, &index_mtime))
423    return true;
424  return index_mtime < cache_last_modified;
425}
426
427}  // namespace disk_cache
428