1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/disk_cache/simple/simple_index_file.h"
6
7#include <vector>
8
9#include "base/file_util.h"
10#include "base/files/memory_mapped_file.h"
11#include "base/hash.h"
12#include "base/logging.h"
13#include "base/pickle.h"
14#include "base/single_thread_task_runner.h"
15#include "base/task_runner_util.h"
16#include "base/threading/thread_restrictions.h"
17#include "net/disk_cache/simple/simple_backend_version.h"
18#include "net/disk_cache/simple/simple_entry_format.h"
19#include "net/disk_cache/simple/simple_histogram_macros.h"
20#include "net/disk_cache/simple/simple_index.h"
21#include "net/disk_cache/simple/simple_synchronous_entry.h"
22#include "net/disk_cache/simple/simple_util.h"
23#include "third_party/zlib/zlib.h"
24
25namespace disk_cache {
26namespace {
27
28const int kEntryFilesHashLength = 16;
29const int kEntryFilesSuffixLength = 2;
30
31const uint64 kMaxEntiresInIndex = 100000000;
32
33uint32 CalculatePickleCRC(const Pickle& pickle) {
34  return crc32(crc32(0, Z_NULL, 0),
35               reinterpret_cast<const Bytef*>(pickle.payload()),
36               pickle.payload_size());
37}
38
39// Used in histograms. Please only add new values at the end.
40enum IndexFileState {
41  INDEX_STATE_CORRUPT = 0,
42  INDEX_STATE_STALE = 1,
43  INDEX_STATE_FRESH = 2,
44  INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3,
45  INDEX_STATE_MAX = 4,
46};
47
48void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type) {
49  SIMPLE_CACHE_UMA(ENUMERATION,
50                   "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX);
51}
52
53// Used in histograms. Please only add new values at the end.
54enum IndexInitMethod {
55  INITIALIZE_METHOD_RECOVERED = 0,
56  INITIALIZE_METHOD_LOADED = 1,
57  INITIALIZE_METHOD_NEWCACHE = 2,
58  INITIALIZE_METHOD_MAX = 3,
59};
60
61void UmaRecordIndexInitMethod(IndexInitMethod method,
62                              net::CacheType cache_type) {
63  SIMPLE_CACHE_UMA(ENUMERATION,
64                   "IndexInitializeMethod", cache_type,
65                   method, INITIALIZE_METHOD_MAX);
66}
67
68bool WritePickleFile(Pickle* pickle, const base::FilePath& file_name) {
69  int bytes_written = file_util::WriteFile(
70      file_name, static_cast<const char*>(pickle->data()), pickle->size());
71  if (bytes_written != implicit_cast<int>(pickle->size())) {
72    base::DeleteFile(file_name, /* recursive = */ false);
73    return false;
74  }
75  return true;
76}
77
78// Called for each cache directory traversal iteration.
79void ProcessEntryFile(SimpleIndex::EntrySet* entries,
80                      const base::FilePath& file_path) {
81  static const size_t kEntryFilesLength =
82      kEntryFilesHashLength + kEntryFilesSuffixLength;
83  // Converting to std::string is OK since we never use UTF8 wide chars in our
84  // file names.
85  const base::FilePath::StringType base_name = file_path.BaseName().value();
86  const std::string file_name(base_name.begin(), base_name.end());
87  if (file_name.size() != kEntryFilesLength)
88    return;
89  const base::StringPiece hash_string(
90      file_name.begin(), file_name.begin() + kEntryFilesHashLength);
91  uint64 hash_key = 0;
92  if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) {
93    LOG(WARNING) << "Invalid entry hash key filename while restoring index from"
94                 << " disk: " << file_name;
95    return;
96  }
97
98  base::PlatformFileInfo file_info;
99  if (!base::GetFileInfo(file_path, &file_info)) {
100    LOG(ERROR) << "Could not get file info for " << file_path.value();
101    return;
102  }
103  base::Time last_used_time;
104#if defined(OS_POSIX)
105  // For POSIX systems, a last access time is available. However, it's not
106  // guaranteed to be more accurate than mtime. It is no worse though.
107  last_used_time = file_info.last_accessed;
108#endif
109  if (last_used_time.is_null())
110    last_used_time = file_info.last_modified;
111
112  int64 file_size = file_info.size;
113  SimpleIndex::EntrySet::iterator it = entries->find(hash_key);
114  if (it == entries->end()) {
115    SimpleIndex::InsertInEntrySet(
116        hash_key,
117        EntryMetadata(last_used_time, file_size),
118        entries);
119  } else {
120    // Summing up the total size of the entry through all the *_[0-1] files
121    it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
122  }
123}
124
125}  // namespace
126
127SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
128                                                 flush_required(false) {
129}
130
131SimpleIndexLoadResult::~SimpleIndexLoadResult() {
132}
133
134void SimpleIndexLoadResult::Reset() {
135  did_load = false;
136  flush_required = false;
137  entries.clear();
138}
139
140// static
141const char SimpleIndexFile::kIndexFileName[] = "the-real-index";
142// static
143const char SimpleIndexFile::kIndexDirectory[] = "index-dir";
144// static
145const char SimpleIndexFile::kTempIndexFileName[] = "temp-index";
146
147SimpleIndexFile::IndexMetadata::IndexMetadata()
148    : magic_number_(kSimpleIndexMagicNumber),
149      version_(kSimpleVersion),
150      number_of_entries_(0),
151      cache_size_(0) {}
152
153SimpleIndexFile::IndexMetadata::IndexMetadata(
154    uint64 number_of_entries, uint64 cache_size)
155    : magic_number_(kSimpleIndexMagicNumber),
156      version_(kSimpleVersion),
157      number_of_entries_(number_of_entries),
158      cache_size_(cache_size) {}
159
160void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const {
161  DCHECK(pickle);
162  pickle->WriteUInt64(magic_number_);
163  pickle->WriteUInt32(version_);
164  pickle->WriteUInt64(number_of_entries_);
165  pickle->WriteUInt64(cache_size_);
166}
167
168// static
169bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified,
170                                         Pickle* pickle) {
171  if (!pickle->WriteInt64(cache_modified.ToInternalValue()))
172    return false;
173  SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>();
174  header_p->crc = CalculatePickleCRC(*pickle);
175  return true;
176}
177
178bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) {
179  DCHECK(it);
180  return it->ReadUInt64(&magic_number_) &&
181      it->ReadUInt32(&version_) &&
182      it->ReadUInt64(&number_of_entries_)&&
183      it->ReadUInt64(&cache_size_);
184}
185
186void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type,
187                                      const base::FilePath& cache_directory,
188                                      const base::FilePath& index_filename,
189                                      const base::FilePath& temp_index_filename,
190                                      scoped_ptr<Pickle> pickle,
191                                      const base::TimeTicks& start_time,
192                                      bool app_on_background) {
193  // There is a chance that the index containing all the necessary data about
194  // newly created entries will appear to be stale. This can happen if on-disk
195  // part of a Create operation does not fit into the time budget for the index
196  // flush delay. This simple approach will be reconsidered if it does not allow
197  // for maintaining freshness.
198  base::PlatformFileInfo cache_dir_info;
199  base::Time cache_dir_mtime;
200  if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) {
201    LOG(ERROR) << "Could obtain information about cache age";
202    return;
203  }
204  SerializeFinalData(cache_dir_mtime, pickle.get());
205  if (!WritePickleFile(pickle.get(), temp_index_filename)) {
206    if (!base::CreateDirectory(temp_index_filename.DirName())) {
207      LOG(ERROR) << "Could not create a directory to hold the index file";
208      return;
209    }
210    if (!WritePickleFile(pickle.get(), temp_index_filename)) {
211      LOG(ERROR) << "Failed to write the temporary index file";
212      return;
213    }
214  }
215
216  // Atomically rename the temporary index file to become the real one.
217  bool result = base::ReplaceFile(temp_index_filename, index_filename, NULL);
218  DCHECK(result);
219
220  if (app_on_background) {
221    SIMPLE_CACHE_UMA(TIMES,
222                     "IndexWriteToDiskTime.Background", cache_type,
223                     (base::TimeTicks::Now() - start_time));
224  } else {
225    SIMPLE_CACHE_UMA(TIMES,
226                     "IndexWriteToDiskTime.Foreground", cache_type,
227                     (base::TimeTicks::Now() - start_time));
228  }
229}
230
231bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
232  return number_of_entries_ <= kMaxEntiresInIndex &&
233      magic_number_ == kSimpleIndexMagicNumber &&
234      version_ == kSimpleVersion;
235}
236
237SimpleIndexFile::SimpleIndexFile(
238    base::SingleThreadTaskRunner* cache_thread,
239    base::TaskRunner* worker_pool,
240    net::CacheType cache_type,
241    const base::FilePath& cache_directory)
242    : cache_thread_(cache_thread),
243      worker_pool_(worker_pool),
244      cache_type_(cache_type),
245      cache_directory_(cache_directory),
246      index_file_(cache_directory_.AppendASCII(kIndexDirectory)
247                      .AppendASCII(kIndexFileName)),
248      temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory)
249                           .AppendASCII(kTempIndexFileName)) {
250}
251
252SimpleIndexFile::~SimpleIndexFile() {}
253
254void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified,
255                                       const base::Closure& callback,
256                                       SimpleIndexLoadResult* out_result) {
257  base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries,
258                                  cache_type_,
259                                  cache_last_modified, cache_directory_,
260                                  index_file_, out_result);
261  worker_pool_->PostTaskAndReply(FROM_HERE, task, callback);
262}
263
264void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set,
265                                  uint64 cache_size,
266                                  const base::TimeTicks& start,
267                                  bool app_on_background) {
268  IndexMetadata index_metadata(entry_set.size(), cache_size);
269  scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set);
270  cache_thread_->PostTask(FROM_HERE, base::Bind(
271      &SimpleIndexFile::SyncWriteToDisk,
272      cache_type_,
273      cache_directory_,
274      index_file_,
275      temp_index_file_,
276      base::Passed(&pickle),
277      base::TimeTicks::Now(),
278      app_on_background));
279}
280
281// static
282void SimpleIndexFile::SyncLoadIndexEntries(
283    net::CacheType cache_type,
284    base::Time cache_last_modified,
285    const base::FilePath& cache_directory,
286    const base::FilePath& index_file_path,
287    SimpleIndexLoadResult* out_result) {
288  // Load the index and find its age.
289  base::Time last_cache_seen_by_index;
290  SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result);
291
292  // Consider the index loaded if it is fresh.
293  const bool index_file_existed = base::PathExists(index_file_path);
294  if (!out_result->did_load) {
295    if (index_file_existed)
296      UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type);
297  } else {
298    if (cache_last_modified <= last_cache_seen_by_index) {
299      base::Time latest_dir_mtime;
300      simple_util::GetMTime(cache_directory, &latest_dir_mtime);
301      if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) {
302        UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES,
303                                cache_type);
304      } else {
305        UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type);
306      }
307      UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED, cache_type);
308      return;
309    }
310    UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type);
311  }
312
313  // Reconstruct the index by scanning the disk for entries.
314  const base::TimeTicks start = base::TimeTicks::Now();
315  SyncRestoreFromDisk(cache_directory, index_file_path, out_result);
316  SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type,
317                   base::TimeTicks::Now() - start);
318  SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type,
319                   out_result->entries.size());
320  if (index_file_existed) {
321    UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED, cache_type);
322  } else {
323    UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE, cache_type);
324    SIMPLE_CACHE_UMA(COUNTS,
325                     "IndexCreatedEntryCount", cache_type,
326                     out_result->entries.size());
327  }
328}
329
330// static
331void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename,
332                                       base::Time* out_last_cache_seen_by_index,
333                                       SimpleIndexLoadResult* out_result) {
334  out_result->Reset();
335
336  base::MemoryMappedFile index_file_map;
337  if (!index_file_map.Initialize(index_filename)) {
338    LOG(WARNING) << "Could not map Simple Index file.";
339    base::DeleteFile(index_filename, false);
340    return;
341  }
342
343  SimpleIndexFile::Deserialize(
344      reinterpret_cast<const char*>(index_file_map.data()),
345      index_file_map.length(),
346      out_last_cache_seen_by_index,
347      out_result);
348
349  if (!out_result->did_load)
350    base::DeleteFile(index_filename, false);
351}
352
353// static
354scoped_ptr<Pickle> SimpleIndexFile::Serialize(
355    const SimpleIndexFile::IndexMetadata& index_metadata,
356    const SimpleIndex::EntrySet& entries) {
357  scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader)));
358
359  index_metadata.Serialize(pickle.get());
360  for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
361       it != entries.end(); ++it) {
362    pickle->WriteUInt64(it->first);
363    it->second.Serialize(pickle.get());
364  }
365  return pickle.Pass();
366}
367
368// static
369void SimpleIndexFile::Deserialize(const char* data, int data_len,
370                                  base::Time* out_cache_last_modified,
371                                  SimpleIndexLoadResult* out_result) {
372  DCHECK(data);
373
374  out_result->Reset();
375  SimpleIndex::EntrySet* entries = &out_result->entries;
376
377  Pickle pickle(data, data_len);
378  if (!pickle.data()) {
379    LOG(WARNING) << "Corrupt Simple Index File.";
380    return;
381  }
382
383  PickleIterator pickle_it(pickle);
384  SimpleIndexFile::PickleHeader* header_p =
385      pickle.headerT<SimpleIndexFile::PickleHeader>();
386  const uint32 crc_read = header_p->crc;
387  const uint32 crc_calculated = CalculatePickleCRC(pickle);
388
389  if (crc_read != crc_calculated) {
390    LOG(WARNING) << "Invalid CRC in Simple Index file.";
391    return;
392  }
393
394  SimpleIndexFile::IndexMetadata index_metadata;
395  if (!index_metadata.Deserialize(&pickle_it)) {
396    LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
397    return;
398  }
399
400  if (!index_metadata.CheckIndexMetadata()) {
401    LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
402    return;
403  }
404
405#if !defined(OS_WIN)
406  // TODO(gavinp): Consider using std::unordered_map.
407  entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge);
408#endif
409  while (entries->size() < index_metadata.GetNumberOfEntries()) {
410    uint64 hash_key;
411    EntryMetadata entry_metadata;
412    if (!pickle_it.ReadUInt64(&hash_key) ||
413        !entry_metadata.Deserialize(&pickle_it)) {
414      LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
415      entries->clear();
416      return;
417    }
418    SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries);
419  }
420
421  int64 cache_last_modified;
422  if (!pickle_it.ReadInt64(&cache_last_modified)) {
423    entries->clear();
424    return;
425  }
426  DCHECK(out_cache_last_modified);
427  *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified);
428
429  out_result->did_load = true;
430}
431
432// static
433void SimpleIndexFile::SyncRestoreFromDisk(
434    const base::FilePath& cache_directory,
435    const base::FilePath& index_file_path,
436    SimpleIndexLoadResult* out_result) {
437  LOG(INFO) << "Simple Cache Index is being restored from disk.";
438  base::DeleteFile(index_file_path, /* recursive = */ false);
439  out_result->Reset();
440  SimpleIndex::EntrySet* entries = &out_result->entries;
441
442  const bool did_succeed = TraverseCacheDirectory(
443      cache_directory, base::Bind(&ProcessEntryFile, entries));
444  if (!did_succeed) {
445    LOG(ERROR) << "Could not reconstruct index from disk";
446    return;
447  }
448  out_result->did_load = true;
449  // When we restore from disk we write the merged index file to disk right
450  // away, this might save us from having to restore again next time.
451  out_result->flush_required = true;
452}
453
454// static
455bool SimpleIndexFile::LegacyIsIndexFileStale(
456    base::Time cache_last_modified,
457    const base::FilePath& index_file_path) {
458  base::Time index_mtime;
459  if (!simple_util::GetMTime(index_file_path, &index_mtime))
460    return true;
461  return index_mtime < cache_last_modified;
462}
463
464}  // namespace disk_cache
465