1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/chromeos/drive/file_cache.h"
6
7#include <vector>
8
9#include "base/callback_helpers.h"
10#include "base/files/file_enumerator.h"
11#include "base/files/file_util.h"
12#include "base/logging.h"
13#include "base/metrics/histogram.h"
14#include "base/strings/string_util.h"
15#include "base/strings/stringprintf.h"
16#include "base/sys_info.h"
17#include "chrome/browser/chromeos/drive/drive.pb.h"
18#include "chrome/browser/chromeos/drive/file_system_util.h"
19#include "chrome/browser/chromeos/drive/resource_metadata_storage.h"
20#include "chrome/browser/drive/drive_api_util.h"
21#include "chromeos/chromeos_constants.h"
22#include "content/public/browser/browser_thread.h"
23#include "google_apis/drive/task_util.h"
24#include "net/base/filename_util.h"
25#include "net/base/mime_sniffer.h"
26#include "net/base/mime_util.h"
27#include "third_party/cros_system_api/constants/cryptohome.h"
28
29using content::BrowserThread;
30
31namespace drive {
32namespace internal {
33namespace {
34
35// Returns ID extracted from the path.
36std::string GetIdFromPath(const base::FilePath& path) {
37  return util::UnescapeCacheFileName(path.BaseName().AsUTF8Unsafe());
38}
39
40}  // namespace
41
42FileCache::FileCache(ResourceMetadataStorage* storage,
43                     const base::FilePath& cache_file_directory,
44                     base::SequencedTaskRunner* blocking_task_runner,
45                     FreeDiskSpaceGetterInterface* free_disk_space_getter)
46    : cache_file_directory_(cache_file_directory),
47      blocking_task_runner_(blocking_task_runner),
48      storage_(storage),
49      free_disk_space_getter_(free_disk_space_getter),
50      weak_ptr_factory_(this) {
51  DCHECK(blocking_task_runner_.get());
52  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
53}
54
55FileCache::~FileCache() {
56  // Must be on the sequenced worker pool, as |metadata_| must be deleted on
57  // the sequenced worker pool.
58  AssertOnSequencedWorkerPool();
59}
60
61base::FilePath FileCache::GetCacheFilePath(const std::string& id) const {
62  return cache_file_directory_.Append(
63      base::FilePath::FromUTF8Unsafe(util::EscapeCacheFileName(id)));
64}
65
66void FileCache::AssertOnSequencedWorkerPool() {
67  DCHECK(blocking_task_runner_->RunsTasksOnCurrentThread());
68}
69
70bool FileCache::IsUnderFileCacheDirectory(const base::FilePath& path) const {
71  return cache_file_directory_.IsParent(path);
72}
73
74bool FileCache::FreeDiskSpaceIfNeededFor(int64 num_bytes) {
75  AssertOnSequencedWorkerPool();
76
77  // Do nothing and return if we have enough space.
78  if (HasEnoughSpaceFor(num_bytes, cache_file_directory_))
79    return true;
80
81  // Otherwise, try to free up the disk space.
82  DVLOG(1) << "Freeing up disk space for " << num_bytes;
83
84  // Remove all entries unless specially marked.
85  scoped_ptr<ResourceMetadataStorage::Iterator> it = storage_->GetIterator();
86  for (; !it->IsAtEnd(); it->Advance()) {
87    if (it->GetValue().file_specific_info().has_cache_state() &&
88        !it->GetValue().file_specific_info().cache_state().is_pinned() &&
89        !it->GetValue().file_specific_info().cache_state().is_dirty() &&
90        !mounted_files_.count(it->GetID())) {
91      ResourceEntry entry(it->GetValue());
92      entry.mutable_file_specific_info()->clear_cache_state();
93      storage_->PutEntry(entry);
94    }
95  }
96  if (it->HasError())
97    return false;
98
99  // Remove all files which have no corresponding cache entries.
100  base::FileEnumerator enumerator(cache_file_directory_,
101                                  false,  // not recursive
102                                  base::FileEnumerator::FILES);
103  ResourceEntry entry;
104  for (base::FilePath current = enumerator.Next(); !current.empty();
105       current = enumerator.Next()) {
106    std::string id = GetIdFromPath(current);
107    FileError error = storage_->GetEntry(id, &entry);
108    if (error == FILE_ERROR_NOT_FOUND ||
109        (error == FILE_ERROR_OK &&
110         !entry.file_specific_info().cache_state().is_present()))
111      base::DeleteFile(current, false /* recursive */);
112    else if (error != FILE_ERROR_OK)
113      return false;
114  }
115
116  // Check the disk space again.
117  return HasEnoughSpaceFor(num_bytes, cache_file_directory_);
118}
119
120FileError FileCache::GetFile(const std::string& id,
121                             base::FilePath* cache_file_path) {
122  AssertOnSequencedWorkerPool();
123  DCHECK(cache_file_path);
124
125  ResourceEntry entry;
126  FileError error = storage_->GetEntry(id, &entry);
127  if (error != FILE_ERROR_OK)
128    return error;
129  if (!entry.file_specific_info().cache_state().is_present())
130    return FILE_ERROR_NOT_FOUND;
131
132  *cache_file_path = GetCacheFilePath(id);
133  return FILE_ERROR_OK;
134}
135
136FileError FileCache::Store(const std::string& id,
137                           const std::string& md5,
138                           const base::FilePath& source_path,
139                           FileOperationType file_operation_type) {
140  AssertOnSequencedWorkerPool();
141
142  ResourceEntry entry;
143  FileError error = storage_->GetEntry(id, &entry);
144  if (error != FILE_ERROR_OK)
145    return error;
146
147  int64 file_size = 0;
148  if (file_operation_type == FILE_OPERATION_COPY) {
149    if (!base::GetFileSize(source_path, &file_size)) {
150      LOG(WARNING) << "Couldn't get file size for: " << source_path.value();
151      return FILE_ERROR_FAILED;
152    }
153  }
154  if (!FreeDiskSpaceIfNeededFor(file_size))
155    return FILE_ERROR_NO_LOCAL_SPACE;
156
157  // If file is mounted, return error.
158  if (mounted_files_.count(id))
159    return FILE_ERROR_IN_USE;
160
161  base::FilePath dest_path = GetCacheFilePath(id);
162  bool success = false;
163  switch (file_operation_type) {
164    case FILE_OPERATION_MOVE:
165      success = base::Move(source_path, dest_path);
166      break;
167    case FILE_OPERATION_COPY:
168      success = base::CopyFile(source_path, dest_path);
169      break;
170    default:
171      NOTREACHED();
172  }
173
174  if (!success) {
175    LOG(ERROR) << "Failed to store: "
176               << "source_path = " << source_path.value() << ", "
177               << "dest_path = " << dest_path.value() << ", "
178               << "file_operation_type = " << file_operation_type;
179    return FILE_ERROR_FAILED;
180  }
181
182  // Now that file operations have completed, update metadata.
183  FileCacheEntry* cache_state =
184      entry.mutable_file_specific_info()->mutable_cache_state();
185  cache_state->set_md5(md5);
186  cache_state->set_is_present(true);
187  if (md5.empty())
188    cache_state->set_is_dirty(true);
189  return storage_->PutEntry(entry);
190}
191
192FileError FileCache::Pin(const std::string& id) {
193  AssertOnSequencedWorkerPool();
194
195  ResourceEntry entry;
196  FileError error = storage_->GetEntry(id, &entry);
197  if (error != FILE_ERROR_OK)
198    return error;
199  entry.mutable_file_specific_info()->mutable_cache_state()->set_is_pinned(
200      true);
201  return storage_->PutEntry(entry);
202}
203
204FileError FileCache::Unpin(const std::string& id) {
205  AssertOnSequencedWorkerPool();
206
207  // Unpinning a file means its entry must exist in cache.
208  ResourceEntry entry;
209  FileError error = storage_->GetEntry(id, &entry);
210  if (error != FILE_ERROR_OK)
211    return error;
212
213  // Now that file operations have completed, update metadata.
214  if (entry.file_specific_info().cache_state().is_present()) {
215    entry.mutable_file_specific_info()->mutable_cache_state()->set_is_pinned(
216        false);
217  } else {
218    // Remove the existing entry if we are unpinning a non-present file.
219    entry.mutable_file_specific_info()->clear_cache_state();
220  }
221  error = storage_->PutEntry(entry);
222  if (error != FILE_ERROR_OK)
223    return error;
224
225  // Now it's a chance to free up space if needed.
226  FreeDiskSpaceIfNeededFor(0);
227
228  return FILE_ERROR_OK;
229}
230
231FileError FileCache::MarkAsMounted(const std::string& id,
232                                   base::FilePath* cache_file_path) {
233  AssertOnSequencedWorkerPool();
234  DCHECK(cache_file_path);
235
236  // Get cache entry associated with the id and md5
237  ResourceEntry entry;
238  FileError error = storage_->GetEntry(id, &entry);
239  if (error != FILE_ERROR_OK)
240    return error;
241  if (!entry.file_specific_info().cache_state().is_present())
242    return FILE_ERROR_NOT_FOUND;
243
244  if (mounted_files_.count(id))
245    return FILE_ERROR_INVALID_OPERATION;
246
247  // Ensure the file is readable to cros_disks. See crbug.com/236994.
248  base::FilePath path = GetCacheFilePath(id);
249  if (!base::SetPosixFilePermissions(
250          path,
251          base::FILE_PERMISSION_READ_BY_USER |
252          base::FILE_PERMISSION_WRITE_BY_USER |
253          base::FILE_PERMISSION_READ_BY_GROUP |
254          base::FILE_PERMISSION_READ_BY_OTHERS))
255    return FILE_ERROR_FAILED;
256
257  mounted_files_.insert(id);
258
259  *cache_file_path = path;
260  return FILE_ERROR_OK;
261}
262
263FileError FileCache::OpenForWrite(
264    const std::string& id,
265    scoped_ptr<base::ScopedClosureRunner>* file_closer) {
266  AssertOnSequencedWorkerPool();
267
268  // Marking a file dirty means its entry and actual file blob must exist in
269  // cache.
270  ResourceEntry entry;
271  FileError error = storage_->GetEntry(id, &entry);
272  if (error != FILE_ERROR_OK)
273    return error;
274  if (!entry.file_specific_info().cache_state().is_present()) {
275    LOG(WARNING) << "Can't mark dirty a file that wasn't cached: " << id;
276    return FILE_ERROR_NOT_FOUND;
277  }
278
279  entry.mutable_file_specific_info()->mutable_cache_state()->set_is_dirty(true);
280  entry.mutable_file_specific_info()->mutable_cache_state()->clear_md5();
281  error = storage_->PutEntry(entry);
282  if (error != FILE_ERROR_OK)
283    return error;
284
285  write_opened_files_[id]++;
286  file_closer->reset(new base::ScopedClosureRunner(
287      base::Bind(&google_apis::RunTaskWithTaskRunner,
288                 blocking_task_runner_,
289                 base::Bind(&FileCache::CloseForWrite,
290                            weak_ptr_factory_.GetWeakPtr(),
291                            id))));
292  return FILE_ERROR_OK;
293}
294
295bool FileCache::IsOpenedForWrite(const std::string& id) {
296  AssertOnSequencedWorkerPool();
297  return write_opened_files_.count(id);
298}
299
300FileError FileCache::UpdateMd5(const std::string& id) {
301  AssertOnSequencedWorkerPool();
302
303  if (IsOpenedForWrite(id))
304    return FILE_ERROR_IN_USE;
305
306  ResourceEntry entry;
307  FileError error = storage_->GetEntry(id, &entry);
308  if (error != FILE_ERROR_OK)
309    return error;
310  if (!entry.file_specific_info().cache_state().is_present())
311    return FILE_ERROR_NOT_FOUND;
312
313  const std::string& md5 = util::GetMd5Digest(GetCacheFilePath(id));
314  if (md5.empty())
315    return FILE_ERROR_NOT_FOUND;
316
317  entry.mutable_file_specific_info()->mutable_cache_state()->set_md5(md5);
318  return storage_->PutEntry(entry);
319}
320
321FileError FileCache::ClearDirty(const std::string& id) {
322  AssertOnSequencedWorkerPool();
323
324  if (IsOpenedForWrite(id))
325    return FILE_ERROR_IN_USE;
326
327  // Clearing a dirty file means its entry and actual file blob must exist in
328  // cache.
329  ResourceEntry entry;
330  FileError error = storage_->GetEntry(id, &entry);
331  if (error != FILE_ERROR_OK)
332    return error;
333  if (!entry.file_specific_info().cache_state().is_present()) {
334    LOG(WARNING) << "Can't clear dirty state of a file that wasn't cached: "
335                 << id;
336    return FILE_ERROR_NOT_FOUND;
337  }
338
339  // If a file is not dirty (it should have been marked dirty via OpenForWrite),
340  // clearing its dirty state is an invalid operation.
341  if (!entry.file_specific_info().cache_state().is_dirty()) {
342    LOG(WARNING) << "Can't clear dirty state of a non-dirty file: " << id;
343    return FILE_ERROR_INVALID_OPERATION;
344  }
345
346  entry.mutable_file_specific_info()->mutable_cache_state()->set_is_dirty(
347      false);
348  return storage_->PutEntry(entry);
349}
350
351FileError FileCache::Remove(const std::string& id) {
352  AssertOnSequencedWorkerPool();
353
354  ResourceEntry entry;
355
356  // If entry doesn't exist, nothing to do.
357  FileError error = storage_->GetEntry(id, &entry);
358  if (error == FILE_ERROR_NOT_FOUND)
359    return FILE_ERROR_OK;
360  if (error != FILE_ERROR_OK)
361    return error;
362  if (!entry.file_specific_info().has_cache_state())
363    return FILE_ERROR_OK;
364
365  // Cannot delete a mounted file.
366  if (mounted_files_.count(id))
367    return FILE_ERROR_IN_USE;
368
369  // Delete the file.
370  base::FilePath path = GetCacheFilePath(id);
371  if (!base::DeleteFile(path, false /* recursive */))
372    return FILE_ERROR_FAILED;
373
374  // Now that all file operations have completed, remove from metadata.
375  entry.mutable_file_specific_info()->clear_cache_state();
376  return storage_->PutEntry(entry);
377}
378
379bool FileCache::ClearAll() {
380  AssertOnSequencedWorkerPool();
381
382  // Remove files.
383  base::FileEnumerator enumerator(cache_file_directory_,
384                                  false,  // not recursive
385                                  base::FileEnumerator::FILES);
386  for (base::FilePath file = enumerator.Next(); !file.empty();
387       file = enumerator.Next())
388    base::DeleteFile(file, false /* recursive */);
389
390  return true;
391}
392
393bool FileCache::Initialize() {
394  AssertOnSequencedWorkerPool();
395
396  // Older versions do not clear MD5 when marking entries dirty.
397  // Clear MD5 of all dirty entries to deal with old data.
398  scoped_ptr<ResourceMetadataStorage::Iterator> it = storage_->GetIterator();
399  for (; !it->IsAtEnd(); it->Advance()) {
400    if (it->GetValue().file_specific_info().cache_state().is_dirty()) {
401      ResourceEntry new_entry(it->GetValue());
402      new_entry.mutable_file_specific_info()->mutable_cache_state()->
403          clear_md5();
404      if (storage_->PutEntry(new_entry) != FILE_ERROR_OK)
405        return false;
406    }
407  }
408  if (it->HasError())
409    return false;
410
411  if (!RenameCacheFilesToNewFormat())
412    return false;
413  return true;
414}
415
416void FileCache::Destroy() {
417  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
418
419  // Destroy myself on the blocking pool.
420  // Note that base::DeletePointer<> cannot be used as the destructor of this
421  // class is private.
422  blocking_task_runner_->PostTask(
423      FROM_HERE,
424      base::Bind(&FileCache::DestroyOnBlockingPool, base::Unretained(this)));
425}
426
427void FileCache::DestroyOnBlockingPool() {
428  AssertOnSequencedWorkerPool();
429  delete this;
430}
431
432bool FileCache::RecoverFilesFromCacheDirectory(
433    const base::FilePath& dest_directory,
434    const ResourceMetadataStorage::RecoveredCacheInfoMap&
435        recovered_cache_info) {
436  int file_number = 1;
437
438  base::FileEnumerator enumerator(cache_file_directory_,
439                                  false,  // not recursive
440                                  base::FileEnumerator::FILES);
441  for (base::FilePath current = enumerator.Next(); !current.empty();
442       current = enumerator.Next()) {
443    const std::string& id = GetIdFromPath(current);
444    ResourceEntry entry;
445    FileError error = storage_->GetEntry(id, &entry);
446    if (error != FILE_ERROR_OK && error != FILE_ERROR_NOT_FOUND)
447      return false;
448    if (error == FILE_ERROR_OK &&
449        entry.file_specific_info().cache_state().is_present()) {
450      // This file is managed by FileCache, no need to recover it.
451      continue;
452    }
453
454    // If a cache entry which is non-dirty and has matching MD5 is found in
455    // |recovered_cache_entries|, it means the current file is already uploaded
456    // to the server. Just delete it instead of recovering it.
457    ResourceMetadataStorage::RecoveredCacheInfoMap::const_iterator it =
458        recovered_cache_info.find(id);
459    if (it != recovered_cache_info.end()) {
460      // Due to the DB corruption, cache info might be recovered from old
461      // revision. Perform MD5 check even when is_dirty is false just in case.
462      if (!it->second.is_dirty &&
463          it->second.md5 == util::GetMd5Digest(current)) {
464        base::DeleteFile(current, false /* recursive */);
465        continue;
466      }
467    }
468
469    // Read file contents to sniff mime type.
470    std::vector<char> content(net::kMaxBytesToSniff);
471    const int read_result =
472        base::ReadFile(current, &content[0], content.size());
473    if (read_result < 0) {
474      LOG(WARNING) << "Cannot read: " << current.value();
475      return false;
476    }
477    if (read_result == 0)  // Skip empty files.
478      continue;
479
480    // Use recovered file name if available, otherwise decide file name with
481    // sniffed mime type.
482    base::FilePath dest_base_name(FILE_PATH_LITERAL("file"));
483    std::string mime_type;
484    if (it != recovered_cache_info.end() && !it->second.title.empty()) {
485      // We can use a file name recovered from the trashed DB.
486      dest_base_name = base::FilePath::FromUTF8Unsafe(it->second.title);
487    } else if (net::SniffMimeType(&content[0], read_result,
488                                  net::FilePathToFileURL(current),
489                                  std::string(), &mime_type) ||
490               net::SniffMimeTypeFromLocalData(&content[0], read_result,
491                                               &mime_type)) {
492      // Change base name for common mime types.
493      if (net::MatchesMimeType("image/*", mime_type)) {
494        dest_base_name = base::FilePath(FILE_PATH_LITERAL("image"));
495      } else if (net::MatchesMimeType("video/*", mime_type)) {
496        dest_base_name = base::FilePath(FILE_PATH_LITERAL("video"));
497      } else if (net::MatchesMimeType("audio/*", mime_type)) {
498        dest_base_name = base::FilePath(FILE_PATH_LITERAL("audio"));
499      }
500
501      // Estimate extension from mime type.
502      std::vector<base::FilePath::StringType> extensions;
503      base::FilePath::StringType extension;
504      if (net::GetPreferredExtensionForMimeType(mime_type, &extension))
505        extensions.push_back(extension);
506      else
507        net::GetExtensionsForMimeType(mime_type, &extensions);
508
509      // Add extension if possible.
510      if (!extensions.empty())
511        dest_base_name = dest_base_name.AddExtension(extensions[0]);
512    }
513
514    // Add file number to the file name and move.
515    const base::FilePath& dest_path = dest_directory.Append(dest_base_name)
516        .InsertBeforeExtensionASCII(base::StringPrintf("%08d", file_number++));
517    if (!base::CreateDirectory(dest_directory) ||
518        !base::Move(current, dest_path)) {
519      LOG(WARNING) << "Failed to move: " << current.value()
520                   << " to " << dest_path.value();
521      return false;
522    }
523  }
524  UMA_HISTOGRAM_COUNTS("Drive.NumberOfCacheFilesRecoveredAfterDBCorruption",
525                       file_number - 1);
526  return true;
527}
528
529FileError FileCache::MarkAsUnmounted(const base::FilePath& file_path) {
530  AssertOnSequencedWorkerPool();
531  DCHECK(IsUnderFileCacheDirectory(file_path));
532
533  std::string id = GetIdFromPath(file_path);
534
535  // Get the entry associated with the id.
536  ResourceEntry entry;
537  FileError error = storage_->GetEntry(id, &entry);
538  if (error != FILE_ERROR_OK)
539    return error;
540
541  std::set<std::string>::iterator it = mounted_files_.find(id);
542  if (it == mounted_files_.end())
543    return FILE_ERROR_INVALID_OPERATION;
544
545  mounted_files_.erase(it);
546  return FILE_ERROR_OK;
547}
548
549bool FileCache::HasEnoughSpaceFor(int64 num_bytes,
550                                  const base::FilePath& path) {
551  int64 free_space = 0;
552  if (free_disk_space_getter_)
553    free_space = free_disk_space_getter_->AmountOfFreeDiskSpace();
554  else
555    free_space = base::SysInfo::AmountOfFreeDiskSpace(path);
556
557  // Subtract this as if this portion does not exist.
558  free_space -= cryptohome::kMinFreeSpaceInBytes;
559  return (free_space >= num_bytes);
560}
561
562bool FileCache::RenameCacheFilesToNewFormat() {
563  base::FileEnumerator enumerator(cache_file_directory_,
564                                  false,  // not recursive
565                                  base::FileEnumerator::FILES);
566  for (base::FilePath current = enumerator.Next(); !current.empty();
567       current = enumerator.Next()) {
568    base::FilePath new_path = current.RemoveExtension();
569    if (!new_path.Extension().empty()) {
570      // Delete files with multiple extensions.
571      if (!base::DeleteFile(current, false /* recursive */))
572        return false;
573      continue;
574    }
575    const std::string& id = GetIdFromPath(new_path);
576    new_path = GetCacheFilePath(util::CanonicalizeResourceId(id));
577    if (new_path != current && !base::Move(current, new_path))
578      return false;
579  }
580  return true;
581}
582
583void FileCache::CloseForWrite(const std::string& id) {
584  AssertOnSequencedWorkerPool();
585
586  std::map<std::string, int>::iterator it = write_opened_files_.find(id);
587  if (it == write_opened_files_.end())
588    return;
589
590  DCHECK_LT(0, it->second);
591  --it->second;
592  if (it->second == 0)
593    write_opened_files_.erase(it);
594
595  // Update last modified date.
596  ResourceEntry entry;
597  FileError error = storage_->GetEntry(id, &entry);
598  if (error != FILE_ERROR_OK) {
599    LOG(ERROR) << "Failed to get entry: " << id << ", "
600               << FileErrorToString(error);
601    return;
602  }
603  entry.mutable_file_info()->set_last_modified(
604      base::Time::Now().ToInternalValue());
605  error = storage_->PutEntry(entry);
606  if (error != FILE_ERROR_OK) {
607    LOG(ERROR) << "Failed to put entry: " << id << ", "
608               << FileErrorToString(error);
609  }
610}
611
612}  // namespace internal
613}  // namespace drive
614