1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/browser/storage_partition_impl_map.h"
6
7#include "base/bind.h"
8#include "base/callback.h"
9#include "base/files/file_enumerator.h"
10#include "base/files/file_path.h"
11#include "base/files/file_util.h"
12#include "base/stl_util.h"
13#include "base/strings/string_number_conversions.h"
14#include "base/strings/string_util.h"
15#include "base/strings/stringprintf.h"
16#include "base/threading/sequenced_worker_pool.h"
17#include "content/browser/appcache/chrome_appcache_service.h"
18#include "content/browser/fileapi/browser_file_system_helper.h"
19#include "content/browser/fileapi/chrome_blob_storage_context.h"
20#include "content/browser/loader/resource_request_info_impl.h"
21#include "content/browser/resource_context_impl.h"
22#include "content/browser/service_worker/service_worker_request_handler.h"
23#include "content/browser/storage_partition_impl.h"
24#include "content/browser/streams/stream.h"
25#include "content/browser/streams/stream_context.h"
26#include "content/browser/streams/stream_registry.h"
27#include "content/browser/streams/stream_url_request_job.h"
28#include "content/browser/webui/url_data_manager_backend.h"
29#include "content/public/browser/browser_context.h"
30#include "content/public/browser/browser_thread.h"
31#include "content/public/browser/content_browser_client.h"
32#include "content/public/browser/storage_partition.h"
33#include "content/public/common/content_constants.h"
34#include "content/public/common/url_constants.h"
35#include "crypto/sha2.h"
36#include "net/url_request/url_request_context.h"
37#include "net/url_request/url_request_context_getter.h"
38#include "storage/browser/blob/blob_storage_context.h"
39#include "storage/browser/blob/blob_url_request_job_factory.h"
40#include "storage/browser/fileapi/file_system_url_request_job_factory.h"
41#include "storage/common/blob/blob_data.h"
42
43using storage::FileSystemContext;
44using storage::BlobStorageContext;
45
46namespace content {
47
48namespace {
49
50// A derivative that knows about Streams too.
51class BlobProtocolHandler : public net::URLRequestJobFactory::ProtocolHandler {
52 public:
53  BlobProtocolHandler(ChromeBlobStorageContext* blob_storage_context,
54                      StreamContext* stream_context,
55                      storage::FileSystemContext* file_system_context)
56      : blob_storage_context_(blob_storage_context),
57        stream_context_(stream_context),
58        file_system_context_(file_system_context) {}
59
60  virtual ~BlobProtocolHandler() {
61  }
62
63  virtual net::URLRequestJob* MaybeCreateJob(
64      net::URLRequest* request,
65      net::NetworkDelegate* network_delegate) const OVERRIDE {
66    scoped_refptr<Stream> stream =
67        stream_context_->registry()->GetStream(request->url());
68    if (stream.get())
69      return new StreamURLRequestJob(request, network_delegate, stream);
70
71    if (!blob_protocol_handler_) {
72      // Construction is deferred because 'this' is constructed on
73      // the main thread but we want blob_protocol_handler_ constructed
74      // on the IO thread.
75      blob_protocol_handler_.reset(new storage::BlobProtocolHandler(
76          blob_storage_context_->context(),
77          file_system_context_.get(),
78          BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE)
79              .get()));
80    }
81    return blob_protocol_handler_->MaybeCreateJob(request, network_delegate);
82  }
83
84 private:
85  const scoped_refptr<ChromeBlobStorageContext> blob_storage_context_;
86  const scoped_refptr<StreamContext> stream_context_;
87  const scoped_refptr<storage::FileSystemContext> file_system_context_;
88  mutable scoped_ptr<storage::BlobProtocolHandler> blob_protocol_handler_;
89  DISALLOW_COPY_AND_ASSIGN(BlobProtocolHandler);
90};
91
92// These constants are used to create the directory structure under the profile
93// where renderers with a non-default storage partition keep their persistent
94// state. This will contain a set of directories that partially mirror the
95// directory structure of BrowserContext::GetPath().
96//
97// The kStoragePartitionDirname contains an extensions directory which is
98// further partitioned by extension id, followed by another level of directories
99// for the "default" extension storage partition and one directory for each
100// persistent partition used by a webview tag. Example:
101//
102//   Storage/ext/ABCDEF/def
103//   Storage/ext/ABCDEF/hash(partition name)
104//
105// The code in GetStoragePartitionPath() constructs these path names.
106//
107// TODO(nasko): Move extension related path code out of content.
108const base::FilePath::CharType kStoragePartitionDirname[] =
109    FILE_PATH_LITERAL("Storage");
110const base::FilePath::CharType kExtensionsDirname[] =
111    FILE_PATH_LITERAL("ext");
112const base::FilePath::CharType kDefaultPartitionDirname[] =
113    FILE_PATH_LITERAL("def");
114const base::FilePath::CharType kTrashDirname[] =
115    FILE_PATH_LITERAL("trash");
116
// Partition names are user specified and may therefore be arbitrarily long,
// which makes them unsuitable as path names. A truncated SHA256 hash is used
// to shorten the string deterministically, and kPartitionNameHashBytes is the
// number of hash bytes that are kept. With 6 bytes we get 99.999% reliability
// against collisions over 1 million partition domains.
//
// Analysis:
// All partition names within one partition domain are assumed to be
// controlled by the same entity. There is thus no opportunity for an
// adversarial attack, and accidental collision is the only concern. For 5 9s
// over 1 million domains, the probability of a collision in any single domain
// has to satisfy
//
//    p < nroot(1000000, .99999) ~= 10^-11
//
// The maximum number of unique names for that probability follows from the
// birthday attack approximation:
//
//    n(p,H) = sqrt(2*H * ln(1/(1-p)))
//
// For a 6-byte hash, H = 2^(6*8).  n(10^-11, H) ~= 75
//
// An average partition domain is likely to have fewer than 10 unique
// partition names, which is far lower than 75.
//
// Note that for 4 9s of reliability, the limit is 237 partition names per
// partition domain.
const int kPartitionNameHashBytes = 6;
146
147// Needed for selecting all files in ObliterateOneDirectory() below.
148#if defined(OS_POSIX)
149const int kAllFileTypes = base::FileEnumerator::FILES |
150                          base::FileEnumerator::DIRECTORIES |
151                          base::FileEnumerator::SHOW_SYM_LINKS;
152#else
153const int kAllFileTypes = base::FileEnumerator::FILES |
154                          base::FileEnumerator::DIRECTORIES;
155#endif
156
157base::FilePath GetStoragePartitionDomainPath(
158    const std::string& partition_domain) {
159  CHECK(base::IsStringUTF8(partition_domain));
160
161  return base::FilePath(kStoragePartitionDirname).Append(kExtensionsDirname)
162      .Append(base::FilePath::FromUTF8Unsafe(partition_domain));
163}
164
165// Helper function for doing a depth-first deletion of the data on disk.
166// Examines paths directly in |current_dir| (no recursion) and tries to
167// delete from disk anything that is in, or isn't a parent of something in
168// |paths_to_keep|. Paths that need further expansion are added to
169// |paths_to_consider|.
170void ObliterateOneDirectory(const base::FilePath& current_dir,
171                            const std::vector<base::FilePath>& paths_to_keep,
172                            std::vector<base::FilePath>* paths_to_consider) {
173  CHECK(current_dir.IsAbsolute());
174
175  base::FileEnumerator enumerator(current_dir, false, kAllFileTypes);
176  for (base::FilePath to_delete = enumerator.Next(); !to_delete.empty();
177       to_delete = enumerator.Next()) {
178    // Enum tracking which of the 3 possible actions to take for |to_delete|.
179    enum { kSkip, kEnqueue, kDelete } action = kDelete;
180
181    for (std::vector<base::FilePath>::const_iterator to_keep =
182             paths_to_keep.begin();
183         to_keep != paths_to_keep.end();
184         ++to_keep) {
185      if (to_delete == *to_keep) {
186        action = kSkip;
187        break;
188      } else if (to_delete.IsParent(*to_keep)) {
189        // |to_delete| contains a path to keep. Add to stack for further
190        // processing.
191        action = kEnqueue;
192        break;
193      }
194    }
195
196    switch (action) {
197      case kDelete:
198        base::DeleteFile(to_delete, true);
199        break;
200
201      case kEnqueue:
202        paths_to_consider->push_back(to_delete);
203        break;
204
205      case kSkip:
206        break;
207    }
208  }
209}
210
211// Synchronously attempts to delete |unnormalized_root|, preserving only
212// entries in |paths_to_keep|. If there are no entries in |paths_to_keep| on
213// disk, then it completely removes |unnormalized_root|. All paths must be
214// absolute paths.
215void BlockingObliteratePath(
216    const base::FilePath& unnormalized_browser_context_root,
217    const base::FilePath& unnormalized_root,
218    const std::vector<base::FilePath>& paths_to_keep,
219    const scoped_refptr<base::TaskRunner>& closure_runner,
220    const base::Closure& on_gc_required) {
221  // Early exit required because MakeAbsoluteFilePath() will fail on POSIX
222  // if |unnormalized_root| does not exist. This is safe because there is
223  // nothing to do in this situation anwyays.
224  if (!base::PathExists(unnormalized_root)) {
225    return;
226  }
227
228  // Never try to obliterate things outside of the browser context root or the
229  // browser context root itself. Die hard.
230  base::FilePath root = base::MakeAbsoluteFilePath(unnormalized_root);
231  base::FilePath browser_context_root =
232      base::MakeAbsoluteFilePath(unnormalized_browser_context_root);
233  CHECK(!root.empty());
234  CHECK(!browser_context_root.empty());
235  CHECK(browser_context_root.IsParent(root) && browser_context_root != root);
236
237  // Reduce |paths_to_keep| set to those under the root and actually on disk.
238  std::vector<base::FilePath> valid_paths_to_keep;
239  for (std::vector<base::FilePath>::const_iterator it = paths_to_keep.begin();
240       it != paths_to_keep.end();
241       ++it) {
242    if (root.IsParent(*it) && base::PathExists(*it))
243      valid_paths_to_keep.push_back(*it);
244  }
245
246  // If none of the |paths_to_keep| are valid anymore then we just whack the
247  // root and be done with it.  Otherwise, signal garbage collection and do
248  // a best-effort delete of the on-disk structures.
249  if (valid_paths_to_keep.empty()) {
250    base::DeleteFile(root, true);
251    return;
252  }
253  closure_runner->PostTask(FROM_HERE, on_gc_required);
254
255  // Otherwise, start at the root and delete everything that is not in
256  // |valid_paths_to_keep|.
257  std::vector<base::FilePath> paths_to_consider;
258  paths_to_consider.push_back(root);
259  while(!paths_to_consider.empty()) {
260    base::FilePath path = paths_to_consider.back();
261    paths_to_consider.pop_back();
262    ObliterateOneDirectory(path, valid_paths_to_keep, &paths_to_consider);
263  }
264}
265
266// Ensures each path in |active_paths| is a direct child of storage_root.
267void NormalizeActivePaths(const base::FilePath& storage_root,
268                          base::hash_set<base::FilePath>* active_paths) {
269  base::hash_set<base::FilePath> normalized_active_paths;
270
271  for (base::hash_set<base::FilePath>::iterator iter = active_paths->begin();
272       iter != active_paths->end(); ++iter) {
273    base::FilePath relative_path;
274    if (!storage_root.AppendRelativePath(*iter, &relative_path))
275      continue;
276
277    std::vector<base::FilePath::StringType> components;
278    relative_path.GetComponents(&components);
279
280    DCHECK(!relative_path.empty());
281    normalized_active_paths.insert(storage_root.Append(components.front()));
282  }
283
284  active_paths->swap(normalized_active_paths);
285}
286
287// Deletes all entries inside the |storage_root| that are not in the
288// |active_paths|.  Deletion is done in 2 steps:
289//
290//   (1) Moving all garbage collected paths into a trash directory.
291//   (2) Asynchronously deleting the trash directory.
292//
293// The deletion is asynchronous because after (1) completes, calling code can
294// safely continue to use the paths that had just been garbage collected
295// without fear of race conditions.
296//
297// This code also ignores failed moves rather than attempting a smarter retry.
298// Moves shouldn't fail here unless there is some out-of-band error (eg.,
299// FS corruption). Retry logic is dangerous in the general case because
300// there is not necessarily a guaranteed case where the logic may succeed.
301//
302// This function is still named BlockingGarbageCollect() because it does
303// execute a few filesystem operations synchronously.
304void BlockingGarbageCollect(
305    const base::FilePath& storage_root,
306    const scoped_refptr<base::TaskRunner>& file_access_runner,
307    scoped_ptr<base::hash_set<base::FilePath> > active_paths) {
308  CHECK(storage_root.IsAbsolute());
309
310  NormalizeActivePaths(storage_root, active_paths.get());
311
312  base::FileEnumerator enumerator(storage_root, false, kAllFileTypes);
313  base::FilePath trash_directory;
314  if (!base::CreateTemporaryDirInDir(storage_root, kTrashDirname,
315                                     &trash_directory)) {
316    // Unable to continue without creating the trash directory so give up.
317    return;
318  }
319  for (base::FilePath path = enumerator.Next(); !path.empty();
320       path = enumerator.Next()) {
321    if (active_paths->find(path) == active_paths->end() &&
322        path != trash_directory) {
323      // Since |trash_directory| is unique for each run of this function there
324      // can be no colllisions on the move.
325      base::Move(path, trash_directory.Append(path.BaseName()));
326    }
327  }
328
329  file_access_runner->PostTask(
330      FROM_HERE,
331      base::Bind(base::IgnoreResult(&base::DeleteFile), trash_directory, true));
332}
333
334}  // namespace
335
336// static
337base::FilePath StoragePartitionImplMap::GetStoragePartitionPath(
338    const std::string& partition_domain,
339    const std::string& partition_name) {
340  if (partition_domain.empty())
341    return base::FilePath();
342
343  base::FilePath path = GetStoragePartitionDomainPath(partition_domain);
344
345  // TODO(ajwong): Mangle in-memory into this somehow, either by putting
346  // it into the partition_name, or by manually adding another path component
347  // here.  Otherwise, it's possible to have an in-memory StoragePartition and
348  // a persistent one that return the same FilePath for GetPath().
349  if (!partition_name.empty()) {
350    // For analysis of why we can ignore collisions, see the comment above
351    // kPartitionNameHashBytes.
352    char buffer[kPartitionNameHashBytes];
353    crypto::SHA256HashString(partition_name, &buffer[0],
354                             sizeof(buffer));
355    return path.AppendASCII(base::HexEncode(buffer, sizeof(buffer)));
356  }
357
358  return path.Append(kDefaultPartitionDirname);
359}
360
361StoragePartitionImplMap::StoragePartitionImplMap(
362    BrowserContext* browser_context)
363    : browser_context_(browser_context),
364      resource_context_initialized_(false) {
365  // Doing here instead of initializer list cause it's just too ugly to read.
366  base::SequencedWorkerPool* blocking_pool = BrowserThread::GetBlockingPool();
367  file_access_runner_ =
368      blocking_pool->GetSequencedTaskRunner(blocking_pool->GetSequenceToken());
369}
370
371StoragePartitionImplMap::~StoragePartitionImplMap() {
372  STLDeleteContainerPairSecondPointers(partitions_.begin(),
373                                       partitions_.end());
374}
375
376StoragePartitionImpl* StoragePartitionImplMap::Get(
377    const std::string& partition_domain,
378    const std::string& partition_name,
379    bool in_memory) {
380  // Find the previously created partition if it's available.
381  StoragePartitionConfig partition_config(
382      partition_domain, partition_name, in_memory);
383
384  PartitionMap::const_iterator it = partitions_.find(partition_config);
385  if (it != partitions_.end())
386    return it->second;
387
388  base::FilePath partition_path =
389      browser_context_->GetPath().Append(
390          GetStoragePartitionPath(partition_domain, partition_name));
391  StoragePartitionImpl* partition =
392      StoragePartitionImpl::Create(browser_context_, in_memory,
393                                   partition_path);
394  partitions_[partition_config] = partition;
395
396  ChromeBlobStorageContext* blob_storage_context =
397      ChromeBlobStorageContext::GetFor(browser_context_);
398  StreamContext* stream_context = StreamContext::GetFor(browser_context_);
399  ProtocolHandlerMap protocol_handlers;
400  protocol_handlers[url::kBlobScheme] =
401      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
402          new BlobProtocolHandler(blob_storage_context,
403                                  stream_context,
404                                  partition->GetFileSystemContext()));
405  protocol_handlers[url::kFileSystemScheme] =
406      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
407          CreateFileSystemProtocolHandler(partition_domain,
408                                          partition->GetFileSystemContext()));
409  protocol_handlers[kChromeUIScheme] =
410      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
411          URLDataManagerBackend::CreateProtocolHandler(
412              browser_context_->GetResourceContext(),
413              browser_context_->IsOffTheRecord(),
414              partition->GetAppCacheService(),
415              blob_storage_context));
416  std::vector<std::string> additional_webui_schemes;
417  GetContentClient()->browser()->GetAdditionalWebUISchemes(
418      &additional_webui_schemes);
419  for (std::vector<std::string>::const_iterator it =
420           additional_webui_schemes.begin();
421       it != additional_webui_schemes.end();
422       ++it) {
423    protocol_handlers[*it] =
424        linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
425            URLDataManagerBackend::CreateProtocolHandler(
426                browser_context_->GetResourceContext(),
427                browser_context_->IsOffTheRecord(),
428                partition->GetAppCacheService(),
429                blob_storage_context));
430  }
431  protocol_handlers[kChromeDevToolsScheme] =
432      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
433          CreateDevToolsProtocolHandler(browser_context_->GetResourceContext(),
434                                        browser_context_->IsOffTheRecord()));
435
436  URLRequestInterceptorScopedVector request_interceptors;
437  request_interceptors.push_back(
438      ServiceWorkerRequestHandler::CreateInterceptor().release());
439
440  // These calls must happen after StoragePartitionImpl::Create().
441  if (partition_domain.empty()) {
442    partition->SetURLRequestContext(
443        GetContentClient()->browser()->CreateRequestContext(
444            browser_context_,
445            &protocol_handlers,
446            request_interceptors.Pass()));
447  } else {
448    partition->SetURLRequestContext(
449        GetContentClient()->browser()->CreateRequestContextForStoragePartition(
450            browser_context_,
451            partition->GetPath(),
452            in_memory,
453            &protocol_handlers,
454            request_interceptors.Pass()));
455  }
456  partition->SetMediaURLRequestContext(
457      partition_domain.empty() ?
458      browser_context_->GetMediaRequestContext() :
459      browser_context_->GetMediaRequestContextForStoragePartition(
460          partition->GetPath(), in_memory));
461
462  PostCreateInitialization(partition, in_memory);
463
464  return partition;
465}
466
467void StoragePartitionImplMap::AsyncObliterate(
468    const GURL& site,
469    const base::Closure& on_gc_required) {
470  // This method should avoid creating any StoragePartition (which would
471  // create more open file handles) so that it can delete as much of the
472  // data off disk as possible.
473  std::string partition_domain;
474  std::string partition_name;
475  bool in_memory = false;
476  GetContentClient()->browser()->GetStoragePartitionConfigForSite(
477      browser_context_, site, false, &partition_domain,
478      &partition_name, &in_memory);
479
480  // Find the active partitions for the domain. Because these partitions are
481  // active, it is not possible to just delete the directories that contain
482  // the backing data structures without causing the browser to crash. Instead,
483  // of deleteing the directory, we tell each storage context later to
484  // remove any data they have saved. This will leave the directory structure
485  // intact but it will only contain empty databases.
486  std::vector<StoragePartitionImpl*> active_partitions;
487  std::vector<base::FilePath> paths_to_keep;
488  for (PartitionMap::const_iterator it = partitions_.begin();
489       it != partitions_.end();
490       ++it) {
491    const StoragePartitionConfig& config = it->first;
492    if (config.partition_domain == partition_domain) {
493      it->second->ClearData(
494          // All except shader cache.
495          ~StoragePartition::REMOVE_DATA_MASK_SHADER_CACHE,
496          StoragePartition::QUOTA_MANAGED_STORAGE_MASK_ALL,
497          GURL(),
498          StoragePartition::OriginMatcherFunction(),
499          base::Time(), base::Time::Max(),
500          base::Bind(&base::DoNothing));
501      if (!config.in_memory) {
502        paths_to_keep.push_back(it->second->GetPath());
503      }
504    }
505  }
506
507  // Start a best-effort delete of the on-disk storage excluding paths that are
508  // known to still be in use. This is to delete any previously created
509  // StoragePartition state that just happens to not have been used during this
510  // run of the browser.
511  base::FilePath domain_root = browser_context_->GetPath().Append(
512      GetStoragePartitionDomainPath(partition_domain));
513
514  BrowserThread::PostBlockingPoolTask(
515      FROM_HERE,
516      base::Bind(&BlockingObliteratePath, browser_context_->GetPath(),
517                 domain_root, paths_to_keep,
518                 base::MessageLoopProxy::current(), on_gc_required));
519}
520
521void StoragePartitionImplMap::GarbageCollect(
522    scoped_ptr<base::hash_set<base::FilePath> > active_paths,
523    const base::Closure& done) {
524  // Include all paths for current StoragePartitions in the active_paths since
525  // they cannot be deleted safely.
526  for (PartitionMap::const_iterator it = partitions_.begin();
527       it != partitions_.end();
528       ++it) {
529    const StoragePartitionConfig& config = it->first;
530    if (!config.in_memory)
531      active_paths->insert(it->second->GetPath());
532  }
533
534  // Find the directory holding the StoragePartitions and delete everything in
535  // there that isn't considered active.
536  base::FilePath storage_root = browser_context_->GetPath().Append(
537      GetStoragePartitionDomainPath(std::string()));
538  file_access_runner_->PostTaskAndReply(
539      FROM_HERE,
540      base::Bind(&BlockingGarbageCollect, storage_root,
541                 file_access_runner_,
542                 base::Passed(&active_paths)),
543      done);
544}
545
546void StoragePartitionImplMap::ForEach(
547    const BrowserContext::StoragePartitionCallback& callback) {
548  for (PartitionMap::const_iterator it = partitions_.begin();
549       it != partitions_.end();
550       ++it) {
551    callback.Run(it->second);
552  }
553}
554
555void StoragePartitionImplMap::PostCreateInitialization(
556    StoragePartitionImpl* partition,
557    bool in_memory) {
558  // TODO(ajwong): ResourceContexts no longer have any storage related state.
559  // We should move this into a place where it is called once per
560  // BrowserContext creation rather than piggybacking off the default context
561  // creation.
562  // Note: moving this into Get() before partitions_[] is set causes reentrency.
563  if (!resource_context_initialized_) {
564    resource_context_initialized_ = true;
565    InitializeResourceContext(browser_context_);
566  }
567
568  // Check first to avoid memory leak in unittests.
569  if (BrowserThread::IsMessageLoopValid(BrowserThread::IO)) {
570    BrowserThread::PostTask(
571        BrowserThread::IO, FROM_HERE,
572        base::Bind(&ChromeAppCacheService::InitializeOnIOThread,
573                   partition->GetAppCacheService(),
574                   in_memory ? base::FilePath() :
575                       partition->GetPath().Append(kAppCacheDirname),
576                   browser_context_->GetResourceContext(),
577                   make_scoped_refptr(partition->GetURLRequestContext()),
578                   make_scoped_refptr(
579                       browser_context_->GetSpecialStoragePolicy())));
580
581    BrowserThread::PostTask(
582        BrowserThread::IO,
583        FROM_HERE,
584        base::Bind(&ServiceWorkerContextWrapper::SetBlobParametersForCache,
585                   partition->GetServiceWorkerContext(),
586                   make_scoped_refptr(partition->GetURLRequestContext()),
587                   make_scoped_refptr(
588                       ChromeBlobStorageContext::GetFor(browser_context_))));
589
590    // We do not call InitializeURLRequestContext() for media contexts because,
591    // other than the HTTP cache, the media contexts share the same backing
592    // objects as their associated "normal" request context.  Thus, the previous
593    // call serves to initialize the media request context for this storage
594    // partition as well.
595  }
596}
597
598}  // namespace content
599