storage_partition_impl_map.cc revision bbcdd45c55eb7c4641ab97aef9889b0fc828e7d3
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/browser/storage_partition_impl_map.h"
6
7#include "base/bind.h"
8#include "base/callback.h"
9#include "base/file_util.h"
10#include "base/files/file_enumerator.h"
11#include "base/files/file_path.h"
12#include "base/stl_util.h"
13#include "base/strings/string_number_conversions.h"
14#include "base/strings/string_util.h"
15#include "base/strings/stringprintf.h"
16#include "base/threading/sequenced_worker_pool.h"
17#include "content/browser/appcache/chrome_appcache_service.h"
18#include "content/browser/fileapi/browser_file_system_helper.h"
19#include "content/browser/fileapi/chrome_blob_storage_context.h"
20#include "content/browser/loader/resource_request_info_impl.h"
21#include "content/browser/resource_context_impl.h"
22#include "content/browser/storage_partition_impl.h"
23#include "content/browser/streams/stream.h"
24#include "content/browser/streams/stream_context.h"
25#include "content/browser/streams/stream_registry.h"
26#include "content/browser/streams/stream_url_request_job.h"
27#include "content/browser/webui/url_data_manager_backend.h"
28#include "content/public/browser/browser_context.h"
29#include "content/public/browser/browser_thread.h"
30#include "content/public/browser/content_browser_client.h"
31#include "content/public/browser/storage_partition.h"
32#include "content/public/common/content_constants.h"
33#include "content/public/common/url_constants.h"
34#include "crypto/sha2.h"
35#include "net/url_request/url_request_context.h"
36#include "net/url_request/url_request_context_getter.h"
37#include "webkit/browser/blob/blob_url_request_job_factory.h"
38#include "webkit/browser/fileapi/file_system_url_request_job_factory.h"
39#include "webkit/common/blob/blob_data.h"
40
41using appcache::AppCacheService;
42using fileapi::FileSystemContext;
43using webkit_blob::BlobStorageController;
44
45namespace content {
46
47namespace {
48
49class BlobProtocolHandler : public net::URLRequestJobFactory::ProtocolHandler {
50 public:
51  BlobProtocolHandler(ChromeBlobStorageContext* blob_storage_context,
52                      StreamContext* stream_context,
53                      fileapi::FileSystemContext* file_system_context)
54      : blob_storage_context_(blob_storage_context),
55        stream_context_(stream_context),
56        file_system_context_(file_system_context) {}
57
58  virtual ~BlobProtocolHandler() {}
59
60  virtual net::URLRequestJob* MaybeCreateJob(
61      net::URLRequest* request,
62      net::NetworkDelegate* network_delegate) const OVERRIDE {
63    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
64    if (!webkit_blob_protocol_handler_impl_) {
65      webkit_blob_protocol_handler_impl_.reset(
66          new WebKitBlobProtocolHandlerImpl(blob_storage_context_->controller(),
67                                            stream_context_.get(),
68                                            file_system_context_.get()));
69    }
70    return webkit_blob_protocol_handler_impl_->MaybeCreateJob(request,
71                                                              network_delegate);
72  }
73
74 private:
75  // An implementation of webkit_blob::BlobProtocolHandler that gets
76  // the BlobData from ResourceRequestInfoImpl.
77  class WebKitBlobProtocolHandlerImpl
78      : public webkit_blob::BlobProtocolHandler {
79   public:
80    WebKitBlobProtocolHandlerImpl(
81        webkit_blob::BlobStorageController* blob_storage_controller,
82        StreamContext* stream_context,
83        fileapi::FileSystemContext* file_system_context)
84        : webkit_blob::BlobProtocolHandler(
85              blob_storage_controller,
86              file_system_context,
87              BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE)
88                  .get()),
89          stream_context_(stream_context) {}
90
91    virtual ~WebKitBlobProtocolHandlerImpl() {}
92
93    virtual net::URLRequestJob* MaybeCreateJob(
94        net::URLRequest* request,
95        net::NetworkDelegate* network_delegate) const OVERRIDE {
96      scoped_refptr<Stream> stream =
97          stream_context_->registry()->GetStream(request->url());
98      if (stream.get())
99        return new StreamURLRequestJob(request, network_delegate, stream);
100
101      return webkit_blob::BlobProtocolHandler::MaybeCreateJob(
102          request, network_delegate);
103    }
104
105   private:
106    // webkit_blob::BlobProtocolHandler implementation.
107    virtual scoped_refptr<webkit_blob::BlobData>
108        LookupBlobData(net::URLRequest* request) const OVERRIDE {
109      const ResourceRequestInfoImpl* info =
110          ResourceRequestInfoImpl::ForRequest(request);
111      if (!info)
112        return NULL;
113      return info->requested_blob_data();
114    }
115
116    const scoped_refptr<StreamContext> stream_context_;
117    DISALLOW_COPY_AND_ASSIGN(WebKitBlobProtocolHandlerImpl);
118  };
119
120  const scoped_refptr<ChromeBlobStorageContext> blob_storage_context_;
121  const scoped_refptr<StreamContext> stream_context_;
122  const scoped_refptr<fileapi::FileSystemContext> file_system_context_;
123
124  mutable scoped_ptr<WebKitBlobProtocolHandlerImpl>
125  webkit_blob_protocol_handler_impl_;
126
127  DISALLOW_COPY_AND_ASSIGN(BlobProtocolHandler);
128};
129
130// These constants are used to create the directory structure under the profile
131// where renderers with a non-default storage partition keep their persistent
132// state. This will contain a set of directories that partially mirror the
133// directory structure of BrowserContext::GetPath().
134//
135// The kStoragePartitionDirname contains an extensions directory which is
136// further partitioned by extension id, followed by another level of directories
137// for the "default" extension storage partition and one directory for each
138// persistent partition used by a webview tag. Example:
139//
140//   Storage/ext/ABCDEF/def
141//   Storage/ext/ABCDEF/hash(partition name)
142//
143// The code in GetStoragePartitionPath() constructs these path names.
144//
145// TODO(nasko): Move extension related path code out of content.
146const base::FilePath::CharType kStoragePartitionDirname[] =
147    FILE_PATH_LITERAL("Storage");
148const base::FilePath::CharType kExtensionsDirname[] =
149    FILE_PATH_LITERAL("ext");
150const base::FilePath::CharType kDefaultPartitionDirname[] =
151    FILE_PATH_LITERAL("def");
152const base::FilePath::CharType kTrashDirname[] =
153    FILE_PATH_LITERAL("trash");
154
155// Because partition names are user specified, they can be arbitrarily long
// which makes them unsuitable for path names. We use a truncation of a
157// SHA256 hash to perform a deterministic shortening of the string. The
158// kPartitionNameHashBytes constant controls the length of the truncation.
159// We use 6 bytes, which gives us 99.999% reliability against collisions over
160// 1 million partition domains.
161//
162// Analysis:
// We assume that all partition names within one partition domain are
// controlled by the same entity. Thus there is no chance for adversarial
// attack and all we care about is accidental collision. To get 5 9s over
166// 1 million domains, we need the probability of a collision in any one domain
167// to be
168//
169//    p < nroot(1000000, .99999) ~= 10^-11
170//
171// We use the following birthday attack approximation to calculate the max
172// number of unique names for this probability:
173//
174//    n(p,H) = sqrt(2*H * ln(1/(1-p)))
175//
176// For a 6-byte hash, H = 2^(6*8).  n(10^-11, H) ~= 75
177//
178// An average partition domain is likely to have less than 10 unique
179// partition names which is far lower than 75.
180//
181// Note, that for 4 9s of reliability, the limit is 237 partition names per
182// partition domain.
183const int kPartitionNameHashBytes = 6;
184
185// Needed for selecting all files in ObliterateOneDirectory() below.
186#if defined(OS_POSIX)
187const int kAllFileTypes = base::FileEnumerator::FILES |
188                          base::FileEnumerator::DIRECTORIES |
189                          base::FileEnumerator::SHOW_SYM_LINKS;
190#else
191const int kAllFileTypes = base::FileEnumerator::FILES |
192                          base::FileEnumerator::DIRECTORIES;
193#endif
194
195base::FilePath GetStoragePartitionDomainPath(
196    const std::string& partition_domain) {
197  CHECK(IsStringUTF8(partition_domain));
198
199  return base::FilePath(kStoragePartitionDirname).Append(kExtensionsDirname)
200      .Append(base::FilePath::FromUTF8Unsafe(partition_domain));
201}
202
203// Helper function for doing a depth-first deletion of the data on disk.
204// Examines paths directly in |current_dir| (no recursion) and tries to
205// delete from disk anything that is in, or isn't a parent of something in
206// |paths_to_keep|. Paths that need further expansion are added to
207// |paths_to_consider|.
208void ObliterateOneDirectory(const base::FilePath& current_dir,
209                            const std::vector<base::FilePath>& paths_to_keep,
210                            std::vector<base::FilePath>* paths_to_consider) {
211  CHECK(current_dir.IsAbsolute());
212
213  base::FileEnumerator enumerator(current_dir, false, kAllFileTypes);
214  for (base::FilePath to_delete = enumerator.Next(); !to_delete.empty();
215       to_delete = enumerator.Next()) {
216    // Enum tracking which of the 3 possible actions to take for |to_delete|.
217    enum { kSkip, kEnqueue, kDelete } action = kDelete;
218
219    for (std::vector<base::FilePath>::const_iterator to_keep =
220             paths_to_keep.begin();
221         to_keep != paths_to_keep.end();
222         ++to_keep) {
223      if (to_delete == *to_keep) {
224        action = kSkip;
225        break;
226      } else if (to_delete.IsParent(*to_keep)) {
227        // |to_delete| contains a path to keep. Add to stack for further
228        // processing.
229        action = kEnqueue;
230        break;
231      }
232    }
233
234    switch (action) {
235      case kDelete:
236        base::DeleteFile(to_delete, true);
237        break;
238
239      case kEnqueue:
240        paths_to_consider->push_back(to_delete);
241        break;
242
243      case kSkip:
244        break;
245    }
246  }
247}
248
249// Synchronously attempts to delete |unnormalized_root|, preserving only
250// entries in |paths_to_keep|. If there are no entries in |paths_to_keep| on
251// disk, then it completely removes |unnormalized_root|. All paths must be
252// absolute paths.
253void BlockingObliteratePath(
254    const base::FilePath& unnormalized_browser_context_root,
255    const base::FilePath& unnormalized_root,
256    const std::vector<base::FilePath>& paths_to_keep,
257    const scoped_refptr<base::TaskRunner>& closure_runner,
258    const base::Closure& on_gc_required) {
259  // Early exit required because MakeAbsoluteFilePath() will fail on POSIX
260  // if |unnormalized_root| does not exist. This is safe because there is
261  // nothing to do in this situation anwyays.
262  if (!base::PathExists(unnormalized_root)) {
263    return;
264  }
265
266  // Never try to obliterate things outside of the browser context root or the
267  // browser context root itself. Die hard.
268  base::FilePath root = base::MakeAbsoluteFilePath(unnormalized_root);
269  base::FilePath browser_context_root =
270      base::MakeAbsoluteFilePath(unnormalized_browser_context_root);
271  CHECK(!root.empty());
272  CHECK(!browser_context_root.empty());
273  CHECK(browser_context_root.IsParent(root) && browser_context_root != root);
274
275  // Reduce |paths_to_keep| set to those under the root and actually on disk.
276  std::vector<base::FilePath> valid_paths_to_keep;
277  for (std::vector<base::FilePath>::const_iterator it = paths_to_keep.begin();
278       it != paths_to_keep.end();
279       ++it) {
280    if (root.IsParent(*it) && base::PathExists(*it))
281      valid_paths_to_keep.push_back(*it);
282  }
283
284  // If none of the |paths_to_keep| are valid anymore then we just whack the
285  // root and be done with it.  Otherwise, signal garbage collection and do
286  // a best-effort delete of the on-disk structures.
287  if (valid_paths_to_keep.empty()) {
288    base::DeleteFile(root, true);
289    return;
290  }
291  closure_runner->PostTask(FROM_HERE, on_gc_required);
292
293  // Otherwise, start at the root and delete everything that is not in
294  // |valid_paths_to_keep|.
295  std::vector<base::FilePath> paths_to_consider;
296  paths_to_consider.push_back(root);
297  while(!paths_to_consider.empty()) {
298    base::FilePath path = paths_to_consider.back();
299    paths_to_consider.pop_back();
300    ObliterateOneDirectory(path, valid_paths_to_keep, &paths_to_consider);
301  }
302}
303
304// Deletes all entries inside the |storage_root| that are not in the
305// |active_paths|.  Deletion is done in 2 steps:
306//
307//   (1) Moving all garbage collected paths into a trash directory.
308//   (2) Asynchronously deleting the trash directory.
309//
310// The deletion is asynchronous because after (1) completes, calling code can
311// safely continue to use the paths that had just been garbage collected
312// without fear of race conditions.
313//
314// This code also ignores failed moves rather than attempting a smarter retry.
315// Moves shouldn't fail here unless there is some out-of-band error (eg.,
316// FS corruption). Retry logic is dangerous in the general case because
317// there is not necessarily a guaranteed case where the logic may succeed.
318//
319// This function is still named BlockingGarbageCollect() because it does
320// execute a few filesystem operations synchronously.
321void BlockingGarbageCollect(
322    const base::FilePath& storage_root,
323    const scoped_refptr<base::TaskRunner>& file_access_runner,
324    scoped_ptr<base::hash_set<base::FilePath> > active_paths) {
325  CHECK(storage_root.IsAbsolute());
326
327  base::FileEnumerator enumerator(storage_root, false, kAllFileTypes);
328  base::FilePath trash_directory;
329  if (!file_util::CreateTemporaryDirInDir(storage_root, kTrashDirname,
330                                          &trash_directory)) {
331    // Unable to continue without creating the trash directory so give up.
332    return;
333  }
334  for (base::FilePath path = enumerator.Next(); !path.empty();
335       path = enumerator.Next()) {
336    if (active_paths->find(path) == active_paths->end() &&
337        path != trash_directory) {
338      // Since |trash_directory| is unique for each run of this function there
339      // can be no colllisions on the move.
340      base::Move(path, trash_directory.Append(path.BaseName()));
341    }
342  }
343
344  file_access_runner->PostTask(
345      FROM_HERE,
346      base::Bind(base::IgnoreResult(&base::DeleteFile), trash_directory, true));
347}
348
349}  // namespace
350
351// static
352base::FilePath StoragePartitionImplMap::GetStoragePartitionPath(
353    const std::string& partition_domain,
354    const std::string& partition_name) {
355  if (partition_domain.empty())
356    return base::FilePath();
357
358  base::FilePath path = GetStoragePartitionDomainPath(partition_domain);
359
360  // TODO(ajwong): Mangle in-memory into this somehow, either by putting
361  // it into the partition_name, or by manually adding another path component
362  // here.  Otherwise, it's possible to have an in-memory StoragePartition and
363  // a persistent one that return the same FilePath for GetPath().
364  if (!partition_name.empty()) {
365    // For analysis of why we can ignore collisions, see the comment above
366    // kPartitionNameHashBytes.
367    char buffer[kPartitionNameHashBytes];
368    crypto::SHA256HashString(partition_name, &buffer[0],
369                             sizeof(buffer));
370    return path.AppendASCII(base::HexEncode(buffer, sizeof(buffer)));
371  }
372
373  return path.Append(kDefaultPartitionDirname);
374}
375
376StoragePartitionImplMap::StoragePartitionImplMap(
377    BrowserContext* browser_context)
378    : browser_context_(browser_context),
379      resource_context_initialized_(false) {
380  // Doing here instead of initializer list cause it's just too ugly to read.
381  base::SequencedWorkerPool* blocking_pool = BrowserThread::GetBlockingPool();
382  file_access_runner_ =
383      blocking_pool->GetSequencedTaskRunner(blocking_pool->GetSequenceToken());
384}
385
386StoragePartitionImplMap::~StoragePartitionImplMap() {
387  STLDeleteContainerPairSecondPointers(partitions_.begin(),
388                                       partitions_.end());
389}
390
391StoragePartitionImpl* StoragePartitionImplMap::Get(
392    const std::string& partition_domain,
393    const std::string& partition_name,
394    bool in_memory) {
395  // Find the previously created partition if it's available.
396  StoragePartitionConfig partition_config(
397      partition_domain, partition_name, in_memory);
398
399  PartitionMap::const_iterator it = partitions_.find(partition_config);
400  if (it != partitions_.end())
401    return it->second;
402
403  base::FilePath partition_path =
404      browser_context_->GetPath().Append(
405          GetStoragePartitionPath(partition_domain, partition_name));
406  StoragePartitionImpl* partition =
407      StoragePartitionImpl::Create(browser_context_, in_memory,
408                                   partition_path);
409  partitions_[partition_config] = partition;
410
411  ChromeBlobStorageContext* blob_storage_context =
412      ChromeBlobStorageContext::GetFor(browser_context_);
413  StreamContext* stream_context = StreamContext::GetFor(browser_context_);
414  ProtocolHandlerMap protocol_handlers;
415  protocol_handlers[chrome::kBlobScheme] =
416      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
417          new BlobProtocolHandler(blob_storage_context,
418                                  stream_context,
419                                  partition->GetFileSystemContext()));
420  protocol_handlers[chrome::kFileSystemScheme] =
421      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
422          CreateFileSystemProtocolHandler(partition->GetFileSystemContext()));
423  protocol_handlers[chrome::kChromeUIScheme] =
424      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
425          URLDataManagerBackend::CreateProtocolHandler(
426              browser_context_->GetResourceContext(),
427              browser_context_->IsOffTheRecord(),
428              partition->GetAppCacheService(),
429              blob_storage_context));
430  std::vector<std::string> additional_webui_schemes;
431  GetContentClient()->browser()->GetAdditionalWebUISchemes(
432      &additional_webui_schemes);
433  for (std::vector<std::string>::const_iterator it =
434           additional_webui_schemes.begin();
435       it != additional_webui_schemes.end();
436       ++it) {
437    protocol_handlers[*it] =
438        linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
439            URLDataManagerBackend::CreateProtocolHandler(
440                browser_context_->GetResourceContext(),
441                browser_context_->IsOffTheRecord(),
442                partition->GetAppCacheService(),
443                blob_storage_context));
444  }
445  protocol_handlers[chrome::kChromeDevToolsScheme] =
446      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
447          CreateDevToolsProtocolHandler(browser_context_->GetResourceContext(),
448                                        browser_context_->IsOffTheRecord()));
449
450  // These calls must happen after StoragePartitionImpl::Create().
451  if (partition_domain.empty()) {
452    partition->SetURLRequestContext(
453        GetContentClient()->browser()->CreateRequestContext(
454            browser_context_,
455            &protocol_handlers));
456  } else {
457    partition->SetURLRequestContext(
458        GetContentClient()->browser()->CreateRequestContextForStoragePartition(
459            browser_context_, partition->GetPath(), in_memory,
460            &protocol_handlers));
461  }
462  partition->SetMediaURLRequestContext(
463      partition_domain.empty() ?
464      browser_context_->GetMediaRequestContext() :
465      browser_context_->GetMediaRequestContextForStoragePartition(
466          partition->GetPath(), in_memory));
467
468  PostCreateInitialization(partition, in_memory);
469
470  return partition;
471}
472
473void StoragePartitionImplMap::AsyncObliterate(
474    const GURL& site,
475    const base::Closure& on_gc_required) {
476  // This method should avoid creating any StoragePartition (which would
477  // create more open file handles) so that it can delete as much of the
478  // data off disk as possible.
479  std::string partition_domain;
480  std::string partition_name;
481  bool in_memory = false;
482  GetContentClient()->browser()->GetStoragePartitionConfigForSite(
483      browser_context_, site, false, &partition_domain,
484      &partition_name, &in_memory);
485
486  // Find the active partitions for the domain. Because these partitions are
487  // active, it is not possible to just delete the directories that contain
488  // the backing data structures without causing the browser to crash. Instead,
489  // of deleteing the directory, we tell each storage context later to
490  // remove any data they have saved. This will leave the directory structure
491  // intact but it will only contain empty databases.
492  std::vector<StoragePartitionImpl*> active_partitions;
493  std::vector<base::FilePath> paths_to_keep;
494  for (PartitionMap::const_iterator it = partitions_.begin();
495       it != partitions_.end();
496       ++it) {
497    const StoragePartitionConfig& config = it->first;
498    if (config.partition_domain == partition_domain) {
499      it->second->ClearDataForUnboundedRange(
500          StoragePartition::REMOVE_DATA_MASK_ALL,
501          StoragePartition::kAllStorage);
502      if (!config.in_memory) {
503        paths_to_keep.push_back(it->second->GetPath());
504      }
505    }
506  }
507
508  // Start a best-effort delete of the on-disk storage excluding paths that are
509  // known to still be in use. This is to delete any previously created
510  // StoragePartition state that just happens to not have been used during this
511  // run of the browser.
512  base::FilePath domain_root = browser_context_->GetPath().Append(
513      GetStoragePartitionDomainPath(partition_domain));
514
515  BrowserThread::PostBlockingPoolTask(
516      FROM_HERE,
517      base::Bind(&BlockingObliteratePath, browser_context_->GetPath(),
518                 domain_root, paths_to_keep,
519                 base::MessageLoopProxy::current(), on_gc_required));
520}
521
522void StoragePartitionImplMap::GarbageCollect(
523    scoped_ptr<base::hash_set<base::FilePath> > active_paths,
524    const base::Closure& done) {
525  // Include all paths for current StoragePartitions in the active_paths since
526  // they cannot be deleted safely.
527  for (PartitionMap::const_iterator it = partitions_.begin();
528       it != partitions_.end();
529       ++it) {
530    const StoragePartitionConfig& config = it->first;
531    if (!config.in_memory)
532      active_paths->insert(it->second->GetPath());
533  }
534
535  // Find the directory holding the StoragePartitions and delete everything in
536  // there that isn't considered active.
537  base::FilePath storage_root = browser_context_->GetPath().Append(
538      GetStoragePartitionDomainPath(std::string()));
539  file_access_runner_->PostTaskAndReply(
540      FROM_HERE,
541      base::Bind(&BlockingGarbageCollect, storage_root,
542                 file_access_runner_,
543                 base::Passed(&active_paths)),
544      done);
545}
546
547void StoragePartitionImplMap::ForEach(
548    const BrowserContext::StoragePartitionCallback& callback) {
549  for (PartitionMap::const_iterator it = partitions_.begin();
550       it != partitions_.end();
551       ++it) {
552    callback.Run(it->second);
553  }
554}
555
556void StoragePartitionImplMap::PostCreateInitialization(
557    StoragePartitionImpl* partition,
558    bool in_memory) {
559  // TODO(ajwong): ResourceContexts no longer have any storage related state.
560  // We should move this into a place where it is called once per
561  // BrowserContext creation rather than piggybacking off the default context
562  // creation.
563  // Note: moving this into Get() before partitions_[] is set causes reentrency.
564  if (!resource_context_initialized_) {
565    resource_context_initialized_ = true;
566    InitializeResourceContext(browser_context_);
567  }
568
569  // Check first to avoid memory leak in unittests.
570  if (BrowserThread::IsMessageLoopValid(BrowserThread::IO)) {
571    BrowserThread::PostTask(
572        BrowserThread::IO, FROM_HERE,
573        base::Bind(&ChromeAppCacheService::InitializeOnIOThread,
574                   partition->GetAppCacheService(),
575                   in_memory ? base::FilePath() :
576                       partition->GetPath().Append(kAppCacheDirname),
577                   browser_context_->GetResourceContext(),
578                   make_scoped_refptr(partition->GetURLRequestContext()),
579                   make_scoped_refptr(
580                       browser_context_->GetSpecialStoragePolicy())));
581
582    // We do not call InitializeURLRequestContext() for media contexts because,
583    // other than the HTTP cache, the media contexts share the same backing
584    // objects as their associated "normal" request context.  Thus, the previous
585    // call serves to initialize the media request context for this storage
586    // partition as well.
587  }
588}
589
590}  // namespace content
591