1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "extensions/browser/content_hash_fetcher.h"
6
7#include <algorithm>
8
9#include "base/base64.h"
10#include "base/files/file_enumerator.h"
11#include "base/files/file_util.h"
12#include "base/json/json_reader.h"
13#include "base/memory/ref_counted.h"
14#include "base/metrics/histogram.h"
15#include "base/synchronization/lock.h"
16#include "base/task_runner_util.h"
17#include "base/timer/elapsed_timer.h"
18#include "base/version.h"
19#include "content/public/browser/browser_context.h"
20#include "content/public/browser/browser_thread.h"
21#include "crypto/sha2.h"
22#include "extensions/browser/computed_hashes.h"
23#include "extensions/browser/content_hash_tree.h"
24#include "extensions/browser/content_verifier_delegate.h"
25#include "extensions/browser/verified_contents.h"
26#include "extensions/common/constants.h"
27#include "extensions/common/extension.h"
28#include "extensions/common/file_util.h"
29#include "net/base/load_flags.h"
30#include "net/url_request/url_fetcher.h"
31#include "net/url_request/url_fetcher_delegate.h"
32#include "net/url_request/url_request_status.h"
33
namespace {

// std::set keeps paths in sorted order, which gives us a deterministic
// iteration order when computing hashes in CreateHashes() below.
typedef std::set<base::FilePath> SortedFilePathSet;

}  // namespace
39
40namespace extensions {
41
// This class takes care of doing the disk and network I/O work to ensure we
// have both verified_contents.json files from the webstore and
// computed_hashes.json files computed over the files in an extension's
// directory.
//
// Lifetime/threading: the job is ref-counted and bounces between the thread
// it was created on, the blocking pool (file I/O), and URLFetcher callbacks;
// Cancel()/IsCancelled() are the only members that are safe on any thread.
class ContentHashFetcherJob
    : public base::RefCountedThreadSafe<ContentHashFetcherJob>,
      public net::URLFetcherDelegate {
 public:
  typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback;
  ContentHashFetcherJob(net::URLRequestContextGetter* request_context,
                        const ContentVerifierKey& key,
                        const std::string& extension_id,
                        const base::FilePath& extension_path,
                        const GURL& fetch_url,
                        bool force,
                        const CompletionCallback& callback);

  void Start();

  // Cancels this job, which will attempt to stop I/O operations sooner than
  // just waiting for the entire job to complete. Safe to call from any thread.
  void Cancel();

  // Checks whether this job has been cancelled. Safe to call from any thread.
  bool IsCancelled();

  // Returns whether this job was successful (we have both verified contents
  // and computed hashes). Even if the job was a success, there might have been
  // files that were found to have contents not matching expectations; these
  // are available by calling hash_mismatch_paths().
  bool success() { return success_; }

  // Returns whether this job was created with the |force| flag (i.e. it
  // recomputes hashes even if computed_hashes.json already exists).
  bool force() { return force_; }

  // The id of the extension this job is fetching/computing hashes for.
  const std::string& extension_id() { return extension_id_; }

  // Returns the set of paths that had a hash mismatch.
  const std::set<base::FilePath>& hash_mismatch_paths() {
    return hash_mismatch_paths_;
  }

 private:
  friend class base::RefCountedThreadSafe<ContentHashFetcherJob>;
  virtual ~ContentHashFetcherJob();

  // Tries to load a verified_contents.json file at |path|. On successfully
  // reading and validing the file, the verified_contents_ member variable will
  // be set and this function will return true. If the file does not exist, or
  // exists but is invalid, it will return false. Also, any invalid
  // file will be removed from disk before returning.
  bool LoadVerifiedContents(const base::FilePath& path);

  // Callback for when we're done doing file I/O to see if we already have
  // a verified contents file. If we don't, this will kick off a network
  // request to get one.
  void DoneCheckingForVerifiedContents(bool found);

  // URLFetcherDelegate interface
  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;

  // Callback for when we're done ensuring we have verified contents, and are
  // ready to move on to MaybeCreateHashes.
  void DoneFetchingVerifiedContents(bool success);

  // Callback for the job to write the verified contents to the filesystem.
  void OnVerifiedContentsWritten(size_t expected_size, int write_result);

  // The verified contents file from the webstore only contains the treehash
  // root hash, but for performance we want to cache the individual block level
  // hashes. This function will create that cache with block-level hashes for
  // each file in the extension if needed (the treehash root hash for each of
  // these should equal what is in the verified contents file from the
  // webstore).
  void MaybeCreateHashes();

  // Computes hashes for all files in |extension_path_|, and uses a
  // ComputedHashes::Writer to write that information into
  // |hashes_file|. Returns true on success.
  bool CreateHashes(const base::FilePath& hashes_file);

  // Will call the callback, if we haven't been cancelled.
  void DispatchCallback();

  // Not owned; must outlive this job.
  net::URLRequestContextGetter* request_context_;
  std::string extension_id_;
  base::FilePath extension_path_;

  // The url we'll need to use to fetch a verified_contents.json file.
  GURL fetch_url_;

  // If true, recompute computed_hashes.json even when it already exists.
  bool force_;

  CompletionCallback callback_;
  // The thread the job was created on; the completion callback is dispatched
  // back to this thread.
  content::BrowserThread::ID creation_thread_;

  // Used for fetching content signatures.
  scoped_ptr<net::URLFetcher> url_fetcher_;

  // The key used to validate verified_contents.json.
  ContentVerifierKey key_;

  // The parsed contents of the verified_contents.json file, either read from
  // disk or fetched from the network and then written to disk.
  scoped_ptr<VerifiedContents> verified_contents_;

  // Whether this job succeeded.
  bool success_;

  // Paths that were found to have a mismatching hash.
  std::set<base::FilePath> hash_mismatch_paths_;

  // The block size to use for hashing.
  int block_size_;

  // Note: this may be accessed from multiple threads, so all access should
  // be protected by |cancelled_lock_|.
  bool cancelled_;

  // A lock for synchronizing access to |cancelled_|.
  base::Lock cancelled_lock_;

  DISALLOW_COPY_AND_ASSIGN(ContentHashFetcherJob);
};
165
// Constructs an inactive job; work starts when Start() is called. Records the
// current thread so the completion callback can be dispatched back to it.
ContentHashFetcherJob::ContentHashFetcherJob(
    net::URLRequestContextGetter* request_context,
    const ContentVerifierKey& key,
    const std::string& extension_id,
    const base::FilePath& extension_path,
    const GURL& fetch_url,
    bool force,
    const CompletionCallback& callback)
    : request_context_(request_context),
      extension_id_(extension_id),
      extension_path_(extension_path),
      fetch_url_(fetch_url),
      force_(force),
      callback_(callback),
      key_(key),
      success_(false),
      // TODO(asargent) - use the value from verified_contents.json for each
      // file, instead of using a constant.
      block_size_(4096),
      cancelled_(false) {
  bool got_id =
      content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_);
  DCHECK(got_id);
}
190
191void ContentHashFetcherJob::Start() {
192  base::FilePath verified_contents_path =
193      file_util::GetVerifiedContentsPath(extension_path_);
194  base::PostTaskAndReplyWithResult(
195      content::BrowserThread::GetBlockingPool(),
196      FROM_HERE,
197      base::Bind(&ContentHashFetcherJob::LoadVerifiedContents,
198                 this,
199                 verified_contents_path),
200      base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents,
201                 this));
202}
203
204void ContentHashFetcherJob::Cancel() {
205  base::AutoLock autolock(cancelled_lock_);
206  cancelled_ = true;
207}
208
209bool ContentHashFetcherJob::IsCancelled() {
210  base::AutoLock autolock(cancelled_lock_);
211  bool result = cancelled_;
212  return result;
213}
214
// Private; destruction happens via RefCountedThreadSafe when the last
// reference goes away.
ContentHashFetcherJob::~ContentHashFetcherJob() {
}
217
218bool ContentHashFetcherJob::LoadVerifiedContents(const base::FilePath& path) {
219  if (!base::PathExists(path))
220    return false;
221  verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
222  if (!verified_contents_->InitFrom(path, false)) {
223    verified_contents_.reset();
224    if (!base::DeleteFile(path, false))
225      LOG(WARNING) << "Failed to delete " << path.value();
226    return false;
227  }
228  return true;
229}
230
231void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) {
232  if (IsCancelled())
233    return;
234  if (found) {
235    VLOG(1) << "Found verified contents for " << extension_id_;
236    DoneFetchingVerifiedContents(true);
237  } else {
238    VLOG(1) << "Missing verified contents for " << extension_id_
239            << ", fetching...";
240    url_fetcher_.reset(
241        net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this));
242    url_fetcher_->SetRequestContext(request_context_);
243    url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
244                               net::LOAD_DO_NOT_SAVE_COOKIES |
245                               net::LOAD_DISABLE_CACHE);
246    url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3);
247    url_fetcher_->Start();
248  }
249}
250
251// Helper function to let us pass ownership of a string via base::Bind with the
252// contents to be written into a file. Also ensures that the directory for
253// |path| exists, creating it if needed.
254static int WriteFileHelper(const base::FilePath& path,
255                           scoped_ptr<std::string> content) {
256  base::FilePath dir = path.DirName();
257  return (base::CreateDirectoryAndGetError(dir, NULL) &&
258          base::WriteFile(path, content->data(), content->size()));
259}
260
261void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) {
262  VLOG(1) << "URLFetchComplete for " << extension_id_
263          << " is_success:" << url_fetcher_->GetStatus().is_success() << " "
264          << fetch_url_.possibly_invalid_spec();
265  if (IsCancelled())
266    return;
267  scoped_ptr<std::string> response(new std::string);
268  if (!url_fetcher_->GetStatus().is_success() ||
269      !url_fetcher_->GetResponseAsString(response.get())) {
270    DoneFetchingVerifiedContents(false);
271    return;
272  }
273
274  // Parse the response to make sure it is valid json (on staging sometimes it
275  // can be a login redirect html, xml file, etc. if you aren't logged in with
276  // the right cookies).  TODO(asargent) - It would be a nice enhancement to
277  // move to parsing this in a sandboxed helper (crbug.com/372878).
278  scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response));
279  if (parsed) {
280    VLOG(1) << "JSON parsed ok for " << extension_id_;
281
282    parsed.reset();  // no longer needed
283    base::FilePath destination =
284        file_util::GetVerifiedContentsPath(extension_path_);
285    size_t size = response->size();
286    base::PostTaskAndReplyWithResult(
287        content::BrowserThread::GetBlockingPool(),
288        FROM_HERE,
289        base::Bind(&WriteFileHelper, destination, base::Passed(&response)),
290        base::Bind(
291            &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size));
292  } else {
293    DoneFetchingVerifiedContents(false);
294  }
295}
296
297void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size,
298                                                      int write_result) {
299  bool success =
300      (write_result >= 0 && static_cast<size_t>(write_result) == expected_size);
301  DoneFetchingVerifiedContents(success);
302}
303
304void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) {
305  if (IsCancelled())
306    return;
307
308  if (!success) {
309    DispatchCallback();
310    return;
311  }
312
313  content::BrowserThread::PostBlockingPoolSequencedTask(
314      "ContentHashFetcher",
315      FROM_HERE,
316      base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this));
317}
318
319void ContentHashFetcherJob::MaybeCreateHashes() {
320  if (IsCancelled())
321    return;
322  base::FilePath hashes_file =
323      file_util::GetComputedHashesPath(extension_path_);
324
325  if (!force_ && base::PathExists(hashes_file)) {
326    success_ = true;
327  } else {
328    if (force_)
329      base::DeleteFile(hashes_file, false /* recursive */);
330    success_ = CreateHashes(hashes_file);
331  }
332
333  content::BrowserThread::PostTask(
334      creation_thread_,
335      FROM_HERE,
336      base::Bind(&ContentHashFetcherJob::DispatchCallback, this));
337}
338
339bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
340  base::ElapsedTimer timer;
341  if (IsCancelled())
342    return false;
343  // Make sure the directory exists.
344  if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL))
345    return false;
346
347  if (!verified_contents_.get()) {
348    base::FilePath verified_contents_path =
349        file_util::GetVerifiedContentsPath(extension_path_);
350    verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
351    if (!verified_contents_->InitFrom(verified_contents_path, false))
352      return false;
353    verified_contents_.reset();
354  }
355
356  base::FileEnumerator enumerator(extension_path_,
357                                  true, /* recursive */
358                                  base::FileEnumerator::FILES);
359  // First discover all the file paths and put them in a sorted set.
360  SortedFilePathSet paths;
361  for (;;) {
362    if (IsCancelled())
363      return false;
364
365    base::FilePath full_path = enumerator.Next();
366    if (full_path.empty())
367      break;
368    paths.insert(full_path);
369  }
370
371  // Now iterate over all the paths in sorted order and compute the block hashes
372  // for each one.
373  ComputedHashes::Writer writer;
374  for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) {
375    if (IsCancelled())
376      return false;
377    const base::FilePath& full_path = *i;
378    base::FilePath relative_path;
379    extension_path_.AppendRelativePath(full_path, &relative_path);
380    relative_path = relative_path.NormalizePathSeparatorsTo('/');
381
382    if (!verified_contents_->HasTreeHashRoot(relative_path))
383      continue;
384
385    std::string contents;
386    if (!base::ReadFileToString(full_path, &contents)) {
387      LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
388      continue;
389    }
390
391    // Iterate through taking the hash of each block of size (block_size_) of
392    // the file.
393    std::vector<std::string> hashes;
394    ComputedHashes::ComputeHashesForContent(contents, block_size_, &hashes);
395    std::string root =
396        ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
397    if (!verified_contents_->TreeHashRootEquals(relative_path, root)) {
398      VLOG(1) << "content mismatch for " << relative_path.AsUTF8Unsafe();
399      hash_mismatch_paths_.insert(relative_path);
400      continue;
401    }
402
403    writer.AddHashes(relative_path, block_size_, hashes);
404  }
405  bool result = writer.WriteToFile(hashes_file);
406  UMA_HISTOGRAM_TIMES("ExtensionContentHashFetcher.CreateHashesTime",
407                      timer.Elapsed());
408  return result;
409}
410
411void ContentHashFetcherJob::DispatchCallback() {
412  {
413    base::AutoLock autolock(cancelled_lock_);
414    if (cancelled_)
415      return;
416  }
417  callback_.Run(this);
418}
419
420// ----
421
// |context| and |delegate| are not owned and must outlive this object;
// |callback| is invoked once per finished (non-cancelled) job.
ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context,
                                       ContentVerifierDelegate* delegate,
                                       const FetchCallback& callback)
    : context_(context),
      delegate_(delegate),
      fetch_callback_(callback),
      weak_ptr_factory_(this) {
}
430
431ContentHashFetcher::~ContentHashFetcher() {
432  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
433    i->second->Cancel();
434  }
435}
436
437void ContentHashFetcher::DoFetch(const Extension* extension, bool force) {
438  DCHECK(extension);
439
440  IdAndVersion key(extension->id(), extension->version()->GetString());
441  JobMap::iterator found = jobs_.find(key);
442  if (found != jobs_.end()) {
443    if (!force || found->second->force()) {
444      // Just let the existing job keep running.
445      return;
446    } else {
447      // Kill the existing non-force job, so we can start a new one below.
448      found->second->Cancel();
449      jobs_.erase(found);
450    }
451  }
452
453  // TODO(asargent) - we should do something here to remember recent attempts
454  // to fetch signatures by extension id, and use exponential backoff to avoid
455  // hammering the server when we aren't successful in getting them.
456  // crbug.com/373397
457
458  DCHECK(extension->version());
459  GURL url =
460      delegate_->GetSignatureFetchUrl(extension->id(), *extension->version());
461  ContentHashFetcherJob* job =
462      new ContentHashFetcherJob(context_->GetRequestContext(),
463                                delegate_->PublicKey(),
464                                extension->id(),
465                                extension->path(),
466                                url,
467                                force,
468                                base::Bind(&ContentHashFetcher::JobFinished,
469                                           weak_ptr_factory_.GetWeakPtr()));
470  jobs_.insert(std::make_pair(key, job));
471  job->Start();
472}
473
474void ContentHashFetcher::ExtensionLoaded(const Extension* extension) {
475  CHECK(extension);
476  DoFetch(extension, false);
477}
478
479void ContentHashFetcher::ExtensionUnloaded(const Extension* extension) {
480  CHECK(extension);
481  IdAndVersion key(extension->id(), extension->version()->GetString());
482  JobMap::iterator found = jobs_.find(key);
483  if (found != jobs_.end()) {
484    found->second->Cancel();
485    jobs_.erase(found);
486  }
487}
488
489void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) {
490  if (!job->IsCancelled()) {
491    fetch_callback_.Run(job->extension_id(),
492                        job->success(),
493                        job->force(),
494                        job->hash_mismatch_paths());
495  }
496
497  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
498    if (i->second.get() == job) {
499      jobs_.erase(i);
500      break;
501    }
502  }
503}
504
505}  // namespace extensions
506