1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
// Implementation of the MalwareDetailsCacheCollector class.
6
7#include "chrome/browser/safe_browsing/malware_details.h"
8
9#include "base/bind.h"
10#include "base/lazy_instance.h"
11#include "base/md5.h"
12#include "base/strings/string_util.h"
13#include "chrome/browser/net/chrome_url_request_context_getter.h"
14#include "chrome/browser/safe_browsing/malware_details_cache.h"
15#include "chrome/browser/safe_browsing/report.pb.h"
16#include "chrome/browser/safe_browsing/safe_browsing_service.h"
17#include "content/public/browser/browser_thread.h"
18#include "net/base/host_port_pair.h"
19#include "net/base/load_flags.h"
20#include "net/base/net_errors.h"
21#include "net/http/http_response_headers.h"
22#include "net/url_request/url_fetcher.h"
23#include "net/url_request/url_request_context_getter.h"
24#include "net/url_request/url_request_status.h"
25
26using content::BrowserThread;
27using safe_browsing::ClientMalwareReportRequest;
28
29// Only send small files for now, a better strategy would use the size
30// of the whole report and the user's bandwidth.
31static const uint32 kMaxBodySizeBytes = 1024;
32
// Creates an idle collector: the resource map and result pointers are NULL
// and |has_started_| is false until StartCacheCollection() is called.
MalwareDetailsCacheCollector::MalwareDetailsCacheCollector()
    : resources_(NULL), result_(NULL), has_started_(false) {}
35
36void MalwareDetailsCacheCollector::StartCacheCollection(
37    net::URLRequestContextGetter* request_context_getter,
38    safe_browsing::ResourceMap* resources,
39    bool* result,
40    const base::Closure& callback) {
41  // Start the data collection from the HTTP cache. We use a URLFetcher
42  // and set the right flags so we only hit the cache.
43  DVLOG(1) << "Getting cache data for all urls...";
44  request_context_getter_ = request_context_getter;
45  resources_ = resources;
46  resources_it_ = resources_->begin();
47  result_ = result;
48  callback_ = callback;
49  has_started_ = true;
50
51  // Post a task in the message loop, so the callers don't need to
52  // check if we call their callback immediately.
53  BrowserThread::PostTask(
54      BrowserThread::IO, FROM_HERE,
55      base::Bind(&MalwareDetailsCacheCollector::OpenEntry, this));
56}
57
// Returns true once StartCacheCollection() has been called (it sets
// |has_started_|). Must be called on the IO thread.
bool MalwareDetailsCacheCollector::HasStarted() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  return has_started_;
}
62
// Empty destructor body: no explicit teardown is performed here.
MalwareDetailsCacheCollector::~MalwareDetailsCacheCollector() {}
64
65// Fetch a URL and advance to the next one when done.
66void MalwareDetailsCacheCollector::OpenEntry() {
67  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
68  DVLOG(1) << "OpenEntry";
69
70  if (resources_it_ == resources_->end()) {
71    AllDone(true);
72    return;
73  }
74
75  if (!request_context_getter_.get()) {
76    DVLOG(1) << "Missing request context getter";
77    AllDone(false);
78    return;
79  }
80
81  current_fetch_.reset(net::URLFetcher::Create(
82      GURL(resources_it_->first), net::URLFetcher::GET, this));
83  current_fetch_->SetRequestContext(request_context_getter_.get());
84  // Only from cache, and don't save cookies.
85  current_fetch_->SetLoadFlags(net::LOAD_ONLY_FROM_CACHE |
86                               net::LOAD_DO_NOT_SAVE_COOKIES);
87  current_fetch_->SetAutomaticallyRetryOn5xx(false);  // No retries.
88  current_fetch_->Start();  // OnURLFetchComplete will be called when done.
89}
90
91ClientMalwareReportRequest::Resource* MalwareDetailsCacheCollector::GetResource(
92    const GURL& url) {
93  safe_browsing::ResourceMap::iterator it = resources_->find(url.spec());
94  if (it != resources_->end()) {
95    return it->second.get();
96  }
97  return NULL;
98}
99
100void MalwareDetailsCacheCollector::OnURLFetchComplete(
101    const net::URLFetcher* source) {
102  DVLOG(1) << "OnUrlFetchComplete";
103  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
104  DCHECK(current_fetch_.get());
105  if (source->GetStatus().status() != net::URLRequestStatus::SUCCESS &&
106      source->GetStatus().error() == net::ERR_CACHE_MISS) {
107    // Cache miss, skip this resource.
108    DVLOG(1) << "Cache miss for url: " << source->GetURL();
109    AdvanceEntry();
110    return;
111  }
112
113  if (source->GetStatus().status() != net::URLRequestStatus::SUCCESS) {
114    // Some other error occurred, e.g. the request could have been cancelled.
115    DVLOG(1) << "Unsuccessful fetch: " << source->GetURL();
116    AdvanceEntry();
117    return;
118  }
119
120  // Set the response headers and body to the right resource, which
121  // might not be the same as the one we asked for.
122  // For redirects, resources_it_->first != url.spec().
123  ClientMalwareReportRequest::Resource* resource =
124      GetResource(source->GetURL());
125  if (!resource) {
126    DVLOG(1) << "Cannot find resource for url:" << source->GetURL();
127    AdvanceEntry();
128    return;
129  }
130
131  ReadResponse(resource, source);
132  std::string data;
133  source->GetResponseAsString(&data);
134  ReadData(resource, data);
135  AdvanceEntry();
136}
137
138void MalwareDetailsCacheCollector::ReadResponse(
139    ClientMalwareReportRequest::Resource* pb_resource,
140    const net::URLFetcher* source) {
141  DVLOG(1) << "ReadResponse";
142  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
143  net::HttpResponseHeaders* headers = source->GetResponseHeaders();
144  if (!headers) {
145    DVLOG(1) << "Missing response headers.";
146    return;
147  }
148
149  ClientMalwareReportRequest::HTTPResponse* pb_response =
150      pb_resource->mutable_response();
151  pb_response->mutable_firstline()->set_code(headers->response_code());
152  void* iter = NULL;
153  std::string name, value;
154  while (headers->EnumerateHeaderLines(&iter, &name, &value)) {
155    ClientMalwareReportRequest::HTTPHeader* pb_header =
156        pb_response->add_headers();
157    pb_header->set_name(name);
158    // Strip any Set-Cookie headers.
159    if (LowerCaseEqualsASCII(name, "set-cookie")) {
160      pb_header->set_value("");
161    } else {
162      pb_header->set_value(value);
163    }
164  }
165
166  if (!source->WasFetchedViaProxy()) {
167    pb_response->set_remote_ip(source->GetSocketAddress().ToString());
168  }
169}
170
171void MalwareDetailsCacheCollector::ReadData(
172    ClientMalwareReportRequest::Resource* pb_resource,
173    const std::string& data) {
174  DVLOG(1) << "ReadData";
175  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
176  ClientMalwareReportRequest::HTTPResponse* pb_response =
177      pb_resource->mutable_response();
178  if (data.size() <= kMaxBodySizeBytes) {  // Only send small bodies for now.
179    pb_response->set_body(data);
180  }
181  pb_response->set_bodylength(data.size());
182  base::MD5Digest digest;
183  base::MD5Sum(data.c_str(), data.size(), &digest);
184  pb_response->set_bodydigest(base::MD5DigestToBase16(digest));
185}
186
187void MalwareDetailsCacheCollector::AdvanceEntry() {
188  DVLOG(1) << "AdvanceEntry";
189  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
190  // Advance to the next resource.
191  ++resources_it_;
192  current_fetch_.reset(NULL);
193
194  // Create a task so we don't take over the IO thread for too long.
195  BrowserThread::PostTask(
196      BrowserThread::IO, FROM_HERE,
197      base::Bind(&MalwareDetailsCacheCollector::OpenEntry, this));
198}
199
200void MalwareDetailsCacheCollector::AllDone(bool success) {
201  DVLOG(1) << "AllDone";
202  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
203  *result_ = success;
204  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, callback_);
205  callback_.Reset();
206}
207