1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Implementation of the MalwareDetails class.
6
7#include "chrome/browser/safe_browsing/malware_details.h"
8
9#include "base/callback.h"
10#include "base/lazy_instance.h"
11#include "base/md5.h"
12#include "base/string_util.h"
13#include "chrome/browser/net/chrome_url_request_context.h"
14#include "chrome/browser/safe_browsing/malware_details_cache.h"
15#include "chrome/browser/safe_browsing/safe_browsing_service.h"
16#include "chrome/browser/safe_browsing/report.pb.h"
17#include "content/browser/browser_thread.h"
18#include "net/base/load_flags.h"
19#include "net/http/http_response_headers.h"
20#include "net/url_request/url_request_context_getter.h"
21#include "net/url_request/url_request_status.h"
22
23using safe_browsing::ClientMalwareReportRequest;
24
25// Only send small files for now, a better strategy would use the size
26// of the whole report and the user's bandwidth.
27static const uint32 kMaxBodySizeBytes = 1024;
28
29MalwareDetailsCacheCollector::MalwareDetailsCacheCollector()
30    : has_started_(false),
31      current_fetch_(NULL) {
32}
33
34MalwareDetailsCacheCollector::~MalwareDetailsCacheCollector() {
35}
36
37void MalwareDetailsCacheCollector::StartCacheCollection(
38    net::URLRequestContextGetter* request_context_getter,
39    safe_browsing::ResourceMap* resources,
40    bool* result,
41    Task* callback) {
42  // Start the data collection from the HTTP cache. We use a URLFetcher
43  // and set the right flags so we only hit the cache.
44  DVLOG(1) << "Getting cache data for all urls...";
45  request_context_getter_ = request_context_getter;
46  resources_ = resources;
47  resources_it_ = resources_->begin();
48  result_ = result;
49  callback_ = callback;
50  has_started_ = true;
51
52  // Post a task in the message loop, so the callers don't need to
53  // check if we call their callback immediately.
54  BrowserThread::PostTask(
55      BrowserThread::IO, FROM_HERE,
56      NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry));
57}
58
59bool MalwareDetailsCacheCollector::HasStarted() {
60  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
61  return has_started_;
62}
63
64// Fetch a URL and advance to the next one when done.
65void MalwareDetailsCacheCollector::OpenEntry() {
66  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
67  DVLOG(1) << "OpenEntry";
68
69  if (resources_it_ == resources_->end()) { // We are done.
70    AllDone(true);
71    return;
72  }
73
74  if (!request_context_getter_) {
75    DVLOG(1) << "Missing request context getter";
76    AllDone(false);
77    return;
78  }
79
80  current_fetch_.reset(new URLFetcher(
81      GURL(resources_it_->first),
82      URLFetcher::GET,
83      this));
84  current_fetch_->set_request_context(request_context_getter_);
85  // Only from cache, and don't save cookies.
86  current_fetch_->set_load_flags(net::LOAD_ONLY_FROM_CACHE |
87                                 net::LOAD_DO_NOT_SAVE_COOKIES);
88  current_fetch_->set_automatically_retry_on_5xx(false);  // No retries.
89  current_fetch_->Start();  // OnURLFetchComplete will be called when done.
90}
91
92ClientMalwareReportRequest::Resource* MalwareDetailsCacheCollector::GetResource(
93    const GURL& url) {
94  safe_browsing::ResourceMap::iterator it = resources_->find(url.spec());
95  if (it != resources_->end()) {
96    return it->second.get();
97  }
98  return NULL;
99}
100
101void MalwareDetailsCacheCollector::OnURLFetchComplete(
102    const URLFetcher* source,
103    const GURL& url,
104    const net::URLRequestStatus& status,
105    int response_code,
106    const ResponseCookies& cookies,
107    const std::string& data) {
108  DVLOG(1) << "OnUrlFetchComplete";
109  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
110  DCHECK(current_fetch_.get());
111  if (status.status() != net::URLRequestStatus::SUCCESS &&
112      status.os_error() == net::ERR_CACHE_MISS) {
113    // Cache miss, skip this resource.
114    DVLOG(1) << "Cache miss for url: " << url;
115    AdvanceEntry();
116    return;
117  }
118
119  if (status.status() != net::URLRequestStatus::SUCCESS) {
120    // Some other error occurred, e.g. the request could have been cancelled.
121    DVLOG(1) << "Unsuccessful fetch: " << url;
122    AdvanceEntry();
123    return;
124  }
125
126  // Set the response headers and body to the right resource, which
127  // might not be the same as the one we asked for.
128  // For redirects, resources_it_->first != url.spec().
129  ClientMalwareReportRequest::Resource* resource = GetResource(url);
130  if (!resource) {
131    DVLOG(1) << "Cannot find resource for url:" << url;
132    AdvanceEntry();
133    return;
134  }
135
136  ReadResponse(resource, source);
137  ReadData(resource, data);
138  AdvanceEntry();
139}
140
141void MalwareDetailsCacheCollector::ReadResponse(
142    ClientMalwareReportRequest::Resource* pb_resource,
143    const URLFetcher* source) {
144  DVLOG(1) << "ReadResponse";
145  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
146  net::HttpResponseHeaders* headers = source->response_headers();
147  if (!headers) {
148    DVLOG(1) << "Missing response headers.";
149    return;
150  }
151
152  ClientMalwareReportRequest::HTTPResponse* pb_response =
153      pb_resource->mutable_response();
154  pb_response->mutable_firstline()->set_code(headers->response_code());
155  void* iter = NULL;
156  std::string name, value;
157  while (headers->EnumerateHeaderLines(&iter, &name, &value)) {
158    ClientMalwareReportRequest::HTTPHeader* pb_header =
159        pb_response->add_headers();
160    pb_header->set_name(name);
161    // Strip any Set-Cookie headers.
162    if (LowerCaseEqualsASCII(name, "set-cookie")) {
163      pb_header->set_value("");
164    } else {
165      pb_header->set_value(value);
166    }
167  }
168}
169
170void MalwareDetailsCacheCollector::ReadData(
171    ClientMalwareReportRequest::Resource* pb_resource,
172    const std::string& data) {
173  DVLOG(1) << "ReadData";
174  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
175  ClientMalwareReportRequest::HTTPResponse* pb_response =
176      pb_resource->mutable_response();
177  if (data.size() <= kMaxBodySizeBytes) {  // Only send small bodies for now.
178    pb_response->set_body(data);
179  }
180  pb_response->set_bodylength(data.size());
181  MD5Digest digest;
182  MD5Sum(data.c_str(), data.size(), &digest);
183  pb_response->set_bodydigest(MD5DigestToBase16(digest));
184}
185
186void MalwareDetailsCacheCollector::AdvanceEntry() {
187  DVLOG(1) << "AdvanceEntry";
188  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
189  // Advance to the next resource.
190  ++resources_it_;
191  current_fetch_.reset(NULL);
192
193  // Create a task so we don't take over the IO thread for too long.
194  BrowserThread::PostTask(
195      BrowserThread::IO, FROM_HERE,
196      NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry));
197}
198
199void MalwareDetailsCacheCollector::AllDone(bool success) {
200  DVLOG(1) << "AllDone";
201  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
202  *result_ = success;
203  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, callback_);
204}
205