1ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Use of this source code is governed by a BSD-style license that can be 3ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// found in the LICENSE file. 4ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// 5ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Implementation of the MalwareDetails class. 6ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 7ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "chrome/browser/safe_browsing/malware_details.h" 8ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 9ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/callback.h" 10ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/lazy_instance.h" 11ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/md5.h" 12ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/string_util.h" 13ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "chrome/browser/net/chrome_url_request_context.h" 14ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "chrome/browser/safe_browsing/malware_details_cache.h" 15ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "chrome/browser/safe_browsing/safe_browsing_service.h" 16ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "chrome/browser/safe_browsing/report.pb.h" 17ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "content/browser/browser_thread.h" 18ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "net/base/load_flags.h" 19ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "net/http/http_response_headers.h" 20ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "net/url_request/url_request_context_getter.h" 21ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "net/url_request/url_request_status.h" 22ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 23ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenusing safe_browsing::ClientMalwareReportRequest; 24ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 25ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Only send small files for now, a better strategy would use the size 26ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// of the whole report and the user's bandwidth. 27ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstatic const uint32 kMaxBodySizeBytes = 1024; 28ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 29ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenMalwareDetailsCacheCollector::MalwareDetailsCacheCollector() 30ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen : has_started_(false), 31ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen current_fetch_(NULL) { 32ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 33ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 34ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenMalwareDetailsCacheCollector::~MalwareDetailsCacheCollector() { 35ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 36ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 37ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid MalwareDetailsCacheCollector::StartCacheCollection( 38ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen net::URLRequestContextGetter* request_context_getter, 39ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen safe_browsing::ResourceMap* resources, 40ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen bool* result, 41ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen Task* callback) { 42ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Start the data collection from the HTTP cache. We use a URLFetcher 43ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // and set the right flags so we only hit the cache. 44ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "Getting cache data for all urls..."; 45ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen request_context_getter_ = request_context_getter; 46ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen resources_ = resources; 47ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen resources_it_ = resources_->begin(); 48ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen result_ = result; 49ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen callback_ = callback; 50ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen has_started_ = true; 51ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 52ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Post a task in the message loop, so the callers don't need to 53ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // check if we call their callback immediately. 54ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen BrowserThread::PostTask( 55ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen BrowserThread::IO, FROM_HERE, 56ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry)); 57ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 58ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 59ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool MalwareDetailsCacheCollector::HasStarted() { 60ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 61ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return has_started_; 62ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 63ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 64ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Fetch a URL and advance to the next one when done. 65ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid MalwareDetailsCacheCollector::OpenEntry() { 66ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 67ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "OpenEntry"; 68ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 69ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (resources_it_ == resources_->end()) { // We are done. 70ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AllDone(true); 71ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return; 72ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 73ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 74ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (!request_context_getter_) { 75ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "Missing request context getter"; 76ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AllDone(false); 77ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return; 78ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 79ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 80ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen current_fetch_.reset(new URLFetcher( 81ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen GURL(resources_it_->first), 82ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen URLFetcher::GET, 83ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen this)); 84ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen current_fetch_->set_request_context(request_context_getter_); 85ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Only from cache, and don't save cookies. 86ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen current_fetch_->set_load_flags(net::LOAD_ONLY_FROM_CACHE | 87ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen net::LOAD_DO_NOT_SAVE_COOKIES); 88ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen current_fetch_->set_automatically_retry_on_5xx(false); // No retries. 89ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen current_fetch_->Start(); // OnURLFetchComplete will be called when done. 90ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 91ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 92ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenClientMalwareReportRequest::Resource* MalwareDetailsCacheCollector::GetResource( 93ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const GURL& url) { 94ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen safe_browsing::ResourceMap::iterator it = resources_->find(url.spec()); 95ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (it != resources_->end()) { 96ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return it->second.get(); 97ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 98ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 99ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 100ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 101ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid MalwareDetailsCacheCollector::OnURLFetchComplete( 102ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const URLFetcher* source, 103ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const GURL& url, 104ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const net::URLRequestStatus& status, 105ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int response_code, 106ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const ResponseCookies& cookies, 107ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const std::string& data) { 108ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "OnUrlFetchComplete"; 109ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 110ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK(current_fetch_.get()); 111ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (status.status() != net::URLRequestStatus::SUCCESS && 112ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen status.os_error() == net::ERR_CACHE_MISS) { 113ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Cache miss, skip this resource. 114ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "Cache miss for url: " << url; 115ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AdvanceEntry(); 116ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return; 117ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 118ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 119ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (status.status() != net::URLRequestStatus::SUCCESS) { 120ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Some other error occurred, e.g. the request could have been cancelled. 121ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "Unsuccessful fetch: " << url; 122ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AdvanceEntry(); 123ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return; 124ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 125ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 126ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Set the response headers and body to the right resource, which 127ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // might not be the same as the one we asked for. 128ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // For redirects, resources_it_->first != url.spec(). 129ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ClientMalwareReportRequest::Resource* resource = GetResource(url); 130ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (!resource) { 131ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "Cannot find resource for url:" << url; 132ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AdvanceEntry(); 133ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return; 134ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 135ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 136ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ReadResponse(resource, source); 137ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ReadData(resource, data); 138ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AdvanceEntry(); 139ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 140ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 141ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid MalwareDetailsCacheCollector::ReadResponse( 142ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ClientMalwareReportRequest::Resource* pb_resource, 143ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const URLFetcher* source) { 144ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "ReadResponse"; 145ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 146ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen net::HttpResponseHeaders* headers = source->response_headers(); 147ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (!headers) { 148ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "Missing response headers."; 149ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return; 150ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 151ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 152ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ClientMalwareReportRequest::HTTPResponse* pb_response = 153ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen pb_resource->mutable_response(); 154ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen pb_response->mutable_firstline()->set_code(headers->response_code()); 155ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen void* iter = NULL; 156ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::string name, value; 157ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen while (headers->EnumerateHeaderLines(&iter, &name, &value)) { 158ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ClientMalwareReportRequest::HTTPHeader* pb_header = 159ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen pb_response->add_headers(); 160ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen pb_header->set_name(name); 161ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Strip any Set-Cookie headers. 162ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (LowerCaseEqualsASCII(name, "set-cookie")) { 163ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen pb_header->set_value(""); 164ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } else { 165ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen pb_header->set_value(value); 166ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 167ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 168ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 169ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 170ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid MalwareDetailsCacheCollector::ReadData( 171ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ClientMalwareReportRequest::Resource* pb_resource, 172ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const std::string& data) { 173ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "ReadData"; 174ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 175ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ClientMalwareReportRequest::HTTPResponse* pb_response = 176ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen pb_resource->mutable_response(); 177ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (data.size() <= kMaxBodySizeBytes) { // Only send small bodies for now. 178ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen pb_response->set_body(data); 179ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 180ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen pb_response->set_bodylength(data.size()); 181ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Digest digest; 182ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Sum(data.c_str(), data.size(), &digest); 183ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen pb_response->set_bodydigest(MD5DigestToBase16(digest)); 184ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 185ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 186ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid MalwareDetailsCacheCollector::AdvanceEntry() { 187ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "AdvanceEntry"; 188ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 189ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Advance to the next resource. 190ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ++resources_it_; 191ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen current_fetch_.reset(NULL); 192ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 193ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Create a task so we don't take over the IO thread for too long. 194ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen BrowserThread::PostTask( 195ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen BrowserThread::IO, FROM_HERE, 196ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry)); 197ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 198ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 199ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid MalwareDetailsCacheCollector::AllDone(bool success) { 200ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DVLOG(1) << "AllDone"; 201ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 202ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen *result_ = success; 203ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, callback_); 204ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 205