malware_details.cc revision 868fa2fe829687343ffae624259930155e16dbd8
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Implementation of the MalwareDetails class.
6
7#include "chrome/browser/safe_browsing/malware_details.h"
8
9#include "base/bind.h"
10#include "base/lazy_instance.h"
11#include "chrome/browser/net/chrome_url_request_context.h"
12#include "chrome/browser/profiles/profile.h"
13#include "chrome/browser/safe_browsing/malware_details_cache.h"
14#include "chrome/browser/safe_browsing/malware_details_history.h"
15#include "chrome/browser/safe_browsing/report.pb.h"
16#include "chrome/common/safe_browsing/safebrowsing_messages.h"
17#include "content/public/browser/browser_thread.h"
18#include "content/public/browser/navigation_controller.h"
19#include "content/public/browser/navigation_entry.h"
20#include "content/public/browser/render_view_host.h"
21#include "content/public/browser/web_contents.h"
22#include "net/base/io_buffer.h"
23#include "net/disk_cache/disk_cache.h"
24#include "net/url_request/url_request_context_getter.h"
25
26using content::BrowserThread;
27using content::NavigationEntry;
28using content::WebContents;
29using safe_browsing::ClientMalwareReportRequest;
30
31// Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
32static const uint32 kMaxDomNodes = 500;
33
34// static
35MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
36
37// The default MalwareDetailsFactory.  Global, made a singleton so we
38// don't leak it.
39class MalwareDetailsFactoryImpl
40    : public MalwareDetailsFactory {
41 public:
42  virtual MalwareDetails* CreateMalwareDetails(
43      SafeBrowsingUIManager* ui_manager,
44      WebContents* web_contents,
45      const SafeBrowsingUIManager::UnsafeResource& unsafe_resource) OVERRIDE {
46    return new MalwareDetails(ui_manager, web_contents, unsafe_resource);
47  }
48
49 private:
50  friend struct base::DefaultLazyInstanceTraits<
51      MalwareDetailsFactoryImpl>;
52
53  MalwareDetailsFactoryImpl() { }
54
55  DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
56};
57
58static base::LazyInstance<MalwareDetailsFactoryImpl>
59    g_malware_details_factory_impl = LAZY_INSTANCE_INITIALIZER;
60
61// Create a MalwareDetails for the given tab.
62/* static */
63MalwareDetails* MalwareDetails::NewMalwareDetails(
64    SafeBrowsingUIManager* ui_manager,
65    WebContents* web_contents,
66    const UnsafeResource& resource) {
67  // Set up the factory if this has not been done already (tests do that
68  // before this method is called).
69  if (!factory_)
70    factory_ = g_malware_details_factory_impl.Pointer();
71  return factory_->CreateMalwareDetails(ui_manager, web_contents, resource);
72}
73
74// Create a MalwareDetails for the given tab. Runs in the UI thread.
75MalwareDetails::MalwareDetails(
76    SafeBrowsingUIManager* ui_manager,
77    content::WebContents* web_contents,
78    const UnsafeResource& resource)
79    : content::WebContentsObserver(web_contents),
80      profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())),
81      request_context_getter_(profile_->GetRequestContext()),
82      ui_manager_(ui_manager),
83      resource_(resource),
84      cache_result_(false),
85      cache_collector_(new MalwareDetailsCacheCollector),
86      redirects_collector_(
87          new MalwareDetailsRedirectsCollector(profile_)) {
88  StartCollection();
89}
90
91MalwareDetails::~MalwareDetails() {
92}
93
94bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
95  bool handled = true;
96  IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
97    IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
98                        OnReceivedMalwareDOMDetails)
99    IPC_MESSAGE_UNHANDLED(handled = false)
100  IPC_END_MESSAGE_MAP()
101  return handled;
102}
103
104bool MalwareDetails::IsPublicUrl(const GURL& url) const {
105  return url.SchemeIs("http");  // TODO(panayiotis): also skip internal urls.
106}
107
108// Looks for a Resource for the given url in resources_.  If found, it
109// updates |resource|. Otherwise, it creates a new message, adds it to
110// resources_ and updates |resource| to point to it.
111ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
112    const GURL& url) {
113  safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
114  if (it != resources_.end()) {
115    return it->second.get();
116  }
117
118  // Create the resource for |url|.
119  int id = resources_.size();
120  linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
121      new ClientMalwareReportRequest::Resource());
122  new_resource->set_url(url.spec());
123  new_resource->set_id(id);
124  resources_[url.spec()] = new_resource;
125  return new_resource.get();
126}
127
128void MalwareDetails::AddUrl(const GURL& url,
129                            const GURL& parent,
130                            const std::string& tagname,
131                            const std::vector<GURL>* children) {
132  if (!url.is_valid() || !IsPublicUrl(url))
133    return;
134
135  // Find (or create) the resource for the url.
136  ClientMalwareReportRequest::Resource* url_resource =
137      FindOrCreateResource(url);
138  if (!tagname.empty()) {
139    url_resource->set_tag_name(tagname);
140  }
141  if (!parent.is_empty() && IsPublicUrl(parent)) {
142    // Add the resource for the parent.
143    ClientMalwareReportRequest::Resource* parent_resource =
144        FindOrCreateResource(parent);
145    // Update the parent-child relation
146    url_resource->set_parent_id(parent_resource->id());
147  }
148  if (children) {
149    for (std::vector<GURL>::const_iterator it = children->begin();
150         it != children->end(); it++) {
151      ClientMalwareReportRequest::Resource* child_resource =
152          FindOrCreateResource(*it);
153      url_resource->add_child_ids(child_resource->id());
154    }
155  }
156}
157
158void MalwareDetails::StartCollection() {
159  DVLOG(1) << "Starting to compute malware details.";
160  report_.reset(new ClientMalwareReportRequest());
161
162  if (IsPublicUrl(resource_.url)) {
163    report_->set_malware_url(resource_.url.spec());
164  }
165
166  GURL page_url = web_contents()->GetURL();
167  if (IsPublicUrl(page_url)) {
168    report_->set_page_url(page_url.spec());
169  }
170
171  GURL referrer_url;
172  NavigationEntry* nav_entry = web_contents()->GetController().GetActiveEntry();
173  if (nav_entry) {
174    referrer_url = nav_entry->GetReferrer().url;
175    if (IsPublicUrl(referrer_url)) {
176      report_->set_referrer_url(referrer_url.spec());
177    }
178  }
179
180  // Add the nodes, starting from the page url.
181  AddUrl(page_url, GURL(), std::string(), NULL);
182
183  // Add the resource_url and its original url, if non-empty and different.
184  if (!resource_.original_url.is_empty() &&
185      resource_.url != resource_.original_url) {
186    // Add original_url, as the parent of resource_url.
187    AddUrl(resource_.original_url, GURL(), std::string(), NULL);
188    AddUrl(resource_.url, resource_.original_url, std::string(), NULL);
189  } else {
190    AddUrl(resource_.url, GURL(), std::string(), NULL);
191  }
192
193  // Add the redirect urls, if non-empty. The redirect urls do not include the
194  // original url, but include the unsafe url which is the last one of the
195  // redirect urls chain
196  GURL parent_url;
197  // Set the original url as the parent of the first redirect url if it's not
198  // empty.
199  if (!resource_.original_url.is_empty()) {
200    parent_url = resource_.original_url;
201  }
202  // Set the previous redirect url as the parent of the next one
203  for (unsigned int i = 0; i < resource_.redirect_urls.size(); ++i) {
204    AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL);
205    parent_url = resource_.redirect_urls[i];
206  }
207
208  // Add the referrer url.
209  if (nav_entry && !referrer_url.is_empty()) {
210    AddUrl(referrer_url, GURL(), std::string(), NULL);
211  }
212
213  // Get URLs of frames, scripts etc from the DOM.
214  // OnReceivedMalwareDOMDetails will be called when the renderer replies.
215  content::RenderViewHost* view = web_contents()->GetRenderViewHost();
216  view->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view->GetRoutingID()));
217}
218
219// When the renderer is done, this is called.
220void MalwareDetails::OnReceivedMalwareDOMDetails(
221    const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
222  // Schedule this in IO thread, so it doesn't conflict with future users
223  // of our data structures (eg GetSerializedReport).
224  BrowserThread::PostTask(
225      BrowserThread::IO, FROM_HERE,
226      base::Bind(&MalwareDetails::AddDOMDetails, this, params));
227}
228
229void MalwareDetails::AddDOMDetails(
230    const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
231  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
232  DVLOG(1) << "Nodes from the DOM: " << params.size();
233
234  // If we have already started getting redirects from history service,
235  // don't modify state, otherwise will invalidate the iterators.
236  if (redirects_collector_->HasStarted())
237    return;
238
239  // If we have already started collecting data from the HTTP cache, don't
240  // modify our state.
241  if (cache_collector_->HasStarted())
242    return;
243
244  // Add the urls from the DOM to |resources_|.  The renderer could be
245  // sending bogus messages, so limit the number of nodes we accept.
246  for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
247    SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
248    DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
249    AddUrl(node.url, node.parent, node.tag_name, &(node.children));
250  }
251}
252
253// Called from the SB Service on the IO thread, after the user has
254// closed the tab, or clicked proceed or goback.  Since the user needs
255// to take an action, we expect this to be called after
256// OnReceivedMalwareDOMDetails in most cases. If not, we don't include
257// the DOM data in our report.
258void MalwareDetails::FinishCollection() {
259  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
260
261  std::vector<GURL> urls;
262  for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
263       it != resources_.end(); it++) {
264    urls.push_back(GURL(it->first));
265  }
266  redirects_collector_->StartHistoryCollection(
267      urls,
268      base::Bind(&MalwareDetails::OnRedirectionCollectionReady, this));
269}
270
271void MalwareDetails::OnRedirectionCollectionReady() {
272  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
273  const std::vector<safe_browsing::RedirectChain>& redirects =
274      redirects_collector_->GetCollectedUrls();
275
276  for (size_t i = 0; i < redirects.size(); ++i)
277    AddRedirectUrlList(redirects[i]);
278
279  // Call the cache collector
280  cache_collector_->StartCacheCollection(
281      request_context_getter_.get(),
282      &resources_,
283      &cache_result_,
284      base::Bind(&MalwareDetails::OnCacheCollectionReady, this));
285}
286
287void MalwareDetails::AddRedirectUrlList(const std::vector<GURL>& urls) {
288  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
289  for (size_t i = 0; i < urls.size()-1; ++i) {
290    AddUrl(urls[i], urls[i + 1], std::string(), NULL);
291  }
292}
293
294void MalwareDetails::OnCacheCollectionReady() {
295  DVLOG(1) << "OnCacheCollectionReady.";
296  // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
297  for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
298       it != resources_.end(); it++) {
299    ClientMalwareReportRequest::Resource* pb_resource =
300        report_->add_resources();
301    pb_resource->CopyFrom(*(it->second));
302  }
303
304  report_->set_complete(cache_result_);
305
306  // Send the report, using the SafeBrowsingService.
307  std::string serialized;
308  if (!report_->SerializeToString(&serialized)) {
309    DLOG(ERROR) << "Unable to serialize the malware report.";
310    return;
311  }
312
313  ui_manager_->SendSerializedMalwareDetails(serialized);
314}
315