1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Implementation of the MalwareDetails class.
6
7#include "chrome/browser/safe_browsing/malware_details.h"
8
9#include "base/callback.h"
10#include "base/lazy_instance.h"
11#include "chrome/browser/net/chrome_url_request_context.h"
12#include "chrome/browser/profiles/profile.h"
13#include "chrome/browser/safe_browsing/malware_details_cache.h"
14#include "chrome/browser/safe_browsing/report.pb.h"
15#include "chrome/browser/safe_browsing/safe_browsing_service.h"
16#include "chrome/common/safe_browsing/safebrowsing_messages.h"
17#include "content/browser/browser_thread.h"
18#include "content/browser/renderer_host/render_view_host.h"
19#include "content/browser/tab_contents/navigation_entry.h"
20#include "content/browser/tab_contents/tab_contents.h"
21#include "net/base/io_buffer.h"
22#include "net/disk_cache/disk_cache.h"
23#include "net/url_request/url_request_context_getter.h"
24
25using safe_browsing::ClientMalwareReportRequest;
26
// Upper bound on the number of DOM nodes consumed from a single renderer
// reply; AddDOMDetails() stops reading nodes once this many have been
// processed.
// Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
static const uint32 kMaxDomNodes = 500;
29
// static
// Factory consulted by NewMalwareDetails(). Starts out NULL; if it is still
// NULL when NewMalwareDetails() runs, the default factory is installed there.
MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
32
// The default MalwareDetailsFactory.  Global, made a singleton so we
// don't leak it.
//
// The constructor is private and copying is disallowed; the only friend is
// base::DefaultLazyInstanceTraits, so instances are meant to be created
// through a base::LazyInstance rather than directly.
class MalwareDetailsFactoryImpl
    : public MalwareDetailsFactory {
 public:
  // Allocates a new MalwareDetails with |new| from the given service, tab and
  // unsafe resource, and returns the raw pointer to the caller.
  MalwareDetails* CreateMalwareDetails(
      SafeBrowsingService* sb_service,
      TabContents* tab_contents,
      const SafeBrowsingService::UnsafeResource& unsafe_resource) {
    return new MalwareDetails(sb_service, tab_contents, unsafe_resource);
  }

 private:
  // Grants the lazy-instance traits access to the private constructor below.
  friend struct base::DefaultLazyInstanceTraits<
      MalwareDetailsFactoryImpl>;

  MalwareDetailsFactoryImpl() { }

  DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
};
53
// Lazily created instance of the default factory. NewMalwareDetails() points
// MalwareDetails::factory_ at it when no other factory has been set.
static base::LazyInstance<MalwareDetailsFactoryImpl>
    g_malware_details_factory_impl(base::LINKER_INITIALIZED);
56
57// Create a MalwareDetails for the given tab.
58/* static */
59MalwareDetails* MalwareDetails::NewMalwareDetails(
60    SafeBrowsingService* sb_service,
61    TabContents* tab_contents,
62    const SafeBrowsingService::UnsafeResource& resource) {
63  // Set up the factory if this has not been done already (tests do that
64  // before this method is called).
65  if (!factory_)
66    factory_ = g_malware_details_factory_impl.Pointer();
67  return factory_->CreateMalwareDetails(sb_service, tab_contents, resource);
68}
69
// Create a MalwareDetails for the given tab. Runs in the UI thread.
//
// Passes |tab_contents| to the TabContentsObserver base, takes the request
// context getter from the tab's profile, stores |sb_service| and |resource|,
// and allocates the cache collector. Collection begins immediately: the
// constructor body calls StartCollection().
MalwareDetails::MalwareDetails(
    SafeBrowsingService* sb_service,
    TabContents* tab_contents,
    const SafeBrowsingService::UnsafeResource& resource)
    : TabContentsObserver(tab_contents),
      request_context_getter_(tab_contents->profile()->GetRequestContext()),
      sb_service_(sb_service),
      resource_(resource),
      cache_collector_(new MalwareDetailsCacheCollector) {
  StartCollection();
}
82
// Intentionally empty: no explicit teardown is performed here.
MalwareDetails::~MalwareDetails() {
}
85
// Dispatches incoming IPC messages. SafeBrowsingHostMsg_MalwareDOMDetails is
// routed to OnReceivedMalwareDOMDetails(); any other message is reported as
// unhandled by returning false.
bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
  // Assume the message is ours; the UNHANDLED clause flips this otherwise.
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
    IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
                        OnReceivedMalwareDOMDetails)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}
95
96bool MalwareDetails::IsPublicUrl(const GURL& url) const {
97  return url.SchemeIs("http");  // TODO(panayiotis): also skip internal urls.
98}
99
100// Looks for a Resource for the given url in resources_.  If found, it
101// updates |resource|. Otherwise, it creates a new message, adds it to
102// resources_ and updates |resource| to point to it.
103ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
104    const GURL& url) {
105  safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
106  if (it != resources_.end()) {
107    return it->second.get();
108  }
109
110  // Create the resource for |url|.
111  int id = resources_.size();
112  linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
113      new ClientMalwareReportRequest::Resource());
114  new_resource->set_url(url.spec());
115  new_resource->set_id(id);
116  resources_[url.spec()] = new_resource;
117  return new_resource.get();
118}
119
120void MalwareDetails::AddUrl(const GURL& url,
121                            const GURL& parent,
122                            const std::string& tagname,
123                            const std::vector<GURL>* children) {
124  if (!IsPublicUrl(url))
125    return;
126
127  // Find (or create) the resource for the url.
128  ClientMalwareReportRequest::Resource* url_resource =
129      FindOrCreateResource(url);
130  if (!tagname.empty()) {
131    url_resource->set_tag_name(tagname);
132  }
133  if (!parent.is_empty() && IsPublicUrl(parent)) {
134    // Add the resource for the parent.
135    ClientMalwareReportRequest::Resource* parent_resource =
136        FindOrCreateResource(parent);
137    // Update the parent-child relation
138    url_resource->set_parent_id(parent_resource->id());
139  }
140  if (children) {
141    for (std::vector<GURL>::const_iterator it = children->begin();
142         it != children->end(); it++) {
143      ClientMalwareReportRequest::Resource* child_resource =
144          FindOrCreateResource(*it);
145      url_resource->add_child_ids(child_resource->id());
146    }
147  }
148}
149
150void MalwareDetails::StartCollection() {
151  DVLOG(1) << "Starting to compute malware details.";
152  report_.reset(new ClientMalwareReportRequest());
153
154  if (IsPublicUrl(resource_.url)) {
155    report_->set_malware_url(resource_.url.spec());
156  }
157
158  GURL page_url = tab_contents()->GetURL();
159  if (IsPublicUrl(page_url)) {
160    report_->set_page_url(page_url.spec());
161  }
162
163  GURL referrer_url;
164  NavigationEntry* nav_entry = tab_contents()->controller().GetActiveEntry();
165  if (nav_entry) {
166    referrer_url = nav_entry->referrer();
167    if (IsPublicUrl(referrer_url)) {
168      report_->set_referrer_url(referrer_url.spec());
169    }
170  }
171
172  // Add the nodes, starting from the page url.
173  AddUrl(page_url, GURL(), "", NULL);
174
175  // Add the resource_url and its original url, if non-empty and different.
176  if (!resource_.original_url.is_empty() &&
177      resource_.url != resource_.original_url) {
178    // Add original_url, as the parent of resource_url.
179    AddUrl(resource_.original_url, GURL(), "", NULL);
180    AddUrl(resource_.url, resource_.original_url, "", NULL);
181  } else {
182    AddUrl(resource_.url, GURL(), "", NULL);
183  }
184
185  // Add the redirect urls, if non-empty. The redirect urls do not include the
186  // original url, but include the unsafe url which is the last one of the
187  // redirect urls chain
188  GURL parent_url;
189  // Set the original url as the parent of the first redirect url if it's not
190  // empty.
191  if (!resource_.original_url.is_empty()) {
192    parent_url = resource_.original_url;
193  }
194  // Set the previous redirect url as the parent of the next one
195  for (unsigned int i = 0; i < resource_.redirect_urls.size(); ++i) {
196    AddUrl(resource_.redirect_urls[i], parent_url, "", NULL);
197    parent_url = resource_.redirect_urls[i];
198  }
199
200  // Add the referrer url.
201  if (nav_entry && !referrer_url.is_empty()) {
202    AddUrl(referrer_url, GURL(), "", NULL);
203  }
204
205  // Get URLs of frames, scripts etc from the DOM.
206  // OnReceivedMalwareDOMDetails will be called when the renderer replies.
207  tab_contents()->render_view_host()->GetMalwareDOMDetails();
208}
209
// When the renderer is done, this is called.
//
// |params| is the list of DOM nodes sent by the renderer. No processing
// happens here; the nodes are handed to AddDOMDetails() via a task posted to
// the IO thread.
void MalwareDetails::OnReceivedMalwareDOMDetails(
    const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
  // Schedule this in IO thread, so it doesn't conflict with future users
  // of our data structures (eg GetSerializedReport).
  BrowserThread::PostTask(
      BrowserThread::IO, FROM_HERE,
      NewRunnableMethod(
          this, &MalwareDetails::AddDOMDetails, params));
}
220
221void MalwareDetails::AddDOMDetails(
222    const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
223  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
224  DVLOG(1) << "Nodes from the DOM: " << params.size();
225
226  // If we have already started collecting data from the HTTP cache, don't
227  // modify our state.
228  if (cache_collector_->HasStarted())
229    return;
230
231  // Add the urls from the DOM to |resources_|.  The renderer could be
232  // sending bogus messages, so limit the number of nodes we accept.
233  for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
234    SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
235    DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
236    AddUrl(node.url, node.parent, node.tag_name, &(node.children));
237  }
238}
239
// Called from the SB Service on the IO thread, after the user has
// closed the tab, or clicked proceed or goback.  Since the user needs
// to take an action, we expect this to be called after
// OnReceivedMalwareDOMDetails in most cases. If not, we don't include
// the DOM data in our report.
//
// Hands the request context getter, the collected |resources_| and the
// |cache_result_| flag to the cache collector, together with a task bound to
// OnCacheCollectionReady(). Presumably the collector runs that task once it
// has finished; that is not visible from this file.
void MalwareDetails::FinishCollection() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

  cache_collector_->StartCacheCollection(
      request_context_getter_,
      &resources_,
      &cache_result_,
      NewRunnableMethod(this, &MalwareDetails::OnCacheCollectionReady));
}
254
255void MalwareDetails::OnCacheCollectionReady() {
256  DVLOG(1) << "OnCacheCollectionReady.";
257  // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
258  for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
259       it != resources_.end(); it++) {
260    ClientMalwareReportRequest::Resource* pb_resource =
261        report_->add_resources();
262    pb_resource->CopyFrom(*(it->second));
263  }
264
265  report_->set_complete(cache_result_);
266
267  // Send the report, using the SafeBrowsingService.
268  std::string serialized;
269  if (!report_->SerializeToString(&serialized)) {
270    DLOG(ERROR) << "Unable to serialize the malware report.";
271    return;
272  }
273
274  sb_service_->SendSerializedMalwareDetails(serialized);
275}
276