1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Implementation of the MalwareDetails class.
6
7#include "chrome/browser/safe_browsing/malware_details.h"
8
9#include "base/bind.h"
10#include "base/lazy_instance.h"
11#include "chrome/browser/profiles/profile.h"
12#include "chrome/browser/safe_browsing/malware_details_cache.h"
13#include "chrome/browser/safe_browsing/malware_details_history.h"
14#include "chrome/browser/safe_browsing/report.pb.h"
15#include "chrome/common/safe_browsing/safebrowsing_messages.h"
16#include "content/public/browser/browser_thread.h"
17#include "content/public/browser/navigation_controller.h"
18#include "content/public/browser/navigation_entry.h"
19#include "content/public/browser/render_view_host.h"
20#include "content/public/browser/web_contents.h"
21#include "net/url_request/url_request_context_getter.h"
22
23using content::BrowserThread;
24using content::NavigationEntry;
25using content::WebContents;
26using safe_browsing::ClientMalwareReportRequest;
27
// Upper bound on the number of DOM nodes accepted from a single renderer
// reply; AddDOMDetails() ignores any nodes beyond this count.
// Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
static const uint32 kMaxDomNodes = 500;
30
// static
// Factory used by NewMalwareDetails() to build instances. Starts out NULL;
// NewMalwareDetails() installs the default implementation on first use if
// nothing else has been set by then.
MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
33
34// The default MalwareDetailsFactory.  Global, made a singleton so we
35// don't leak it.
36class MalwareDetailsFactoryImpl : public MalwareDetailsFactory {
37 public:
38  virtual MalwareDetails* CreateMalwareDetails(
39      SafeBrowsingUIManager* ui_manager,
40      WebContents* web_contents,
41      const SafeBrowsingUIManager::UnsafeResource& unsafe_resource) OVERRIDE {
42    return new MalwareDetails(ui_manager, web_contents, unsafe_resource);
43  }
44
45 private:
46  friend struct base::DefaultLazyInstanceTraits<MalwareDetailsFactoryImpl>;
47
48  MalwareDetailsFactoryImpl() {}
49
50  DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
51};
52
// File-local LazyInstance holding the default factory. NewMalwareDetails()
// takes its pointer when no other factory has been installed.
static base::LazyInstance<MalwareDetailsFactoryImpl>
    g_malware_details_factory_impl = LAZY_INSTANCE_INITIALIZER;
55
56// Create a MalwareDetails for the given tab.
57/* static */
58MalwareDetails* MalwareDetails::NewMalwareDetails(
59    SafeBrowsingUIManager* ui_manager,
60    WebContents* web_contents,
61    const UnsafeResource& resource) {
62  // Set up the factory if this has not been done already (tests do that
63  // before this method is called).
64  if (!factory_)
65    factory_ = g_malware_details_factory_impl.Pointer();
66  return factory_->CreateMalwareDetails(ui_manager, web_contents, resource);
67}
68
// Create a MalwareDetails for the given tab. Runs in the UI thread.
//
// Registers as a WebContentsObserver of |web_contents|, derives the profile
// (and from it the request context) from the tab's browser context, creates
// the cache and redirect collectors, and then immediately calls
// StartCollection().
// NOTE(review): |request_context_getter_| and |redirects_collector_| are
// initialized from |profile_|, so this presumably relies on |profile_| being
// declared before them in the header -- confirm before reordering members.
MalwareDetails::MalwareDetails(
    SafeBrowsingUIManager* ui_manager,
    content::WebContents* web_contents,
    const UnsafeResource& resource)
    : content::WebContentsObserver(web_contents),
      profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())),
      request_context_getter_(profile_->GetRequestContext()),
      ui_manager_(ui_manager),
      resource_(resource),
      cache_result_(false),
      cache_collector_(new MalwareDetailsCacheCollector),
      redirects_collector_(
          new MalwareDetailsRedirectsCollector(profile_)) {
  // Collection begins as part of construction.
  StartCollection();
}
85
86MalwareDetails::~MalwareDetails() {
87}
88
89bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
90  bool handled = true;
91  IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
92    IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
93                        OnReceivedMalwareDOMDetails)
94    IPC_MESSAGE_UNHANDLED(handled = false)
95  IPC_END_MESSAGE_MAP()
96  return handled;
97}
98
99bool MalwareDetails::IsPublicUrl(const GURL& url) const {
100  return url.SchemeIs("http");  // TODO(panayiotis): also skip internal urls.
101}
102
103// Looks for a Resource for the given url in resources_.  If found, it
104// updates |resource|. Otherwise, it creates a new message, adds it to
105// resources_ and updates |resource| to point to it.
106ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
107    const GURL& url) {
108  safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
109  if (it != resources_.end())
110    return it->second.get();
111
112  // Create the resource for |url|.
113  int id = resources_.size();
114  linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
115      new ClientMalwareReportRequest::Resource());
116  new_resource->set_url(url.spec());
117  new_resource->set_id(id);
118  resources_[url.spec()] = new_resource;
119  return new_resource.get();
120}
121
122void MalwareDetails::AddUrl(const GURL& url,
123                            const GURL& parent,
124                            const std::string& tagname,
125                            const std::vector<GURL>* children) {
126  if (!url.is_valid() || !IsPublicUrl(url))
127    return;
128
129  // Find (or create) the resource for the url.
130  ClientMalwareReportRequest::Resource* url_resource =
131      FindOrCreateResource(url);
132  if (!tagname.empty())
133    url_resource->set_tag_name(tagname);
134  if (!parent.is_empty() && IsPublicUrl(parent)) {
135    // Add the resource for the parent.
136    ClientMalwareReportRequest::Resource* parent_resource =
137        FindOrCreateResource(parent);
138    // Update the parent-child relation
139    url_resource->set_parent_id(parent_resource->id());
140  }
141  if (children) {
142    for (std::vector<GURL>::const_iterator it = children->begin();
143         it != children->end(); ++it) {
144      ClientMalwareReportRequest::Resource* child_resource =
145          FindOrCreateResource(*it);
146      url_resource->add_child_ids(child_resource->id());
147    }
148  }
149}
150
151void MalwareDetails::StartCollection() {
152  DVLOG(1) << "Starting to compute malware details.";
153  report_.reset(new ClientMalwareReportRequest());
154
155  if (IsPublicUrl(resource_.url))
156    report_->set_malware_url(resource_.url.spec());
157
158  GURL page_url = web_contents()->GetURL();
159  if (IsPublicUrl(page_url))
160    report_->set_page_url(page_url.spec());
161
162  GURL referrer_url;
163  NavigationEntry* nav_entry = web_contents()->GetController().GetActiveEntry();
164  if (nav_entry) {
165    referrer_url = nav_entry->GetReferrer().url;
166    if (IsPublicUrl(referrer_url)) {
167      report_->set_referrer_url(referrer_url.spec());
168    }
169  }
170
171  // Add the nodes, starting from the page url.
172  AddUrl(page_url, GURL(), std::string(), NULL);
173
174  // Add the resource_url and its original url, if non-empty and different.
175  if (!resource_.original_url.is_empty() &&
176      resource_.url != resource_.original_url) {
177    // Add original_url, as the parent of resource_url.
178    AddUrl(resource_.original_url, GURL(), std::string(), NULL);
179    AddUrl(resource_.url, resource_.original_url, std::string(), NULL);
180  } else {
181    AddUrl(resource_.url, GURL(), std::string(), NULL);
182  }
183
184  // Add the redirect urls, if non-empty. The redirect urls do not include the
185  // original url, but include the unsafe url which is the last one of the
186  // redirect urls chain
187  GURL parent_url;
188  // Set the original url as the parent of the first redirect url if it's not
189  // empty.
190  if (!resource_.original_url.is_empty())
191    parent_url = resource_.original_url;
192
193  // Set the previous redirect url as the parent of the next one
194  for (size_t i = 0; i < resource_.redirect_urls.size(); ++i) {
195    AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL);
196    parent_url = resource_.redirect_urls[i];
197  }
198
199  // Add the referrer url.
200  if (nav_entry && !referrer_url.is_empty())
201    AddUrl(referrer_url, GURL(), std::string(), NULL);
202
203  // Get URLs of frames, scripts etc from the DOM.
204  // OnReceivedMalwareDOMDetails will be called when the renderer replies.
205  content::RenderViewHost* view = web_contents()->GetRenderViewHost();
206  view->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view->GetRoutingID()));
207}
208
209// When the renderer is done, this is called.
210void MalwareDetails::OnReceivedMalwareDOMDetails(
211    const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
212  // Schedule this in IO thread, so it doesn't conflict with future users
213  // of our data structures (eg GetSerializedReport).
214  BrowserThread::PostTask(
215      BrowserThread::IO, FROM_HERE,
216      base::Bind(&MalwareDetails::AddDOMDetails, this, params));
217}
218
219void MalwareDetails::AddDOMDetails(
220    const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
221  DCHECK_CURRENTLY_ON(BrowserThread::IO);
222  DVLOG(1) << "Nodes from the DOM: " << params.size();
223
224  // If we have already started getting redirects from history service,
225  // don't modify state, otherwise will invalidate the iterators.
226  if (redirects_collector_->HasStarted())
227    return;
228
229  // If we have already started collecting data from the HTTP cache, don't
230  // modify our state.
231  if (cache_collector_->HasStarted())
232    return;
233
234  // Add the urls from the DOM to |resources_|.  The renderer could be
235  // sending bogus messages, so limit the number of nodes we accept.
236  for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
237    SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
238    DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
239    AddUrl(node.url, node.parent, node.tag_name, &(node.children));
240  }
241}
242
243// Called from the SB Service on the IO thread, after the user has
244// closed the tab, or clicked proceed or goback.  Since the user needs
245// to take an action, we expect this to be called after
246// OnReceivedMalwareDOMDetails in most cases. If not, we don't include
247// the DOM data in our report.
248void MalwareDetails::FinishCollection() {
249  DCHECK_CURRENTLY_ON(BrowserThread::IO);
250
251  std::vector<GURL> urls;
252  for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
253       it != resources_.end(); ++it) {
254    urls.push_back(GURL(it->first));
255  }
256  redirects_collector_->StartHistoryCollection(
257      urls,
258      base::Bind(&MalwareDetails::OnRedirectionCollectionReady, this));
259}
260
261void MalwareDetails::OnRedirectionCollectionReady() {
262  DCHECK_CURRENTLY_ON(BrowserThread::IO);
263  const std::vector<safe_browsing::RedirectChain>& redirects =
264      redirects_collector_->GetCollectedUrls();
265
266  for (size_t i = 0; i < redirects.size(); ++i)
267    AddRedirectUrlList(redirects[i]);
268
269  // Call the cache collector
270  cache_collector_->StartCacheCollection(
271      request_context_getter_.get(),
272      &resources_,
273      &cache_result_,
274      base::Bind(&MalwareDetails::OnCacheCollectionReady, this));
275}
276
277void MalwareDetails::AddRedirectUrlList(const std::vector<GURL>& urls) {
278  DCHECK_CURRENTLY_ON(BrowserThread::IO);
279  for (size_t i = 0; i < urls.size() - 1; ++i) {
280    AddUrl(urls[i], urls[i + 1], std::string(), NULL);
281  }
282}
283
284void MalwareDetails::OnCacheCollectionReady() {
285  DVLOG(1) << "OnCacheCollectionReady.";
286  // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
287  for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
288       it != resources_.end(); ++it) {
289    ClientMalwareReportRequest::Resource* pb_resource =
290        report_->add_resources();
291    pb_resource->CopyFrom(*(it->second));
292  }
293
294  report_->set_complete(cache_result_);
295
296  // Send the report, using the SafeBrowsingService.
297  std::string serialized;
298  if (!report_->SerializeToString(&serialized)) {
299    DLOG(ERROR) << "Unable to serialize the malware report.";
300    return;
301  }
302
303  ui_manager_->SendSerializedMalwareDetails(serialized);
304}
305