// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Implementation of the MalwareDetails class.
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/malware_details.h"
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/bind.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/lazy_instance.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/profiles/profile.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/malware_details_cache.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/malware_details_history.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/report.pb.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/safe_browsing/safebrowsing_messages.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/browser_thread.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/navigation_controller.h"
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/navigation_entry.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/render_view_host.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/web_contents.h"
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/url_request/url_request_context_getter.h"
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::BrowserThread;
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::NavigationEntry;
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::WebContents;
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using safe_browsing::ClientMalwareReportRequest;
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const uint32 kMaxDomNodes = 500;
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The default MalwareDetailsFactory.  Global, made a singleton so we
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// don't leak it.
36116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass MalwareDetailsFactoryImpl : public MalwareDetailsFactory {
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  virtual MalwareDetails* CreateMalwareDetails(
392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      SafeBrowsingUIManager* ui_manager,
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      WebContents* web_contents,
412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      const SafeBrowsingUIManager::UnsafeResource& unsafe_resource) OVERRIDE {
422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return new MalwareDetails(ui_manager, web_contents, unsafe_resource);
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
46116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  friend struct base::DefaultLazyInstanceTraits<MalwareDetailsFactoryImpl>;
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  MalwareDetailsFactoryImpl() {}
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static base::LazyInstance<MalwareDetailsFactoryImpl>
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    g_malware_details_factory_impl = LAZY_INSTANCE_INITIALIZER;
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Create a MalwareDetails for the given tab.
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* static */
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MalwareDetails* MalwareDetails::NewMalwareDetails(
592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    SafeBrowsingUIManager* ui_manager,
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WebContents* web_contents,
612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const UnsafeResource& resource) {
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set up the factory if this has not been done already (tests do that
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // before this method is called).
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!factory_)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    factory_ = g_malware_details_factory_impl.Pointer();
662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return factory_->CreateMalwareDetails(ui_manager, web_contents, resource);
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Create a MalwareDetails for the given tab. Runs in the UI thread.
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MalwareDetails::MalwareDetails(
712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    SafeBrowsingUIManager* ui_manager,
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    content::WebContents* web_contents,
732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const UnsafeResource& resource)
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : content::WebContentsObserver(web_contents),
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())),
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      request_context_getter_(profile_->GetRequestContext()),
772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      ui_manager_(ui_manager),
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      resource_(resource),
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      cache_result_(false),
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      cache_collector_(new MalwareDetailsCacheCollector),
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      redirects_collector_(
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          new MalwareDetailsRedirectsCollector(profile_)) {
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StartCollection();
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MalwareDetails::~MalwareDetails() {
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool handled = true;
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        OnReceivedMalwareDOMDetails)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    IPC_MESSAGE_UNHANDLED(handled = false)
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  IPC_END_MESSAGE_MAP()
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return handled;
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool MalwareDetails::IsPublicUrl(const GURL& url) const {
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return url.SchemeIs("http");  // TODO(panayiotis): also skip internal urls.
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Looks for a Resource for the given url in resources_.  If found, it
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// updates |resource|. Otherwise, it creates a new message, adds it to
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// resources_ and updates |resource| to point to it.
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const GURL& url) {
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
109116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  if (it != resources_.end())
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return it->second.get();
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Create the resource for |url|.
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int id = resources_.size();
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      new ClientMalwareReportRequest::Resource());
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  new_resource->set_url(url.spec());
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  new_resource->set_id(id);
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  resources_[url.spec()] = new_resource;
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return new_resource.get();
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void MalwareDetails::AddUrl(const GURL& url,
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            const GURL& parent,
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            const std::string& tagname,
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            const std::vector<GURL>* children) {
1262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!url.is_valid() || !IsPublicUrl(url))
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Find (or create) the resource for the url.
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientMalwareReportRequest::Resource* url_resource =
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      FindOrCreateResource(url);
132116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  if (!tagname.empty())
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    url_resource->set_tag_name(tagname);
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!parent.is_empty() && IsPublicUrl(parent)) {
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Add the resource for the parent.
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ClientMalwareReportRequest::Resource* parent_resource =
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        FindOrCreateResource(parent);
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Update the parent-child relation
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    url_resource->set_parent_id(parent_resource->id());
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (children) {
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (std::vector<GURL>::const_iterator it = children->begin();
143116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch         it != children->end(); ++it) {
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ClientMalwareReportRequest::Resource* child_resource =
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          FindOrCreateResource(*it);
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url_resource->add_child_ids(child_resource->id());
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void MalwareDetails::StartCollection() {
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DVLOG(1) << "Starting to compute malware details.";
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  report_.reset(new ClientMalwareReportRequest());
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  if (IsPublicUrl(resource_.url))
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    report_->set_malware_url(resource_.url.spec());
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GURL page_url = web_contents()->GetURL();
159116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  if (IsPublicUrl(page_url))
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    report_->set_page_url(page_url.spec());
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GURL referrer_url;
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  NavigationEntry* nav_entry = web_contents()->GetController().GetActiveEntry();
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (nav_entry) {
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    referrer_url = nav_entry->GetReferrer().url;
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (IsPublicUrl(referrer_url)) {
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      report_->set_referrer_url(referrer_url.spec());
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add the nodes, starting from the page url.
172c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  AddUrl(page_url, GURL(), std::string(), NULL);
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add the resource_url and its original url, if non-empty and different.
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!resource_.original_url.is_empty() &&
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      resource_.url != resource_.original_url) {
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Add original_url, as the parent of resource_url.
178c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    AddUrl(resource_.original_url, GURL(), std::string(), NULL);
179c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    AddUrl(resource_.url, resource_.original_url, std::string(), NULL);
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
181c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    AddUrl(resource_.url, GURL(), std::string(), NULL);
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add the redirect urls, if non-empty. The redirect urls do not include the
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // original url, but include the unsafe url which is the last one of the
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // redirect urls chain
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GURL parent_url;
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set the original url as the parent of the first redirect url if it's not
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // empty.
190116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  if (!resource_.original_url.is_empty())
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    parent_url = resource_.original_url;
192116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set the previous redirect url as the parent of the next one
194116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  for (size_t i = 0; i < resource_.redirect_urls.size(); ++i) {
195c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL);
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    parent_url = resource_.redirect_urls[i];
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add the referrer url.
200116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  if (nav_entry && !referrer_url.is_empty())
201c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    AddUrl(referrer_url, GURL(), std::string(), NULL);
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Get URLs of frames, scripts etc from the DOM.
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // OnReceivedMalwareDOMDetails will be called when the renderer replies.
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  content::RenderViewHost* view = web_contents()->GetRenderViewHost();
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  view->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view->GetRoutingID()));
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// When the renderer is done, this is called.
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void MalwareDetails::OnReceivedMalwareDOMDetails(
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Schedule this in IO thread, so it doesn't conflict with future users
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // of our data structures (eg GetSerializedReport).
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BrowserThread::PostTask(
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      BrowserThread::IO, FROM_HERE,
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&MalwareDetails::AddDOMDetails, this, params));
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void MalwareDetails::AddDOMDetails(
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
221116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  DCHECK_CURRENTLY_ON(BrowserThread::IO);
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DVLOG(1) << "Nodes from the DOM: " << params.size();
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we have already started getting redirects from history service,
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // don't modify state, otherwise will invalidate the iterators.
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (redirects_collector_->HasStarted())
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we have already started collecting data from the HTTP cache, don't
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // modify our state.
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (cache_collector_->HasStarted())
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add the urls from the DOM to |resources_|.  The renderer could be
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // sending bogus messages, so limit the number of nodes we accept.
236116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AddUrl(node.url, node.parent, node.tag_name, &(node.children));
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Called from the SB Service on the IO thread, after the user has
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// closed the tab, or clicked proceed or goback.  Since the user needs
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// to take an action, we expect this to be called after
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// OnReceivedMalwareDOMDetails in most cases. If not, we don't include
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the DOM data in our report.
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void MalwareDetails::FinishCollection() {
249116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  DCHECK_CURRENTLY_ON(BrowserThread::IO);
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<GURL> urls;
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
253116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch       it != resources_.end(); ++it) {
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    urls.push_back(GURL(it->first));
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirects_collector_->StartHistoryCollection(
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      urls,
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&MalwareDetails::OnRedirectionCollectionReady, this));
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void MalwareDetails::OnRedirectionCollectionReady() {
262116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  DCHECK_CURRENTLY_ON(BrowserThread::IO);
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::vector<safe_browsing::RedirectChain>& redirects =
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      redirects_collector_->GetCollectedUrls();
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < redirects.size(); ++i)
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AddRedirectUrlList(redirects[i]);
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Call the cache collector
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  cache_collector_->StartCacheCollection(
271868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      request_context_getter_.get(),
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      &resources_,
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      &cache_result_,
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&MalwareDetails::OnCacheCollectionReady, this));
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void MalwareDetails::AddRedirectUrlList(const std::vector<GURL>& urls) {
278116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  DCHECK_CURRENTLY_ON(BrowserThread::IO);
279116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  for (size_t i = 0; i < urls.size() - 1; ++i) {
280c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    AddUrl(urls[i], urls[i + 1], std::string(), NULL);
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void MalwareDetails::OnCacheCollectionReady() {
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DVLOG(1) << "OnCacheCollectionReady.";
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
288116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch       it != resources_.end(); ++it) {
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ClientMalwareReportRequest::Resource* pb_resource =
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        report_->add_resources();
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pb_resource->CopyFrom(*(it->second));
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  report_->set_complete(cache_result_);
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Send the report, using the SafeBrowsingService.
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string serialized;
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!report_->SerializeToString(&serialized)) {
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DLOG(ERROR) << "Unable to serialize the malware report.";
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  ui_manager_->SendSerializedMalwareDetails(serialized);
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
305