// client_side_detection_host.cc revision 868fa2fe829687343ffae624259930155e16dbd8
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/safe_browsing/client_side_detection_host.h" 6 7#include <vector> 8 9#include "base/logging.h" 10#include "base/memory/ref_counted.h" 11#include "base/memory/scoped_ptr.h" 12#include "base/metrics/histogram.h" 13#include "base/prefs/pref_service.h" 14#include "base/sequenced_task_runner_helpers.h" 15#include "chrome/browser/browser_process.h" 16#include "chrome/browser/profiles/profile.h" 17#include "chrome/browser/safe_browsing/browser_feature_extractor.h" 18#include "chrome/browser/safe_browsing/client_side_detection_service.h" 19#include "chrome/browser/safe_browsing/database_manager.h" 20#include "chrome/browser/safe_browsing/safe_browsing_service.h" 21#include "chrome/common/chrome_switches.h" 22#include "chrome/common/chrome_version_info.h" 23#include "chrome/common/pref_names.h" 24#include "chrome/common/safe_browsing/csd.pb.h" 25#include "chrome/common/safe_browsing/safebrowsing_messages.h" 26#include "content/public/browser/browser_thread.h" 27#include "content/public/browser/navigation_controller.h" 28#include "content/public/browser/navigation_details.h" 29#include "content/public/browser/navigation_entry.h" 30#include "content/public/browser/notification_details.h" 31#include "content/public/browser/notification_source.h" 32#include "content/public/browser/notification_types.h" 33#include "content/public/browser/render_process_host.h" 34#include "content/public/browser/render_view_host.h" 35#include "content/public/browser/resource_request_details.h" 36#include "content/public/browser/web_contents.h" 37#include "content/public/common/frame_navigate_params.h" 38#include "googleurl/src/gurl.h" 39 40using content::BrowserThread; 41using content::NavigationEntry; 42using content::ResourceRequestDetails; 43using content::WebContents; 44 45namespace safe_browsing 
{ 46 47const int ClientSideDetectionHost::kMaxHostsPerIP = 20; 48const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200; 49 50namespace { 51 52void EmptyUrlCheckCallback(bool processed) { 53 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 54} 55 56} // namespace 57 58// This class is instantiated each time a new toplevel URL loads, and 59// asynchronously checks whether the phishing classifier should run for this 60// URL. If so, it notifies the renderer with a StartPhishingDetection IPC. 61// Objects of this class are ref-counted and will be destroyed once nobody 62// uses it anymore. If |web_contents|, |csd_service| or |host| go away you need 63// to call Cancel(). We keep the |database_manager| alive in a ref pointer for 64// as long as it takes. 65class ClientSideDetectionHost::ShouldClassifyUrlRequest 66 : public base::RefCountedThreadSafe< 67 ClientSideDetectionHost::ShouldClassifyUrlRequest> { 68 public: 69 ShouldClassifyUrlRequest(const content::FrameNavigateParams& params, 70 WebContents* web_contents, 71 ClientSideDetectionService* csd_service, 72 SafeBrowsingDatabaseManager* database_manager, 73 ClientSideDetectionHost* host) 74 : canceled_(false), 75 params_(params), 76 web_contents_(web_contents), 77 csd_service_(csd_service), 78 database_manager_(database_manager), 79 host_(host) { 80 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 81 DCHECK(web_contents_); 82 DCHECK(csd_service_); 83 DCHECK(database_manager_.get()); 84 DCHECK(host_); 85 } 86 87 void Start() { 88 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 89 90 // We start by doing some simple checks that can run on the UI thread. 91 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1); 92 93 // Only classify [X]HTML documents. 
94 if (params_.contents_mime_type != "text/html" && 95 params_.contents_mime_type != "application/xhtml+xml") { 96 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 97 << " because it has an unsupported MIME type: " 98 << params_.contents_mime_type; 99 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 100 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, 101 NO_CLASSIFY_MAX); 102 return; 103 } 104 105 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) { 106 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 107 << " because of hosting on private IP: " 108 << params_.socket_address.host(); 109 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 110 NO_CLASSIFY_PRIVATE_IP, 111 NO_CLASSIFY_MAX); 112 return; 113 } 114 115 // Don't run the phishing classifier if the tab is incognito. 116 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) { 117 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 118 << " because we're browsing incognito."; 119 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 120 NO_CLASSIFY_OFF_THE_RECORD, 121 NO_CLASSIFY_MAX); 122 123 return; 124 } 125 126 // We lookup the csd-whitelist before we lookup the cache because 127 // a URL may have recently been whitelisted. If the URL matches 128 // the csd-whitelist we won't start classification. The 129 // csd-whitelist check has to be done on the IO thread because it 130 // uses the SafeBrowsing service class. 131 BrowserThread::PostTask( 132 BrowserThread::IO, 133 FROM_HERE, 134 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist, 135 this, params_.url)); 136 } 137 138 void Cancel() { 139 canceled_ = true; 140 // Just to make sure we don't do anything stupid we reset all these 141 // pointers except for the safebrowsing service class which may be 142 // accessed by CheckCsdWhitelist(). 
143 web_contents_ = NULL; 144 csd_service_ = NULL; 145 host_ = NULL; 146 } 147 148 private: 149 friend class base::RefCountedThreadSafe< 150 ClientSideDetectionHost::ShouldClassifyUrlRequest>; 151 152 // Enum used to keep stats about why the pre-classification check failed. 153 enum PreClassificationCheckFailures { 154 OBSOLETE_NO_CLASSIFY_PROXY_FETCH, 155 NO_CLASSIFY_PRIVATE_IP, 156 NO_CLASSIFY_OFF_THE_RECORD, 157 NO_CLASSIFY_MATCH_CSD_WHITELIST, 158 NO_CLASSIFY_TOO_MANY_REPORTS, 159 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, 160 161 NO_CLASSIFY_MAX // Always add new values before this one. 162 }; 163 164 // The destructor can be called either from the UI or the IO thread. 165 virtual ~ShouldClassifyUrlRequest() { } 166 167 void CheckCsdWhitelist(const GURL& url) { 168 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 169 if (!database_manager_.get() || 170 database_manager_->MatchCsdWhitelistUrl(url)) { 171 // We're done. There is no point in going back to the UI thread. 172 VLOG(1) << "Skipping phishing classification for URL: " << url 173 << " because it matches the csd whitelist"; 174 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 175 NO_CLASSIFY_MATCH_CSD_WHITELIST, 176 NO_CLASSIFY_MAX); 177 return; 178 } 179 180 BrowserThread::PostTask( 181 BrowserThread::UI, 182 FROM_HERE, 183 base::Bind(&ShouldClassifyUrlRequest::CheckCache, this)); 184 } 185 186 void CheckCache() { 187 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 188 if (canceled_) { 189 return; 190 } 191 192 // If result is cached, we don't want to run classification again 193 bool is_phishing; 194 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) { 195 VLOG(1) << "Satisfying request for " << params_.url << " from cache"; 196 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); 197 // Since we are already on the UI thread, this is safe. 
198 host_->MaybeShowPhishingWarning(params_.url, is_phishing); 199 return; 200 } 201 202 // We want to limit the number of requests, though we will ignore the 203 // limit for urls in the cache. We don't want to start classifying 204 // too many pages as phishing, but for those that we already think are 205 // phishing we want to give ourselves a chance to fix false positives. 206 if (csd_service_->IsInCache(params_.url)) { 207 VLOG(1) << "Reporting limit skipped for " << params_.url 208 << " as it was in the cache."; 209 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1); 210 } else if (csd_service_->OverPhishingReportLimit()) { 211 VLOG(1) << "Too many report phishing requests sent recently, " 212 << "not running classification for " << params_.url; 213 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 214 NO_CLASSIFY_TOO_MANY_REPORTS, 215 NO_CLASSIFY_MAX); 216 return; 217 } 218 219 // Everything checks out, so start classification. 220 // |web_contents_| is safe to call as we will be destructed 221 // before it is. 222 VLOG(1) << "Instruct renderer to start phishing detection for URL: " 223 << params_.url; 224 content::RenderViewHost* rvh = web_contents_->GetRenderViewHost(); 225 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection( 226 rvh->GetRoutingID(), params_.url)); 227 } 228 229 // No need to protect |canceled_| with a lock because it is only read and 230 // written by the UI thread. 231 bool canceled_; 232 content::FrameNavigateParams params_; 233 WebContents* web_contents_; 234 ClientSideDetectionService* csd_service_; 235 // We keep a ref pointer here just to make sure the safe browsing 236 // database manager stays alive long enough. 
237 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_; 238 ClientSideDetectionHost* host_; 239 240 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest); 241}; 242 243// static 244ClientSideDetectionHost* ClientSideDetectionHost::Create( 245 WebContents* tab) { 246 return new ClientSideDetectionHost(tab); 247} 248 249ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab) 250 : content::WebContentsObserver(tab), 251 csd_service_(NULL), 252 weak_factory_(this), 253 unsafe_unique_page_id_(-1), 254 malware_report_enabled_(false) { 255 DCHECK(tab); 256 // Note: csd_service_ and sb_service will be NULL here in testing. 257 csd_service_ = g_browser_process->safe_browsing_detection_service(); 258 feature_extractor_.reset(new BrowserFeatureExtractor(tab, csd_service_)); 259 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED, 260 content::Source<WebContents>(tab)); 261 262 scoped_refptr<SafeBrowsingService> sb_service = 263 g_browser_process->safe_browsing_service(); 264 if (sb_service.get()) { 265 ui_manager_ = sb_service->ui_manager(); 266 database_manager_ = sb_service->database_manager(); 267 ui_manager_->AddObserver(this); 268 } 269 270 // Only enable the malware bad IP matching and report feature for canary 271 // and dev channel. 
272 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 273 malware_report_enabled_ = ( 274 channel == chrome::VersionInfo::CHANNEL_DEV || 275 channel == chrome::VersionInfo::CHANNEL_CANARY); 276} 277 278ClientSideDetectionHost::~ClientSideDetectionHost() { 279 if (ui_manager_.get()) 280 ui_manager_->RemoveObserver(this); 281} 282 283bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) { 284 bool handled = true; 285 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message) 286 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone, 287 OnPhishingDetectionDone) 288 IPC_MESSAGE_UNHANDLED(handled = false) 289 IPC_END_MESSAGE_MAP() 290 return handled; 291} 292 293void ClientSideDetectionHost::DidNavigateMainFrame( 294 const content::LoadCommittedDetails& details, 295 const content::FrameNavigateParams& params) { 296 // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests 297 // that don't call this method on the UI thread. 298 // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 299 if (details.is_in_page) { 300 // If the navigation is within the same page, the user isn't really 301 // navigating away. We don't need to cancel a pending callback or 302 // begin a new classification. 303 return; 304 } 305 // If we navigate away and there currently is a pending phishing 306 // report request we have to cancel it to make sure we don't display 307 // an interstitial for the wrong page. Note that this won't cancel 308 // the server ping back but only cancel the showing of the 309 // interstial. 310 weak_factory_.InvalidateWeakPtrs(); 311 312 if (!csd_service_) { 313 return; 314 } 315 316 // Cancel any pending classification request. 317 if (classification_request_.get()) { 318 classification_request_->Cancel(); 319 } 320 browse_info_.reset(new BrowseInfo); 321 322 // Store redirect chain information. 
323 if (params.url.host() != cur_host_) { 324 cur_host_ = params.url.host(); 325 cur_host_redirects_ = params.redirects; 326 } 327 browse_info_->host_redirects = cur_host_redirects_; 328 browse_info_->url_redirects = params.redirects; 329 browse_info_->http_status_code = details.http_status_code; 330 331 // Notify the renderer if it should classify this URL. 332 classification_request_ = new ShouldClassifyUrlRequest( 333 params, web_contents(), csd_service_, database_manager_.get(), this); 334 classification_request_->Start(); 335} 336 337void ClientSideDetectionHost::OnSafeBrowsingHit( 338 const SafeBrowsingUIManager::UnsafeResource& resource) { 339 // Check that this notification is really for us and that it corresponds to 340 // either a malware or phishing hit. In this case we store the unique page 341 // ID for later. 342 if (web_contents() && 343 web_contents()->GetRenderProcessHost()->GetID() == 344 resource.render_process_host_id && 345 web_contents()->GetRenderViewHost()->GetRoutingID() == 346 resource.render_view_id && 347 (resource.threat_type == SB_THREAT_TYPE_URL_PHISHING || 348 resource.threat_type == SB_THREAT_TYPE_URL_MALWARE) && 349 web_contents()->GetController().GetActiveEntry()) { 350 unsafe_unique_page_id_ = 351 web_contents()->GetController().GetActiveEntry()->GetUniqueID(); 352 // We also keep the resource around in order to be able to send the 353 // malicious URL to the server. 354 unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource)); 355 unsafe_resource_->callback.Reset(); // Don't do anything stupid. 356 } 357} 358 359void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) { 360 DCHECK(tab); 361 // Tell any pending classification request that it is being canceled. 362 if (classification_request_.get()) { 363 classification_request_->Cancel(); 364 } 365 // Cancel all pending feature extractions. 
366 feature_extractor_.reset(); 367} 368 369void ClientSideDetectionHost::OnPhishingDetectionDone( 370 const std::string& verdict_str) { 371 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 372 // There is something seriously wrong if there is no service class but 373 // this method is called. The renderer should not start phishing detection 374 // if there isn't any service class in the browser. 375 DCHECK(csd_service_); 376 // There shouldn't be any pending requests because we revoke them everytime 377 // we navigate away. 378 DCHECK(!weak_factory_.HasWeakPtrs()); 379 DCHECK(browse_info_.get()); 380 381 // We parse the protocol buffer here. If we're unable to parse it we won't 382 // send the verdict further. 383 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest); 384 if (csd_service_ && 385 !weak_factory_.HasWeakPtrs() && 386 browse_info_.get() && 387 verdict->ParseFromString(verdict_str) && 388 verdict->IsInitialized()) { 389 if (malware_report_enabled_) { 390 scoped_ptr<ClientMalwareRequest> malware_verdict( 391 new ClientMalwareRequest); 392 // Start browser-side malware feature extraction. Once we're done it will 393 // send the malware client verdict request. 394 malware_verdict->set_url(verdict->url()); 395 feature_extractor_->ExtractMalwareFeatures( 396 browse_info_.get(), 397 malware_verdict.get()); 398 MalwareFeatureExtractionDone(malware_verdict.Pass()); 399 } 400 401 // We only send phishing verdict to the server if the verdict is phishing or 402 // if a SafeBrowsing interstitial was already shown for this site. E.g., a 403 // malware or phishing interstitial was shown but the user clicked 404 // through. 405 if (verdict->is_phishing() || DidShowSBInterstitial()) { 406 if (DidShowSBInterstitial()) { 407 browse_info_->unsafe_resource.reset(unsafe_resource_.release()); 408 } 409 // Start browser-side feature extraction. Once we're done it will send 410 // the client verdict request. 
411 feature_extractor_->ExtractFeatures( 412 browse_info_.get(), 413 verdict.release(), 414 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone, 415 weak_factory_.GetWeakPtr())); 416 } 417 } 418 browse_info_.reset(); 419} 420 421void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url, 422 bool is_phishing) { 423 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 424 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url 425 << " is_phishing:" << is_phishing; 426 if (is_phishing) { 427 DCHECK(web_contents()); 428 if (ui_manager_.get()) { 429 SafeBrowsingUIManager::UnsafeResource resource; 430 resource.url = phishing_url; 431 resource.original_url = phishing_url; 432 resource.is_subresource = false; 433 resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL; 434 resource.render_process_host_id = 435 web_contents()->GetRenderProcessHost()->GetID(); 436 resource.render_view_id = 437 web_contents()->GetRenderViewHost()->GetRoutingID(); 438 if (!ui_manager_->IsWhitelisted(resource)) { 439 // We need to stop any pending navigations, otherwise the interstital 440 // might not get created properly. 441 web_contents()->GetController().DiscardNonCommittedEntries(); 442 resource.callback = base::Bind(&EmptyUrlCheckCallback); 443 ui_manager_->DoDisplayBlockingPage(resource); 444 } 445 } 446 } 447} 448 449void ClientSideDetectionHost::FeatureExtractionDone( 450 bool success, 451 ClientPhishingRequest* request) { 452 if (!request) { 453 DLOG(FATAL) << "Invalid request object in FeatureExtractionDone"; 454 return; 455 } 456 VLOG(2) << "Feature extraction done (success:" << success << ") for URL: " 457 << request->url() << ". Start sending client phishing request."; 458 ClientSideDetectionService::ClientReportPhishingRequestCallback callback; 459 // If the client-side verdict isn't phishing we don't care about the server 460 // response because we aren't going to display a warning. 
461 if (request->is_phishing()) { 462 callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning, 463 weak_factory_.GetWeakPtr()); 464 } 465 // Send ping even if the browser feature extraction failed. 466 csd_service_->SendClientReportPhishingRequest( 467 request, // The service takes ownership of the request object. 468 callback); 469} 470 471void ClientSideDetectionHost::MalwareFeatureExtractionDone( 472 scoped_ptr<ClientMalwareRequest> request) { 473 if (!request) { 474 DLOG(FATAL) << "Invalid request object in MalwareFeatureExtractionDone"; 475 return; 476 } 477 VLOG(2) << "Malware Feature extraction done for URL: " << request->url() 478 << ", with features count:" << request->feature_map_size(); 479 480 // Send ping if there is matching features. 481 if (request->feature_map_size() > 0) { 482 VLOG(1) << "Start sending client malware request."; 483 ClientSideDetectionService::ClientReportMalwareRequestCallback callback; 484 csd_service_->SendClientReportMalwareRequest( 485 request.release(), // The service takes ownership of the request object 486 callback); // no action after request sent for now 487 } 488} 489 490void ClientSideDetectionHost::UpdateIPHostMap(const std::string& ip, 491 const std::string& host) { 492 if (ip.empty() || host.empty()) 493 return; 494 495 IPHostMap::iterator it = browse_info_->ips.find(ip); 496 if (it == browse_info_->ips.end()) { 497 if (int(browse_info_->ips.size()) < kMaxIPsPerBrowse) { 498 std::set<std::string> hosts; 499 hosts.insert(host); 500 browse_info_->ips.insert(make_pair(ip, hosts)); 501 } 502 } else if (int(it->second.size()) < kMaxHostsPerIP) { 503 it->second.insert(host); 504 } 505} 506 507void ClientSideDetectionHost::Observe( 508 int type, 509 const content::NotificationSource& source, 510 const content::NotificationDetails& details) { 511 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 512 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED); 513 const ResourceRequestDetails* req 
= content::Details<ResourceRequestDetails>( 514 details).ptr(); 515 if (req && browse_info_.get()) { 516 UpdateIPHostMap(req->socket_address.host() /* ip */, 517 req->url.host() /* url host */); 518 } 519} 520 521bool ClientSideDetectionHost::DidShowSBInterstitial() { 522 if (unsafe_unique_page_id_ <= 0 || !web_contents()) { 523 return false; 524 } 525 const NavigationEntry* nav_entry = 526 web_contents()->GetController().GetActiveEntry(); 527 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_); 528} 529 530void ClientSideDetectionHost::set_client_side_detection_service( 531 ClientSideDetectionService* service) { 532 csd_service_ = service; 533} 534 535void ClientSideDetectionHost::set_safe_browsing_managers( 536 SafeBrowsingUIManager* ui_manager, 537 SafeBrowsingDatabaseManager* database_manager) { 538 if (ui_manager_.get()) 539 ui_manager_->RemoveObserver(this); 540 541 ui_manager_ = ui_manager; 542 if (ui_manager) 543 ui_manager_->AddObserver(this); 544 545 database_manager_ = database_manager; 546} 547 548} // namespace safe_browsing 549