// client_side_detection_service.cc revision dc0f95d653279beabeb9817299e2902918ba123e
1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/safe_browsing/client_side_detection_service.h" 6 7#include "base/command_line.h" 8#include "base/file_path.h" 9#include "base/file_util_proxy.h" 10#include "base/logging.h" 11#include "base/message_loop.h" 12#include "base/metrics/histogram.h" 13#include "base/platform_file.h" 14#include "base/scoped_ptr.h" 15#include "base/stl_util-inl.h" 16#include "base/task.h" 17#include "base/time.h" 18#include "chrome/browser/safe_browsing/csd.pb.h" 19#include "chrome/common/net/http_return.h" 20#include "chrome/common/net/url_fetcher.h" 21#include "chrome/common/net/url_request_context_getter.h" 22#include "content/browser/browser_thread.h" 23#include "googleurl/src/gurl.h" 24#include "net/base/load_flags.h" 25#include "net/url_request/url_request_status.h" 26 27namespace safe_browsing { 28 29const int ClientSideDetectionService::kMaxReportsPerInterval = 3; 30 31const base::TimeDelta ClientSideDetectionService::kReportsInterval = 32 base::TimeDelta::FromDays(1); 33const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval = 34 base::TimeDelta::FromDays(1); 35const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval = 36 base::TimeDelta::FromMinutes(30); 37 38const char ClientSideDetectionService::kClientReportPhishingUrl[] = 39 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; 40const char ClientSideDetectionService::kClientModelUrl[] = 41 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; 42 43struct ClientSideDetectionService::ClientReportInfo { 44 scoped_ptr<ClientReportPhishingRequestCallback> callback; 45 GURL phishing_url; 46}; 47 48ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time) 49 : is_phishing(phish), 50 timestamp(time) {} 51 52ClientSideDetectionService::ClientSideDetectionService( 
53 const FilePath& model_path, 54 URLRequestContextGetter* request_context_getter) 55 : model_path_(model_path), 56 model_status_(UNKNOWN_STATUS), 57 model_file_(base::kInvalidPlatformFileValue), 58 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), 59 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), 60 request_context_getter_(request_context_getter) {} 61 62ClientSideDetectionService::~ClientSideDetectionService() { 63 method_factory_.RevokeAll(); 64 STLDeleteContainerPairPointers(client_phishing_reports_.begin(), 65 client_phishing_reports_.end()); 66 client_phishing_reports_.clear(); 67 STLDeleteElements(&open_callbacks_); 68 CloseModelFile(); 69} 70 71/* static */ 72ClientSideDetectionService* ClientSideDetectionService::Create( 73 const FilePath& model_path, 74 URLRequestContextGetter* request_context_getter) { 75 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 76 scoped_ptr<ClientSideDetectionService> service( 77 new ClientSideDetectionService(model_path, request_context_getter)); 78 if (!service->InitializePrivateNetworks()) { 79 UMA_HISTOGRAM_COUNTS("SBClientPhishing.InitPrivateNetworksFailed", 1); 80 return NULL; 81 } 82 83 // We try to open the model file right away and start fetching it if 84 // it does not already exist on disk. 
85 base::FileUtilProxy::CreateOrOpenCallback* cb = 86 service.get()->callback_factory_.NewCallback( 87 &ClientSideDetectionService::OpenModelFileDone); 88 if (!base::FileUtilProxy::CreateOrOpen( 89 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), 90 model_path, 91 base::PLATFORM_FILE_OPEN | base::PLATFORM_FILE_READ, 92 cb)) { 93 delete cb; 94 return NULL; 95 } 96 return service.release(); 97} 98 99void ClientSideDetectionService::GetModelFile(OpenModelDoneCallback* callback) { 100 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 101 MessageLoop::current()->PostTask( 102 FROM_HERE, 103 method_factory_.NewRunnableMethod( 104 &ClientSideDetectionService::StartGetModelFile, callback)); 105} 106 107void ClientSideDetectionService::SendClientReportPhishingRequest( 108 const GURL& phishing_url, 109 double score, 110 ClientReportPhishingRequestCallback* callback) { 111 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 112 MessageLoop::current()->PostTask( 113 FROM_HERE, 114 method_factory_.NewRunnableMethod( 115 &ClientSideDetectionService::StartClientReportPhishingRequest, 116 phishing_url, score, callback)); 117} 118 119bool ClientSideDetectionService::IsPrivateIPAddress( 120 const std::string& ip_address) const { 121 net::IPAddressNumber ip_number; 122 if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) { 123 DLOG(WARNING) << "Unable to parse IP address: " << ip_address; 124 // Err on the side of safety and assume this might be private. 
125 return true; 126 } 127 128 for (std::vector<AddressRange>::const_iterator it = 129 private_networks_.begin(); 130 it != private_networks_.end(); ++it) { 131 if (net::IPNumberMatchesPrefix(ip_number, it->first, it->second)) { 132 return true; 133 } 134 } 135 return false; 136} 137 138void ClientSideDetectionService::OnURLFetchComplete( 139 const URLFetcher* source, 140 const GURL& url, 141 const net::URLRequestStatus& status, 142 int response_code, 143 const ResponseCookies& cookies, 144 const std::string& data) { 145 if (source == model_fetcher_.get()) { 146 HandleModelResponse(source, url, status, response_code, cookies, data); 147 } else if (client_phishing_reports_.find(source) != 148 client_phishing_reports_.end()) { 149 HandlePhishingVerdict(source, url, status, response_code, cookies, data); 150 } else { 151 NOTREACHED(); 152 } 153} 154 155void ClientSideDetectionService::SetModelStatus(ModelStatus status) { 156 DCHECK_NE(READY_STATUS, model_status_); 157 model_status_ = status; 158 if (READY_STATUS == status || ERROR_STATUS == status) { 159 for (size_t i = 0; i < open_callbacks_.size(); ++i) { 160 open_callbacks_[i]->Run(model_file_); 161 } 162 STLDeleteElements(&open_callbacks_); 163 } else { 164 NOTREACHED(); 165 } 166} 167 168void ClientSideDetectionService::OpenModelFileDone( 169 base::PlatformFileError error_code, 170 base::PassPlatformFile file, 171 bool created) { 172 DCHECK(!created); 173 if (base::PLATFORM_FILE_OK == error_code) { 174 // The model file already exists. There is no need to fetch the model. 175 model_file_ = file.ReleaseValue(); 176 SetModelStatus(READY_STATUS); 177 } else if (base::PLATFORM_FILE_ERROR_NOT_FOUND == error_code) { 178 // We need to fetch the model since it does not exist yet. 
179 model_fetcher_.reset(URLFetcher::Create(0 /* ID is not used */, 180 GURL(kClientModelUrl), 181 URLFetcher::GET, 182 this)); 183 model_fetcher_->set_request_context(request_context_getter_.get()); 184 model_fetcher_->Start(); 185 } else { 186 // It is not clear what we should do in this case. For now we simply fail. 187 // Hopefully, we'll be able to read the model during the next browser 188 // restart. 189 SetModelStatus(ERROR_STATUS); 190 } 191} 192 193void ClientSideDetectionService::CreateModelFileDone( 194 base::PlatformFileError error_code, 195 base::PassPlatformFile file, 196 bool created) { 197 model_file_ = file.ReleaseValue(); 198 base::FileUtilProxy::WriteCallback* cb = callback_factory_.NewCallback( 199 &ClientSideDetectionService::WriteModelFileDone); 200 if (!created || 201 base::PLATFORM_FILE_OK != error_code || 202 !base::FileUtilProxy::Write( 203 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), 204 model_file_, 205 0 /* offset */, tmp_model_string_->data(), tmp_model_string_->size(), 206 cb)) { 207 delete cb; 208 // An error occurred somewhere. We close the model file if necessary and 209 // then run all the pending callbacks giving them an invalid model file. 210 CloseModelFile(); 211 SetModelStatus(ERROR_STATUS); 212 } 213} 214 215void ClientSideDetectionService::WriteModelFileDone( 216 base::PlatformFileError error_code, 217 int bytes_written) { 218 if (base::PLATFORM_FILE_OK == error_code) { 219 SetModelStatus(READY_STATUS); 220 } else { 221 // TODO(noelutz): maybe we should retry writing the model since we 222 // did already fetch the model? 223 CloseModelFile(); 224 SetModelStatus(ERROR_STATUS); 225 } 226 // Delete the model string that we kept around while we were writing the 227 // string to disk - we don't need it anymore. 
228 tmp_model_string_.reset(); 229} 230 231void ClientSideDetectionService::CloseModelFile() { 232 if (model_file_ != base::kInvalidPlatformFileValue) { 233 base::FileUtilProxy::Close( 234 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), 235 model_file_, 236 NULL); 237 } 238 model_file_ = base::kInvalidPlatformFileValue; 239} 240 241void ClientSideDetectionService::StartGetModelFile( 242 OpenModelDoneCallback* callback) { 243 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 244 if (UNKNOWN_STATUS == model_status_) { 245 // Store the callback which will be called once we know the status of the 246 // model file. 247 open_callbacks_.push_back(callback); 248 } else { 249 // The model is either in READY or ERROR state which means we can 250 // call the callback right away. 251 callback->Run(model_file_); 252 delete callback; 253 } 254} 255 256void ClientSideDetectionService::StartClientReportPhishingRequest( 257 const GURL& phishing_url, 258 double score, 259 ClientReportPhishingRequestCallback* callback) { 260 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 261 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); 262 263 bool is_phishing; 264 if (GetCachedResult(phishing_url, &is_phishing)) { 265 VLOG(1) << "Satisfying request for " << phishing_url << " from cache"; 266 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); 267 cb->Run(phishing_url, is_phishing); 268 return; 269 } 270 271 // We limit the number of distinct pings to kMaxReports, but we don't count 272 // urls already in the cache against this number. We don't want to start 273 // classifying too many pages as phishing, but for those that we already 274 // think are phishing we want to give ourselves a chance to fix false 275 // positives. 
276 if (cache_.find(phishing_url) != cache_.end()) { 277 VLOG(1) << "Refreshing cache for " << phishing_url; 278 UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1); 279 } else if (GetNumReports() > kMaxReportsPerInterval) { 280 VLOG(1) << "Too many report phishing requests sent in the last " 281 << kReportsInterval.InHours() << " hours, not checking " 282 << phishing_url; 283 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); 284 cb->Run(phishing_url, false); 285 return; 286 } 287 288 ClientPhishingRequest request; 289 request.set_url(phishing_url.spec()); 290 request.set_client_score(static_cast<float>(score)); 291 std::string request_data; 292 if (!request.SerializeToString(&request_data)) { 293 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1); 294 VLOG(1) << "Unable to serialize the CSD request. Proto file changed?"; 295 cb->Run(phishing_url, false); 296 return; 297 } 298 299 URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */, 300 GURL(kClientReportPhishingUrl), 301 URLFetcher::POST, 302 this); 303 304 // Remember which callback and URL correspond to the current fetcher object. 305 ClientReportInfo* info = new ClientReportInfo; 306 info->callback.swap(cb); // takes ownership of the callback. 
307 info->phishing_url = phishing_url; 308 client_phishing_reports_[fetcher] = info; 309 310 fetcher->set_load_flags(net::LOAD_DISABLE_CACHE); 311 fetcher->set_request_context(request_context_getter_.get()); 312 fetcher->set_upload_data("application/octet-stream", request_data); 313 fetcher->Start(); 314 315 // Record that we made a request 316 phishing_report_times_.push(base::Time::Now()); 317} 318 319void ClientSideDetectionService::HandleModelResponse( 320 const URLFetcher* source, 321 const GURL& url, 322 const net::URLRequestStatus& status, 323 int response_code, 324 const ResponseCookies& cookies, 325 const std::string& data) { 326 if (status.is_success() && RC_REQUEST_OK == response_code) { 327 // Copy the model because it has to be accessible after this function 328 // returns. Once we have written the model to a file we will delete the 329 // temporary model string. TODO(noelutz): don't store the model to disk if 330 // it's invalid. 331 tmp_model_string_.reset(new std::string(data)); 332 base::FileUtilProxy::CreateOrOpenCallback* cb = 333 callback_factory_.NewCallback( 334 &ClientSideDetectionService::CreateModelFileDone); 335 if (!base::FileUtilProxy::CreateOrOpen( 336 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), 337 model_path_, 338 base::PLATFORM_FILE_CREATE_ALWAYS | 339 base::PLATFORM_FILE_WRITE | 340 base::PLATFORM_FILE_READ, 341 cb)) { 342 delete cb; 343 SetModelStatus(ERROR_STATUS); 344 } 345 } else { 346 SetModelStatus(ERROR_STATUS); 347 } 348} 349 350void ClientSideDetectionService::HandlePhishingVerdict( 351 const URLFetcher* source, 352 const GURL& url, 353 const net::URLRequestStatus& status, 354 int response_code, 355 const ResponseCookies& cookies, 356 const std::string& data) { 357 ClientPhishingResponse response; 358 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); 359 if (status.is_success() && RC_REQUEST_OK == response_code && 360 response.ParseFromString(data)) { 361 // Cache response, possibly 
flushing an old one. 362 cache_[info->phishing_url] = 363 make_linked_ptr(new CacheState(response.phishy(), base::Time::Now())); 364 info->callback->Run(info->phishing_url, response.phishy()); 365 } else { 366 DLOG(ERROR) << "Unable to get the server verdict for URL: " 367 << info->phishing_url << " status: " << status.status() << " " 368 << "response_code:" << response_code; 369 info->callback->Run(info->phishing_url, false); 370 } 371 client_phishing_reports_.erase(source); 372 delete source; 373} 374 375bool ClientSideDetectionService::GetCachedResult(const GURL& url, 376 bool* is_phishing) { 377 UpdateCache(); 378 379 PhishingCache::iterator it = cache_.find(url); 380 if (it == cache_.end()) { 381 return false; 382 } 383 384 // We still need to check if the result is valid. 385 const CacheState& cache_state = *it->second; 386 if (cache_state.is_phishing ? 387 cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval : 388 cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) { 389 *is_phishing = cache_state.is_phishing; 390 return true; 391 } 392 return false; 393} 394 395void ClientSideDetectionService::UpdateCache() { 396 // Since we limit the number of requests but allow pass-through for cache 397 // refreshes, we don't want to remove elements from the cache if they 398 // could be used for this purpose even if we will not use the entry to 399 // satisfy the request from the cache. 400 base::TimeDelta positive_cache_interval = 401 std::max(kPositiveCacheInterval, kReportsInterval); 402 base::TimeDelta negative_cache_interval = 403 std::max(kNegativeCacheInterval, kReportsInterval); 404 405 // Remove elements from the cache that will no longer be used. 406 for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) { 407 const CacheState& cache_state = *it->second; 408 if (cache_state.is_phishing ? 
409 cache_state.timestamp > base::Time::Now() - positive_cache_interval : 410 cache_state.timestamp > base::Time::Now() - negative_cache_interval) { 411 ++it; 412 } else { 413 cache_.erase(it++); 414 } 415 } 416} 417 418int ClientSideDetectionService::GetNumReports() { 419 base::Time cutoff = base::Time::Now() - kReportsInterval; 420 421 // Erase items older than cutoff because we will never care about them again. 422 while (!phishing_report_times_.empty() && 423 phishing_report_times_.front() < cutoff) { 424 phishing_report_times_.pop(); 425 } 426 427 // Return the number of elements that are above the cutoff. 428 return phishing_report_times_.size(); 429} 430 431bool ClientSideDetectionService::InitializePrivateNetworks() { 432 static const char* const kPrivateNetworks[] = { 433 "10.0.0.0/8", 434 "127.0.0.0/8", 435 "172.16.0.0/12", 436 "192.168.0.0/16", 437 // IPv6 address ranges 438 "fc00::/7", 439 "fec0::/10", 440 "::1/128", 441 }; 442 443 for (size_t i = 0; i < arraysize(kPrivateNetworks); ++i) { 444 net::IPAddressNumber ip_number; 445 size_t prefix_length; 446 if (net::ParseCIDRBlock(kPrivateNetworks[i], &ip_number, &prefix_length)) { 447 private_networks_.push_back(std::make_pair(ip_number, prefix_length)); 448 } else { 449 DLOG(FATAL) << "Unable to parse IP address range: " 450 << kPrivateNetworks[i]; 451 return false; 452 } 453 } 454 return true; 455} 456 457} // namespace safe_browsing 458