1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/safe_browsing/client_side_detection_service.h" 6 7#include "base/command_line.h" 8#include "base/file_path.h" 9#include "base/file_util_proxy.h" 10#include "base/logging.h" 11#include "base/memory/scoped_ptr.h" 12#include "base/message_loop.h" 13#include "base/metrics/histogram.h" 14#include "base/platform_file.h" 15#include "base/stl_util-inl.h" 16#include "base/task.h" 17#include "base/time.h" 18#include "chrome/common/net/http_return.h" 19#include "chrome/common/net/url_fetcher.h" 20#include "chrome/common/safe_browsing/csd.pb.h" 21#include "content/browser/browser_thread.h" 22#include "googleurl/src/gurl.h" 23#include "net/base/load_flags.h" 24#include "net/url_request/url_request_context_getter.h" 25#include "net/url_request/url_request_status.h" 26 27namespace safe_browsing { 28 29const int ClientSideDetectionService::kMaxReportsPerInterval = 3; 30 31const base::TimeDelta ClientSideDetectionService::kReportsInterval = 32 base::TimeDelta::FromDays(1); 33const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval = 34 base::TimeDelta::FromDays(1); 35const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval = 36 base::TimeDelta::FromMinutes(30); 37 38const char ClientSideDetectionService::kClientReportPhishingUrl[] = 39 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; 40// Note: when updatng the model version, don't forget to change the filename 41// in chrome/common/chrome_constants.cc as well, or else existing users won't 42// download the new model. 43// 44// TODO(bryner): add version metadata so that clients can download new models 45// without needing a new model filename. 46const char ClientSideDetectionService::kClientModelUrl[] = 47 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v1.pb"; 48 49struct ClientSideDetectionService::ClientReportInfo { 50 scoped_ptr<ClientReportPhishingRequestCallback> callback; 51 GURL phishing_url; 52}; 53 54ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time) 55 : is_phishing(phish), 56 timestamp(time) {} 57 58ClientSideDetectionService::ClientSideDetectionService( 59 const FilePath& model_path, 60 net::URLRequestContextGetter* request_context_getter) 61 : model_path_(model_path), 62 model_status_(UNKNOWN_STATUS), 63 model_file_(base::kInvalidPlatformFileValue), 64 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), 65 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), 66 request_context_getter_(request_context_getter) {} 67 68ClientSideDetectionService::~ClientSideDetectionService() { 69 method_factory_.RevokeAll(); 70 STLDeleteContainerPairPointers(client_phishing_reports_.begin(), 71 client_phishing_reports_.end()); 72 client_phishing_reports_.clear(); 73 STLDeleteElements(&open_callbacks_); 74 CloseModelFile(); 75} 76 77/* static */ 78ClientSideDetectionService* ClientSideDetectionService::Create( 79 const FilePath& model_path, 80 net::URLRequestContextGetter* request_context_getter) { 81 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 82 scoped_ptr<ClientSideDetectionService> service( 83 new ClientSideDetectionService(model_path, request_context_getter)); 84 if (!service->InitializePrivateNetworks()) { 85 UMA_HISTOGRAM_COUNTS("SBClientPhishing.InitPrivateNetworksFailed", 1); 86 return NULL; 87 } 88 89 // We try to open the model file right away and start fetching it if 90 // it does not already exist on disk. 91 base::FileUtilProxy::CreateOrOpenCallback* cb = 92 service.get()->callback_factory_.NewCallback( 93 &ClientSideDetectionService::OpenModelFileDone); 94 if (!base::FileUtilProxy::CreateOrOpen( 95 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), 96 model_path, 97 base::PLATFORM_FILE_OPEN | base::PLATFORM_FILE_READ, 98 cb)) { 99 delete cb; 100 return NULL; 101 } 102 103 // Delete the previous-version model file. 104 // TODO(bryner): Remove this for M14. 105 base::FileUtilProxy::Delete( 106 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), 107 model_path.DirName().AppendASCII("Safe Browsing Phishing Model"), 108 false /* not recursive */, 109 NULL /* not interested in result */); 110 return service.release(); 111} 112 113void ClientSideDetectionService::GetModelFile(OpenModelDoneCallback* callback) { 114 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 115 MessageLoop::current()->PostTask( 116 FROM_HERE, 117 method_factory_.NewRunnableMethod( 118 &ClientSideDetectionService::StartGetModelFile, callback)); 119} 120 121void ClientSideDetectionService::SendClientReportPhishingRequest( 122 ClientPhishingRequest* verdict, 123 ClientReportPhishingRequestCallback* callback) { 124 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 125 MessageLoop::current()->PostTask( 126 FROM_HERE, 127 method_factory_.NewRunnableMethod( 128 &ClientSideDetectionService::StartClientReportPhishingRequest, 129 verdict, callback)); 130} 131 132bool ClientSideDetectionService::IsPrivateIPAddress( 133 const std::string& ip_address) const { 134 net::IPAddressNumber ip_number; 135 if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) { 136 DLOG(WARNING) << "Unable to parse IP address: " << ip_address; 137 // Err on the side of safety and assume this might be private. 138 return true; 139 } 140 141 for (std::vector<AddressRange>::const_iterator it = 142 private_networks_.begin(); 143 it != private_networks_.end(); ++it) { 144 if (net::IPNumberMatchesPrefix(ip_number, it->first, it->second)) { 145 return true; 146 } 147 } 148 return false; 149} 150 151void ClientSideDetectionService::OnURLFetchComplete( 152 const URLFetcher* source, 153 const GURL& url, 154 const net::URLRequestStatus& status, 155 int response_code, 156 const ResponseCookies& cookies, 157 const std::string& data) { 158 if (source == model_fetcher_.get()) { 159 HandleModelResponse(source, url, status, response_code, cookies, data); 160 } else if (client_phishing_reports_.find(source) != 161 client_phishing_reports_.end()) { 162 HandlePhishingVerdict(source, url, status, response_code, cookies, data); 163 } else { 164 NOTREACHED(); 165 } 166} 167 168void ClientSideDetectionService::SetModelStatus(ModelStatus status) { 169 DCHECK_NE(READY_STATUS, model_status_); 170 model_status_ = status; 171 if (READY_STATUS == status || ERROR_STATUS == status) { 172 for (size_t i = 0; i < open_callbacks_.size(); ++i) { 173 open_callbacks_[i]->Run(model_file_); 174 } 175 STLDeleteElements(&open_callbacks_); 176 } else { 177 NOTREACHED(); 178 } 179} 180 181void ClientSideDetectionService::OpenModelFileDone( 182 base::PlatformFileError error_code, 183 base::PassPlatformFile file, 184 bool created) { 185 DCHECK(!created); 186 if (base::PLATFORM_FILE_OK == error_code) { 187 // The model file already exists. There is no need to fetch the model. 188 model_file_ = file.ReleaseValue(); 189 SetModelStatus(READY_STATUS); 190 } else if (base::PLATFORM_FILE_ERROR_NOT_FOUND == error_code) { 191 // We need to fetch the model since it does not exist yet. 192 model_fetcher_.reset(URLFetcher::Create(0 /* ID is not used */, 193 GURL(kClientModelUrl), 194 URLFetcher::GET, 195 this)); 196 model_fetcher_->set_request_context(request_context_getter_.get()); 197 model_fetcher_->Start(); 198 } else { 199 // It is not clear what we should do in this case. For now we simply fail. 200 // Hopefully, we'll be able to read the model during the next browser 201 // restart. 202 SetModelStatus(ERROR_STATUS); 203 } 204} 205 206void ClientSideDetectionService::CreateModelFileDone( 207 base::PlatformFileError error_code, 208 base::PassPlatformFile file, 209 bool created) { 210 model_file_ = file.ReleaseValue(); 211 base::FileUtilProxy::WriteCallback* cb = callback_factory_.NewCallback( 212 &ClientSideDetectionService::WriteModelFileDone); 213 if (!created || 214 base::PLATFORM_FILE_OK != error_code || 215 !base::FileUtilProxy::Write( 216 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), 217 model_file_, 218 0 /* offset */, tmp_model_string_->data(), tmp_model_string_->size(), 219 cb)) { 220 delete cb; 221 // An error occurred somewhere. We close the model file if necessary and 222 // then run all the pending callbacks giving them an invalid model file. 223 CloseModelFile(); 224 SetModelStatus(ERROR_STATUS); 225 } 226} 227 228void ClientSideDetectionService::WriteModelFileDone( 229 base::PlatformFileError error_code, 230 int bytes_written) { 231 if (base::PLATFORM_FILE_OK == error_code) { 232 SetModelStatus(READY_STATUS); 233 } else { 234 // TODO(noelutz): maybe we should retry writing the model since we 235 // did already fetch the model? 236 CloseModelFile(); 237 SetModelStatus(ERROR_STATUS); 238 } 239 // Delete the model string that we kept around while we were writing the 240 // string to disk - we don't need it anymore. 241 tmp_model_string_.reset(); 242} 243 244void ClientSideDetectionService::CloseModelFile() { 245 if (model_file_ != base::kInvalidPlatformFileValue) { 246 base::FileUtilProxy::Close( 247 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), 248 model_file_, 249 NULL); 250 } 251 model_file_ = base::kInvalidPlatformFileValue; 252} 253 254void ClientSideDetectionService::StartGetModelFile( 255 OpenModelDoneCallback* callback) { 256 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 257 if (UNKNOWN_STATUS == model_status_) { 258 // Store the callback which will be called once we know the status of the 259 // model file. 260 open_callbacks_.push_back(callback); 261 } else { 262 // The model is either in READY or ERROR state which means we can 263 // call the callback right away. 264 callback->Run(model_file_); 265 delete callback; 266 } 267} 268 269void ClientSideDetectionService::StartClientReportPhishingRequest( 270 ClientPhishingRequest* verdict, 271 ClientReportPhishingRequestCallback* callback) { 272 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 273 scoped_ptr<ClientPhishingRequest> request(verdict); 274 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); 275 276 std::string request_data; 277 if (!request->SerializeToString(&request_data)) { 278 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1); 279 VLOG(1) << "Unable to serialize the CSD request. Proto file changed?"; 280 cb->Run(GURL(request->url()), false); 281 return; 282 } 283 284 URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */, 285 GURL(kClientReportPhishingUrl), 286 URLFetcher::POST, 287 this); 288 289 // Remember which callback and URL correspond to the current fetcher object. 290 ClientReportInfo* info = new ClientReportInfo; 291 info->callback.swap(cb); // takes ownership of the callback. 292 info->phishing_url = GURL(request->url()); 293 client_phishing_reports_[fetcher] = info; 294 295 fetcher->set_load_flags(net::LOAD_DISABLE_CACHE); 296 fetcher->set_request_context(request_context_getter_.get()); 297 fetcher->set_upload_data("application/octet-stream", request_data); 298 fetcher->Start(); 299 300 // Record that we made a request 301 phishing_report_times_.push(base::Time::Now()); 302} 303 304void ClientSideDetectionService::HandleModelResponse( 305 const URLFetcher* source, 306 const GURL& url, 307 const net::URLRequestStatus& status, 308 int response_code, 309 const ResponseCookies& cookies, 310 const std::string& data) { 311 if (status.is_success() && RC_REQUEST_OK == response_code) { 312 // Copy the model because it has to be accessible after this function 313 // returns. Once we have written the model to a file we will delete the 314 // temporary model string. TODO(noelutz): don't store the model to disk if 315 // it's invalid. 316 tmp_model_string_.reset(new std::string(data)); 317 base::FileUtilProxy::CreateOrOpenCallback* cb = 318 callback_factory_.NewCallback( 319 &ClientSideDetectionService::CreateModelFileDone); 320 if (!base::FileUtilProxy::CreateOrOpen( 321 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), 322 model_path_, 323 base::PLATFORM_FILE_CREATE_ALWAYS | 324 base::PLATFORM_FILE_WRITE | 325 base::PLATFORM_FILE_READ, 326 cb)) { 327 delete cb; 328 SetModelStatus(ERROR_STATUS); 329 } 330 } else { 331 SetModelStatus(ERROR_STATUS); 332 } 333} 334 335void ClientSideDetectionService::HandlePhishingVerdict( 336 const URLFetcher* source, 337 const GURL& url, 338 const net::URLRequestStatus& status, 339 int response_code, 340 const ResponseCookies& cookies, 341 const std::string& data) { 342 ClientPhishingResponse response; 343 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); 344 if (status.is_success() && RC_REQUEST_OK == response_code && 345 response.ParseFromString(data)) { 346 // Cache response, possibly flushing an old one. 347 cache_[info->phishing_url] = 348 make_linked_ptr(new CacheState(response.phishy(), base::Time::Now())); 349 info->callback->Run(info->phishing_url, response.phishy()); 350 } else { 351 DLOG(ERROR) << "Unable to get the server verdict for URL: " 352 << info->phishing_url << " status: " << status.status() << " " 353 << "response_code:" << response_code; 354 info->callback->Run(info->phishing_url, false); 355 } 356 client_phishing_reports_.erase(source); 357 delete source; 358} 359 360bool ClientSideDetectionService::IsInCache(const GURL& url) { 361 UpdateCache(); 362 363 return cache_.find(url) != cache_.end(); 364} 365 366bool ClientSideDetectionService::GetValidCachedResult(const GURL& url, 367 bool* is_phishing) { 368 UpdateCache(); 369 370 PhishingCache::iterator it = cache_.find(url); 371 if (it == cache_.end()) { 372 return false; 373 } 374 375 // We still need to check if the result is valid. 376 const CacheState& cache_state = *it->second; 377 if (cache_state.is_phishing ? 378 cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval : 379 cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) { 380 *is_phishing = cache_state.is_phishing; 381 return true; 382 } 383 return false; 384} 385 386void ClientSideDetectionService::UpdateCache() { 387 // Since we limit the number of requests but allow pass-through for cache 388 // refreshes, we don't want to remove elements from the cache if they 389 // could be used for this purpose even if we will not use the entry to 390 // satisfy the request from the cache. 391 base::TimeDelta positive_cache_interval = 392 std::max(kPositiveCacheInterval, kReportsInterval); 393 base::TimeDelta negative_cache_interval = 394 std::max(kNegativeCacheInterval, kReportsInterval); 395 396 // Remove elements from the cache that will no longer be used. 397 for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) { 398 const CacheState& cache_state = *it->second; 399 if (cache_state.is_phishing ? 400 cache_state.timestamp > base::Time::Now() - positive_cache_interval : 401 cache_state.timestamp > base::Time::Now() - negative_cache_interval) { 402 ++it; 403 } else { 404 cache_.erase(it++); 405 } 406 } 407} 408 409bool ClientSideDetectionService::OverReportLimit() { 410 return GetNumReports() > kMaxReportsPerInterval; 411} 412 413int ClientSideDetectionService::GetNumReports() { 414 base::Time cutoff = base::Time::Now() - kReportsInterval; 415 416 // Erase items older than cutoff because we will never care about them again. 417 while (!phishing_report_times_.empty() && 418 phishing_report_times_.front() < cutoff) { 419 phishing_report_times_.pop(); 420 } 421 422 // Return the number of elements that are above the cutoff. 423 return phishing_report_times_.size(); 424} 425 426bool ClientSideDetectionService::InitializePrivateNetworks() { 427 static const char* const kPrivateNetworks[] = { 428 "10.0.0.0/8", 429 "127.0.0.0/8", 430 "172.16.0.0/12", 431 "192.168.0.0/16", 432 // IPv6 address ranges 433 "fc00::/7", 434 "fec0::/10", 435 "::1/128", 436 }; 437 438 for (size_t i = 0; i < arraysize(kPrivateNetworks); ++i) { 439 net::IPAddressNumber ip_number; 440 size_t prefix_length; 441 if (net::ParseCIDRBlock(kPrivateNetworks[i], &ip_number, &prefix_length)) { 442 private_networks_.push_back(std::make_pair(ip_number, prefix_length)); 443 } else { 444 DLOG(FATAL) << "Unable to parse IP address range: " 445 << kPrivateNetworks[i]; 446 return false; 447 } 448 } 449 return true; 450} 451 452} // namespace safe_browsing 453