client_side_detection_service.cc revision dc0f95d653279beabeb9817299e2902918ba123e
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/client_side_detection_service.h"
6
7#include "base/command_line.h"
8#include "base/file_path.h"
9#include "base/file_util_proxy.h"
10#include "base/logging.h"
11#include "base/message_loop.h"
12#include "base/metrics/histogram.h"
13#include "base/platform_file.h"
14#include "base/scoped_ptr.h"
15#include "base/stl_util-inl.h"
16#include "base/task.h"
17#include "base/time.h"
18#include "chrome/browser/safe_browsing/csd.pb.h"
19#include "chrome/common/net/http_return.h"
20#include "chrome/common/net/url_fetcher.h"
21#include "chrome/common/net/url_request_context_getter.h"
22#include "content/browser/browser_thread.h"
23#include "googleurl/src/gurl.h"
24#include "net/base/load_flags.h"
25#include "net/url_request/url_request_status.h"
26
27namespace safe_browsing {
28
29const int ClientSideDetectionService::kMaxReportsPerInterval = 3;
30
31const base::TimeDelta ClientSideDetectionService::kReportsInterval =
32    base::TimeDelta::FromDays(1);
33const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval =
34    base::TimeDelta::FromDays(1);
35const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval =
36    base::TimeDelta::FromMinutes(30);
37
38const char ClientSideDetectionService::kClientReportPhishingUrl[] =
39    "https://sb-ssl.google.com/safebrowsing/clientreport/phishing";
40const char ClientSideDetectionService::kClientModelUrl[] =
41    "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb";
42
43struct ClientSideDetectionService::ClientReportInfo {
44  scoped_ptr<ClientReportPhishingRequestCallback> callback;
45  GURL phishing_url;
46};
47
48ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time)
49    : is_phishing(phish),
50      timestamp(time) {}
51
52ClientSideDetectionService::ClientSideDetectionService(
53    const FilePath& model_path,
54    URLRequestContextGetter* request_context_getter)
55    : model_path_(model_path),
56      model_status_(UNKNOWN_STATUS),
57      model_file_(base::kInvalidPlatformFileValue),
58      ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)),
59      ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)),
60      request_context_getter_(request_context_getter) {}
61
62ClientSideDetectionService::~ClientSideDetectionService() {
63  method_factory_.RevokeAll();
64  STLDeleteContainerPairPointers(client_phishing_reports_.begin(),
65                                 client_phishing_reports_.end());
66  client_phishing_reports_.clear();
67  STLDeleteElements(&open_callbacks_);
68  CloseModelFile();
69}
70
71/* static */
72ClientSideDetectionService* ClientSideDetectionService::Create(
73    const FilePath& model_path,
74    URLRequestContextGetter* request_context_getter) {
75  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
76  scoped_ptr<ClientSideDetectionService> service(
77      new ClientSideDetectionService(model_path, request_context_getter));
78  if (!service->InitializePrivateNetworks()) {
79    UMA_HISTOGRAM_COUNTS("SBClientPhishing.InitPrivateNetworksFailed", 1);
80    return NULL;
81  }
82
83  // We try to open the model file right away and start fetching it if
84  // it does not already exist on disk.
85  base::FileUtilProxy::CreateOrOpenCallback* cb =
86      service.get()->callback_factory_.NewCallback(
87          &ClientSideDetectionService::OpenModelFileDone);
88  if (!base::FileUtilProxy::CreateOrOpen(
89          BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
90          model_path,
91          base::PLATFORM_FILE_OPEN | base::PLATFORM_FILE_READ,
92          cb)) {
93    delete cb;
94    return NULL;
95  }
96  return service.release();
97}
98
99void ClientSideDetectionService::GetModelFile(OpenModelDoneCallback* callback) {
100  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
101  MessageLoop::current()->PostTask(
102      FROM_HERE,
103      method_factory_.NewRunnableMethod(
104          &ClientSideDetectionService::StartGetModelFile, callback));
105}
106
107void ClientSideDetectionService::SendClientReportPhishingRequest(
108    const GURL& phishing_url,
109    double score,
110    ClientReportPhishingRequestCallback* callback) {
111  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
112  MessageLoop::current()->PostTask(
113      FROM_HERE,
114      method_factory_.NewRunnableMethod(
115          &ClientSideDetectionService::StartClientReportPhishingRequest,
116          phishing_url, score, callback));
117}
118
119bool ClientSideDetectionService::IsPrivateIPAddress(
120    const std::string& ip_address) const {
121  net::IPAddressNumber ip_number;
122  if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) {
123    DLOG(WARNING) << "Unable to parse IP address: " << ip_address;
124    // Err on the side of safety and assume this might be private.
125    return true;
126  }
127
128  for (std::vector<AddressRange>::const_iterator it =
129           private_networks_.begin();
130       it != private_networks_.end(); ++it) {
131    if (net::IPNumberMatchesPrefix(ip_number, it->first, it->second)) {
132      return true;
133    }
134  }
135  return false;
136}
137
138void ClientSideDetectionService::OnURLFetchComplete(
139    const URLFetcher* source,
140    const GURL& url,
141    const net::URLRequestStatus& status,
142    int response_code,
143    const ResponseCookies& cookies,
144    const std::string& data) {
145  if (source == model_fetcher_.get()) {
146    HandleModelResponse(source, url, status, response_code, cookies, data);
147  } else if (client_phishing_reports_.find(source) !=
148             client_phishing_reports_.end()) {
149    HandlePhishingVerdict(source, url, status, response_code, cookies, data);
150  } else {
151    NOTREACHED();
152  }
153}
154
155void ClientSideDetectionService::SetModelStatus(ModelStatus status) {
156  DCHECK_NE(READY_STATUS, model_status_);
157  model_status_ = status;
158  if (READY_STATUS == status || ERROR_STATUS == status) {
159    for (size_t i = 0; i < open_callbacks_.size(); ++i) {
160      open_callbacks_[i]->Run(model_file_);
161    }
162    STLDeleteElements(&open_callbacks_);
163  } else {
164    NOTREACHED();
165  }
166}
167
168void ClientSideDetectionService::OpenModelFileDone(
169    base::PlatformFileError error_code,
170    base::PassPlatformFile file,
171    bool created) {
172  DCHECK(!created);
173  if (base::PLATFORM_FILE_OK == error_code) {
174    // The model file already exists.  There is no need to fetch the model.
175    model_file_ = file.ReleaseValue();
176    SetModelStatus(READY_STATUS);
177  } else if (base::PLATFORM_FILE_ERROR_NOT_FOUND == error_code) {
178    // We need to fetch the model since it does not exist yet.
179    model_fetcher_.reset(URLFetcher::Create(0 /* ID is not used */,
180                                            GURL(kClientModelUrl),
181                                            URLFetcher::GET,
182                                            this));
183    model_fetcher_->set_request_context(request_context_getter_.get());
184    model_fetcher_->Start();
185  } else {
186    // It is not clear what we should do in this case.  For now we simply fail.
187    // Hopefully, we'll be able to read the model during the next browser
188    // restart.
189    SetModelStatus(ERROR_STATUS);
190  }
191}
192
193void ClientSideDetectionService::CreateModelFileDone(
194    base::PlatformFileError error_code,
195    base::PassPlatformFile file,
196    bool created) {
197  model_file_ = file.ReleaseValue();
198  base::FileUtilProxy::WriteCallback* cb = callback_factory_.NewCallback(
199      &ClientSideDetectionService::WriteModelFileDone);
200  if (!created ||
201      base::PLATFORM_FILE_OK != error_code ||
202      !base::FileUtilProxy::Write(
203          BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
204          model_file_,
205          0 /* offset */, tmp_model_string_->data(), tmp_model_string_->size(),
206          cb)) {
207    delete cb;
208    // An error occurred somewhere.  We close the model file if necessary and
209    // then run all the pending callbacks giving them an invalid model file.
210    CloseModelFile();
211    SetModelStatus(ERROR_STATUS);
212  }
213}
214
215void ClientSideDetectionService::WriteModelFileDone(
216    base::PlatformFileError error_code,
217    int bytes_written) {
218  if (base::PLATFORM_FILE_OK == error_code) {
219    SetModelStatus(READY_STATUS);
220  } else {
221    // TODO(noelutz): maybe we should retry writing the model since we
222    // did already fetch the model?
223    CloseModelFile();
224    SetModelStatus(ERROR_STATUS);
225  }
226  // Delete the model string that we kept around while we were writing the
227  // string to disk - we don't need it anymore.
228  tmp_model_string_.reset();
229}
230
231void ClientSideDetectionService::CloseModelFile() {
232  if (model_file_ != base::kInvalidPlatformFileValue) {
233    base::FileUtilProxy::Close(
234        BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
235        model_file_,
236        NULL);
237  }
238  model_file_ = base::kInvalidPlatformFileValue;
239}
240
241void ClientSideDetectionService::StartGetModelFile(
242    OpenModelDoneCallback* callback) {
243  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
244  if (UNKNOWN_STATUS == model_status_) {
245    // Store the callback which will be called once we know the status of the
246    // model file.
247    open_callbacks_.push_back(callback);
248  } else {
249    // The model is either in READY or ERROR state which means we can
250    // call the callback right away.
251    callback->Run(model_file_);
252    delete callback;
253  }
254}
255
256void ClientSideDetectionService::StartClientReportPhishingRequest(
257    const GURL& phishing_url,
258    double score,
259    ClientReportPhishingRequestCallback* callback) {
260  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
261  scoped_ptr<ClientReportPhishingRequestCallback> cb(callback);
262
263  bool is_phishing;
264  if (GetCachedResult(phishing_url, &is_phishing)) {
265    VLOG(1) << "Satisfying request for " << phishing_url << " from cache";
266    UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
267    cb->Run(phishing_url, is_phishing);
268    return;
269  }
270
271  // We limit the number of distinct pings to kMaxReports, but we don't count
272  // urls already in the cache against this number. We don't want to start
273  // classifying too many pages as phishing, but for those that we already
274  // think are phishing we want to give ourselves a chance to fix false
275  // positives.
276  if (cache_.find(phishing_url) != cache_.end()) {
277    VLOG(1) << "Refreshing cache for " << phishing_url;
278    UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1);
279  } else if (GetNumReports() > kMaxReportsPerInterval) {
280    VLOG(1) << "Too many report phishing requests sent in the last "
281            << kReportsInterval.InHours() << " hours, not checking "
282            << phishing_url;
283    UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1);
284    cb->Run(phishing_url, false);
285    return;
286  }
287
288  ClientPhishingRequest request;
289  request.set_url(phishing_url.spec());
290  request.set_client_score(static_cast<float>(score));
291  std::string request_data;
292  if (!request.SerializeToString(&request_data)) {
293    UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1);
294    VLOG(1) << "Unable to serialize the CSD request. Proto file changed?";
295    cb->Run(phishing_url, false);
296    return;
297  }
298
299  URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */,
300                                           GURL(kClientReportPhishingUrl),
301                                           URLFetcher::POST,
302                                           this);
303
304  // Remember which callback and URL correspond to the current fetcher object.
305  ClientReportInfo* info = new ClientReportInfo;
306  info->callback.swap(cb);  // takes ownership of the callback.
307  info->phishing_url = phishing_url;
308  client_phishing_reports_[fetcher] = info;
309
310  fetcher->set_load_flags(net::LOAD_DISABLE_CACHE);
311  fetcher->set_request_context(request_context_getter_.get());
312  fetcher->set_upload_data("application/octet-stream", request_data);
313  fetcher->Start();
314
315  // Record that we made a request
316  phishing_report_times_.push(base::Time::Now());
317}
318
319void ClientSideDetectionService::HandleModelResponse(
320    const URLFetcher* source,
321    const GURL& url,
322    const net::URLRequestStatus& status,
323    int response_code,
324    const ResponseCookies& cookies,
325    const std::string& data) {
326  if (status.is_success() && RC_REQUEST_OK == response_code) {
327    // Copy the model because it has to be accessible after this function
328    // returns.  Once we have written the model to a file we will delete the
329    // temporary model string. TODO(noelutz): don't store the model to disk if
330    // it's invalid.
331    tmp_model_string_.reset(new std::string(data));
332    base::FileUtilProxy::CreateOrOpenCallback* cb =
333        callback_factory_.NewCallback(
334            &ClientSideDetectionService::CreateModelFileDone);
335    if (!base::FileUtilProxy::CreateOrOpen(
336            BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
337            model_path_,
338            base::PLATFORM_FILE_CREATE_ALWAYS |
339            base::PLATFORM_FILE_WRITE |
340            base::PLATFORM_FILE_READ,
341            cb)) {
342      delete cb;
343      SetModelStatus(ERROR_STATUS);
344    }
345  } else {
346    SetModelStatus(ERROR_STATUS);
347  }
348}
349
350void ClientSideDetectionService::HandlePhishingVerdict(
351    const URLFetcher* source,
352    const GURL& url,
353    const net::URLRequestStatus& status,
354    int response_code,
355    const ResponseCookies& cookies,
356    const std::string& data) {
357  ClientPhishingResponse response;
358  scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]);
359  if (status.is_success() && RC_REQUEST_OK == response_code &&
360      response.ParseFromString(data)) {
361    // Cache response, possibly flushing an old one.
362    cache_[info->phishing_url] =
363        make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
364    info->callback->Run(info->phishing_url, response.phishy());
365  } else {
366    DLOG(ERROR) << "Unable to get the server verdict for URL: "
367                << info->phishing_url << " status: " << status.status() << " "
368                << "response_code:" << response_code;
369    info->callback->Run(info->phishing_url, false);
370  }
371  client_phishing_reports_.erase(source);
372  delete source;
373}
374
375bool ClientSideDetectionService::GetCachedResult(const GURL& url,
376                                                 bool* is_phishing) {
377  UpdateCache();
378
379  PhishingCache::iterator it = cache_.find(url);
380  if (it == cache_.end()) {
381    return false;
382  }
383
384  // We still need to check if the result is valid.
385  const CacheState& cache_state = *it->second;
386  if (cache_state.is_phishing ?
387      cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval :
388      cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) {
389    *is_phishing = cache_state.is_phishing;
390    return true;
391  }
392  return false;
393}
394
395void ClientSideDetectionService::UpdateCache() {
396  // Since we limit the number of requests but allow pass-through for cache
397  // refreshes, we don't want to remove elements from the cache if they
398  // could be used for this purpose even if we will not use the entry to
399  // satisfy the request from the cache.
400  base::TimeDelta positive_cache_interval =
401      std::max(kPositiveCacheInterval, kReportsInterval);
402  base::TimeDelta negative_cache_interval =
403      std::max(kNegativeCacheInterval, kReportsInterval);
404
405  // Remove elements from the cache that will no longer be used.
406  for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) {
407    const CacheState& cache_state = *it->second;
408    if (cache_state.is_phishing ?
409        cache_state.timestamp > base::Time::Now() - positive_cache_interval :
410        cache_state.timestamp > base::Time::Now() - negative_cache_interval) {
411      ++it;
412    } else {
413      cache_.erase(it++);
414    }
415  }
416}
417
418int ClientSideDetectionService::GetNumReports() {
419  base::Time cutoff = base::Time::Now() - kReportsInterval;
420
421  // Erase items older than cutoff because we will never care about them again.
422  while (!phishing_report_times_.empty() &&
423         phishing_report_times_.front() < cutoff) {
424    phishing_report_times_.pop();
425  }
426
427  // Return the number of elements that are above the cutoff.
428  return phishing_report_times_.size();
429}
430
431bool ClientSideDetectionService::InitializePrivateNetworks() {
432  static const char* const kPrivateNetworks[] = {
433    "10.0.0.0/8",
434    "127.0.0.0/8",
435    "172.16.0.0/12",
436    "192.168.0.0/16",
437    // IPv6 address ranges
438    "fc00::/7",
439    "fec0::/10",
440    "::1/128",
441  };
442
443  for (size_t i = 0; i < arraysize(kPrivateNetworks); ++i) {
444    net::IPAddressNumber ip_number;
445    size_t prefix_length;
446    if (net::ParseCIDRBlock(kPrivateNetworks[i], &ip_number, &prefix_length)) {
447      private_networks_.push_back(std::make_pair(ip_number, prefix_length));
448    } else {
449      DLOG(FATAL) << "Unable to parse IP address range: "
450                  << kPrivateNetworks[i];
451      return false;
452    }
453  }
454  return true;
455}
456
457}  // namespace safe_browsing
458