1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/client_side_detection_service.h"
6
7#include "base/command_line.h"
8#include "base/file_path.h"
9#include "base/file_util_proxy.h"
10#include "base/logging.h"
11#include "base/memory/scoped_ptr.h"
12#include "base/message_loop.h"
13#include "base/metrics/histogram.h"
14#include "base/platform_file.h"
15#include "base/stl_util-inl.h"
16#include "base/task.h"
17#include "base/time.h"
18#include "chrome/common/net/http_return.h"
19#include "chrome/common/net/url_fetcher.h"
20#include "chrome/common/safe_browsing/csd.pb.h"
21#include "content/browser/browser_thread.h"
22#include "googleurl/src/gurl.h"
23#include "net/base/load_flags.h"
24#include "net/url_request/url_request_context_getter.h"
25#include "net/url_request/url_request_status.h"
26
27namespace safe_browsing {
28
// Threshold that OverReportLimit() compares the number of recent reports
// against.
const int ClientSideDetectionService::kMaxReportsPerInterval = 3;

// Length of the sliding window over which GetNumReports() counts reports.
// UpdateCache() also uses it as a lower bound on how long cache entries are
// retained.
const base::TimeDelta ClientSideDetectionService::kReportsInterval =
    base::TimeDelta::FromDays(1);
// How long a cached non-phishing verdict is served by GetValidCachedResult().
const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval =
    base::TimeDelta::FromDays(1);
// How long a cached phishing verdict is served by GetValidCachedResult().
const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval =
    base::TimeDelta::FromMinutes(30);

// Endpoint that client phishing reports are POSTed to.
const char ClientSideDetectionService::kClientReportPhishingUrl[] =
    "https://sb-ssl.google.com/safebrowsing/clientreport/phishing";
// Location the client-side model is downloaded from when it is not yet on
// disk.
//
// Note: when updating the model version, don't forget to change the filename
// in chrome/common/chrome_constants.cc as well, or else existing users won't
// download the new model.
//
// TODO(bryner): add version metadata so that clients can download new models
// without needing a new model filename.
const char ClientSideDetectionService::kClientModelUrl[] =
    "https://ssl.gstatic.com/safebrowsing/csd/client_model_v1.pb";
48
// Bookkeeping for one in-flight phishing report.  An instance is stored in
// client_phishing_reports_ keyed by the fetcher that carries the request, and
// is consumed when that fetcher completes.
struct ClientSideDetectionService::ClientReportInfo {
  // Callback to run with the verdict (or with false on failure).  Owned here.
  scoped_ptr<ClientReportPhishingRequestCallback> callback;
  // URL the report was sent for; passed back to |callback| and used as the
  // cache key.
  GURL phishing_url;
};
53
// Records a verdict (|phish|) together with the time (|time|) it was
// obtained, so that cache lookups can later decide whether it is still valid.
ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time)
    : is_phishing(phish),
      timestamp(time) {}
57
// Stores the model path and request context getter.  The service starts with
// an unknown model status and no open model file; Create() is what kicks off
// opening (or fetching) the model.
ClientSideDetectionService::ClientSideDetectionService(
    const FilePath& model_path,
    net::URLRequestContextGetter* request_context_getter)
    : model_path_(model_path),
      model_status_(UNKNOWN_STATUS),
      model_file_(base::kInvalidPlatformFileValue),
      // Both factories are bound to |this|; the macro presumably silences the
      // compiler warning about using |this| in an initializer list.
      ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)),
      ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)),
      request_context_getter_(request_context_getter) {}
67
// Tears the service down: drops pending method tasks, frees the bookkeeping
// for outstanding reports and pending model callbacks, and closes the model
// file if one is open.
ClientSideDetectionService::~ClientSideDetectionService() {
  // Presumably cancels tasks posted through method_factory_ that have not run
  // yet, so they cannot call into a destroyed object.
  method_factory_.RevokeAll();
  // Frees the pointers held by every outstanding report entry.
  // NOTE(review): STLDeleteContainerPairPointers is expected to delete both
  // the fetcher key and the ClientReportInfo value - confirm in stl_util.
  STLDeleteContainerPairPointers(client_phishing_reports_.begin(),
                                 client_phishing_reports_.end());
  client_phishing_reports_.clear();
  // Callbacks still waiting for the model are deleted without being run.
  STLDeleteElements(&open_callbacks_);
  CloseModelFile();
}
76
77/* static */
78ClientSideDetectionService* ClientSideDetectionService::Create(
79    const FilePath& model_path,
80    net::URLRequestContextGetter* request_context_getter) {
81  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
82  scoped_ptr<ClientSideDetectionService> service(
83      new ClientSideDetectionService(model_path, request_context_getter));
84  if (!service->InitializePrivateNetworks()) {
85    UMA_HISTOGRAM_COUNTS("SBClientPhishing.InitPrivateNetworksFailed", 1);
86    return NULL;
87  }
88
89  // We try to open the model file right away and start fetching it if
90  // it does not already exist on disk.
91  base::FileUtilProxy::CreateOrOpenCallback* cb =
92      service.get()->callback_factory_.NewCallback(
93          &ClientSideDetectionService::OpenModelFileDone);
94  if (!base::FileUtilProxy::CreateOrOpen(
95          BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
96          model_path,
97          base::PLATFORM_FILE_OPEN | base::PLATFORM_FILE_READ,
98          cb)) {
99    delete cb;
100    return NULL;
101  }
102
103  // Delete the previous-version model file.
104  // TODO(bryner): Remove this for M14.
105  base::FileUtilProxy::Delete(
106      BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
107      model_path.DirName().AppendASCII("Safe Browsing Phishing Model"),
108      false /* not recursive */,
109      NULL /* not interested in result */);
110  return service.release();
111}
112
113void ClientSideDetectionService::GetModelFile(OpenModelDoneCallback* callback) {
114  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
115  MessageLoop::current()->PostTask(
116      FROM_HERE,
117      method_factory_.NewRunnableMethod(
118          &ClientSideDetectionService::StartGetModelFile, callback));
119}
120
121void ClientSideDetectionService::SendClientReportPhishingRequest(
122    ClientPhishingRequest* verdict,
123    ClientReportPhishingRequestCallback* callback) {
124  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
125  MessageLoop::current()->PostTask(
126      FROM_HERE,
127      method_factory_.NewRunnableMethod(
128          &ClientSideDetectionService::StartClientReportPhishingRequest,
129          verdict, callback));
130}
131
132bool ClientSideDetectionService::IsPrivateIPAddress(
133    const std::string& ip_address) const {
134  net::IPAddressNumber ip_number;
135  if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) {
136    DLOG(WARNING) << "Unable to parse IP address: " << ip_address;
137    // Err on the side of safety and assume this might be private.
138    return true;
139  }
140
141  for (std::vector<AddressRange>::const_iterator it =
142           private_networks_.begin();
143       it != private_networks_.end(); ++it) {
144    if (net::IPNumberMatchesPrefix(ip_number, it->first, it->second)) {
145      return true;
146    }
147  }
148  return false;
149}
150
151void ClientSideDetectionService::OnURLFetchComplete(
152    const URLFetcher* source,
153    const GURL& url,
154    const net::URLRequestStatus& status,
155    int response_code,
156    const ResponseCookies& cookies,
157    const std::string& data) {
158  if (source == model_fetcher_.get()) {
159    HandleModelResponse(source, url, status, response_code, cookies, data);
160  } else if (client_phishing_reports_.find(source) !=
161             client_phishing_reports_.end()) {
162    HandlePhishingVerdict(source, url, status, response_code, cookies, data);
163  } else {
164    NOTREACHED();
165  }
166}
167
168void ClientSideDetectionService::SetModelStatus(ModelStatus status) {
169  DCHECK_NE(READY_STATUS, model_status_);
170  model_status_ = status;
171  if (READY_STATUS == status || ERROR_STATUS == status) {
172    for (size_t i = 0; i < open_callbacks_.size(); ++i) {
173      open_callbacks_[i]->Run(model_file_);
174    }
175    STLDeleteElements(&open_callbacks_);
176  } else {
177    NOTREACHED();
178  }
179}
180
181void ClientSideDetectionService::OpenModelFileDone(
182    base::PlatformFileError error_code,
183    base::PassPlatformFile file,
184    bool created) {
185  DCHECK(!created);
186  if (base::PLATFORM_FILE_OK == error_code) {
187    // The model file already exists.  There is no need to fetch the model.
188    model_file_ = file.ReleaseValue();
189    SetModelStatus(READY_STATUS);
190  } else if (base::PLATFORM_FILE_ERROR_NOT_FOUND == error_code) {
191    // We need to fetch the model since it does not exist yet.
192    model_fetcher_.reset(URLFetcher::Create(0 /* ID is not used */,
193                                            GURL(kClientModelUrl),
194                                            URLFetcher::GET,
195                                            this));
196    model_fetcher_->set_request_context(request_context_getter_.get());
197    model_fetcher_->Start();
198  } else {
199    // It is not clear what we should do in this case.  For now we simply fail.
200    // Hopefully, we'll be able to read the model during the next browser
201    // restart.
202    SetModelStatus(ERROR_STATUS);
203  }
204}
205
206void ClientSideDetectionService::CreateModelFileDone(
207    base::PlatformFileError error_code,
208    base::PassPlatformFile file,
209    bool created) {
210  model_file_ = file.ReleaseValue();
211  base::FileUtilProxy::WriteCallback* cb = callback_factory_.NewCallback(
212      &ClientSideDetectionService::WriteModelFileDone);
213  if (!created ||
214      base::PLATFORM_FILE_OK != error_code ||
215      !base::FileUtilProxy::Write(
216          BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
217          model_file_,
218          0 /* offset */, tmp_model_string_->data(), tmp_model_string_->size(),
219          cb)) {
220    delete cb;
221    // An error occurred somewhere.  We close the model file if necessary and
222    // then run all the pending callbacks giving them an invalid model file.
223    CloseModelFile();
224    SetModelStatus(ERROR_STATUS);
225  }
226}
227
228void ClientSideDetectionService::WriteModelFileDone(
229    base::PlatformFileError error_code,
230    int bytes_written) {
231  if (base::PLATFORM_FILE_OK == error_code) {
232    SetModelStatus(READY_STATUS);
233  } else {
234    // TODO(noelutz): maybe we should retry writing the model since we
235    // did already fetch the model?
236    CloseModelFile();
237    SetModelStatus(ERROR_STATUS);
238  }
239  // Delete the model string that we kept around while we were writing the
240  // string to disk - we don't need it anymore.
241  tmp_model_string_.reset();
242}
243
244void ClientSideDetectionService::CloseModelFile() {
245  if (model_file_ != base::kInvalidPlatformFileValue) {
246    base::FileUtilProxy::Close(
247        BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
248        model_file_,
249        NULL);
250  }
251  model_file_ = base::kInvalidPlatformFileValue;
252}
253
254void ClientSideDetectionService::StartGetModelFile(
255    OpenModelDoneCallback* callback) {
256  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
257  if (UNKNOWN_STATUS == model_status_) {
258    // Store the callback which will be called once we know the status of the
259    // model file.
260    open_callbacks_.push_back(callback);
261  } else {
262    // The model is either in READY or ERROR state which means we can
263    // call the callback right away.
264    callback->Run(model_file_);
265    delete callback;
266  }
267}
268
269void ClientSideDetectionService::StartClientReportPhishingRequest(
270    ClientPhishingRequest* verdict,
271    ClientReportPhishingRequestCallback* callback) {
272  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
273  scoped_ptr<ClientPhishingRequest> request(verdict);
274  scoped_ptr<ClientReportPhishingRequestCallback> cb(callback);
275
276  std::string request_data;
277  if (!request->SerializeToString(&request_data)) {
278    UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1);
279    VLOG(1) << "Unable to serialize the CSD request. Proto file changed?";
280    cb->Run(GURL(request->url()), false);
281    return;
282  }
283
284  URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */,
285                                           GURL(kClientReportPhishingUrl),
286                                           URLFetcher::POST,
287                                           this);
288
289  // Remember which callback and URL correspond to the current fetcher object.
290  ClientReportInfo* info = new ClientReportInfo;
291  info->callback.swap(cb);  // takes ownership of the callback.
292  info->phishing_url = GURL(request->url());
293  client_phishing_reports_[fetcher] = info;
294
295  fetcher->set_load_flags(net::LOAD_DISABLE_CACHE);
296  fetcher->set_request_context(request_context_getter_.get());
297  fetcher->set_upload_data("application/octet-stream", request_data);
298  fetcher->Start();
299
300  // Record that we made a request
301  phishing_report_times_.push(base::Time::Now());
302}
303
304void ClientSideDetectionService::HandleModelResponse(
305    const URLFetcher* source,
306    const GURL& url,
307    const net::URLRequestStatus& status,
308    int response_code,
309    const ResponseCookies& cookies,
310    const std::string& data) {
311  if (status.is_success() && RC_REQUEST_OK == response_code) {
312    // Copy the model because it has to be accessible after this function
313    // returns.  Once we have written the model to a file we will delete the
314    // temporary model string. TODO(noelutz): don't store the model to disk if
315    // it's invalid.
316    tmp_model_string_.reset(new std::string(data));
317    base::FileUtilProxy::CreateOrOpenCallback* cb =
318        callback_factory_.NewCallback(
319            &ClientSideDetectionService::CreateModelFileDone);
320    if (!base::FileUtilProxy::CreateOrOpen(
321            BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
322            model_path_,
323            base::PLATFORM_FILE_CREATE_ALWAYS |
324            base::PLATFORM_FILE_WRITE |
325            base::PLATFORM_FILE_READ,
326            cb)) {
327      delete cb;
328      SetModelStatus(ERROR_STATUS);
329    }
330  } else {
331    SetModelStatus(ERROR_STATUS);
332  }
333}
334
335void ClientSideDetectionService::HandlePhishingVerdict(
336    const URLFetcher* source,
337    const GURL& url,
338    const net::URLRequestStatus& status,
339    int response_code,
340    const ResponseCookies& cookies,
341    const std::string& data) {
342  ClientPhishingResponse response;
343  scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]);
344  if (status.is_success() && RC_REQUEST_OK == response_code &&
345      response.ParseFromString(data)) {
346    // Cache response, possibly flushing an old one.
347    cache_[info->phishing_url] =
348        make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
349    info->callback->Run(info->phishing_url, response.phishy());
350  } else {
351    DLOG(ERROR) << "Unable to get the server verdict for URL: "
352                << info->phishing_url << " status: " << status.status() << " "
353                << "response_code:" << response_code;
354    info->callback->Run(info->phishing_url, false);
355  }
356  client_phishing_reports_.erase(source);
357  delete source;
358}
359
360bool ClientSideDetectionService::IsInCache(const GURL& url) {
361  UpdateCache();
362
363  return cache_.find(url) != cache_.end();
364}
365
366bool ClientSideDetectionService::GetValidCachedResult(const GURL& url,
367                                                      bool* is_phishing) {
368  UpdateCache();
369
370  PhishingCache::iterator it = cache_.find(url);
371  if (it == cache_.end()) {
372    return false;
373  }
374
375  // We still need to check if the result is valid.
376  const CacheState& cache_state = *it->second;
377  if (cache_state.is_phishing ?
378      cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval :
379      cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) {
380    *is_phishing = cache_state.is_phishing;
381    return true;
382  }
383  return false;
384}
385
386void ClientSideDetectionService::UpdateCache() {
387  // Since we limit the number of requests but allow pass-through for cache
388  // refreshes, we don't want to remove elements from the cache if they
389  // could be used for this purpose even if we will not use the entry to
390  // satisfy the request from the cache.
391  base::TimeDelta positive_cache_interval =
392      std::max(kPositiveCacheInterval, kReportsInterval);
393  base::TimeDelta negative_cache_interval =
394      std::max(kNegativeCacheInterval, kReportsInterval);
395
396  // Remove elements from the cache that will no longer be used.
397  for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) {
398    const CacheState& cache_state = *it->second;
399    if (cache_state.is_phishing ?
400        cache_state.timestamp > base::Time::Now() - positive_cache_interval :
401        cache_state.timestamp > base::Time::Now() - negative_cache_interval) {
402      ++it;
403    } else {
404      cache_.erase(it++);
405    }
406  }
407}
408
409bool ClientSideDetectionService::OverReportLimit() {
410  return GetNumReports() > kMaxReportsPerInterval;
411}
412
413int ClientSideDetectionService::GetNumReports() {
414  base::Time cutoff = base::Time::Now() - kReportsInterval;
415
416  // Erase items older than cutoff because we will never care about them again.
417  while (!phishing_report_times_.empty() &&
418         phishing_report_times_.front() < cutoff) {
419    phishing_report_times_.pop();
420  }
421
422  // Return the number of elements that are above the cutoff.
423  return phishing_report_times_.size();
424}
425
426bool ClientSideDetectionService::InitializePrivateNetworks() {
427  static const char* const kPrivateNetworks[] = {
428    "10.0.0.0/8",
429    "127.0.0.0/8",
430    "172.16.0.0/12",
431    "192.168.0.0/16",
432    // IPv6 address ranges
433    "fc00::/7",
434    "fec0::/10",
435    "::1/128",
436  };
437
438  for (size_t i = 0; i < arraysize(kPrivateNetworks); ++i) {
439    net::IPAddressNumber ip_number;
440    size_t prefix_length;
441    if (net::ParseCIDRBlock(kPrivateNetworks[i], &ip_number, &prefix_length)) {
442      private_networks_.push_back(std::make_pair(ip_number, prefix_length));
443    } else {
444      DLOG(FATAL) << "Unable to parse IP address range: "
445                  << kPrivateNetworks[i];
446      return false;
447    }
448  }
449  return true;
450}
451
452}  // namespace safe_browsing
453