1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6
7#include <map>
8#include <string>
9#include <vector>
10
11#include "base/memory/scoped_ptr.h"
12#include "base/message_loop/message_loop.h"
13#include "base/strings/stringprintf.h"
14#include "base/time/time.h"
15#include "chrome/browser/history/history_backend.h"
16#include "chrome/browser/history/history_service.h"
17#include "chrome/browser/history/history_service_factory.h"
18#include "chrome/browser/profiles/profile.h"
19#include "chrome/browser/safe_browsing/browser_features.h"
20#include "chrome/browser/safe_browsing/client_side_detection_host.h"
21#include "chrome/browser/safe_browsing/database_manager.h"
22#include "chrome/browser/safe_browsing/safe_browsing_service.h"
23#include "chrome/browser/safe_browsing/ui_manager.h"
24#include "chrome/common/safe_browsing/csd.pb.h"
25#include "chrome/test/base/chrome_render_view_host_test_harness.h"
26#include "chrome/test/base/testing_profile.h"
27#include "content/public/browser/navigation_controller.h"
28#include "content/public/browser/web_contents.h"
29#include "content/public/common/referrer.h"
30#include "content/public/test/test_browser_thread.h"
31#include "content/public/test/web_contents_tester.h"
32#include "testing/gmock/include/gmock/gmock.h"
33#include "testing/gtest/include/gtest/gtest.h"
34#include "ui/base/page_transition_types.h"
35#include "url/gurl.h"
36
37using content::BrowserThread;
38using content::ResourceType;
39using content::WebContentsTester;
40
41using testing::DoAll;
42using testing::Return;
43using testing::StrictMock;
44
45namespace safe_browsing {
46
47namespace {
48
49class MockSafeBrowsingDatabaseManager : public SafeBrowsingDatabaseManager {
50 public:
51  explicit MockSafeBrowsingDatabaseManager(
52      const scoped_refptr<SafeBrowsingService>& service)
53      : SafeBrowsingDatabaseManager(service) { }
54
55  MOCK_METHOD1(MatchMalwareIP, bool(const std::string& ip_address));
56
57 protected:
58  virtual ~MockSafeBrowsingDatabaseManager() {}
59
60 private:
61  DISALLOW_COPY_AND_ASSIGN(MockSafeBrowsingDatabaseManager);
62};
63
64class MockClientSideDetectionHost : public ClientSideDetectionHost {
65 public:
66  MockClientSideDetectionHost(
67      content::WebContents* tab,
68      SafeBrowsingDatabaseManager* database_manager)
69      : ClientSideDetectionHost(tab) {
70    set_safe_browsing_managers(NULL, database_manager);
71  }
72
73  virtual ~MockClientSideDetectionHost() {}
74
75  MOCK_METHOD1(IsBadIpAddress, bool(const std::string&));
76};
77}  // namespace
78
79class BrowserFeatureExtractorTest : public ChromeRenderViewHostTestHarness {
80 protected:
81  virtual void SetUp() {
82    ChromeRenderViewHostTestHarness::SetUp();
83    ASSERT_TRUE(profile()->CreateHistoryService(
84        true /* delete_file */, false /* no_db */));
85
86    db_manager_ = new StrictMock<MockSafeBrowsingDatabaseManager>(
87        SafeBrowsingService::CreateSafeBrowsingService());
88    host_.reset(new StrictMock<MockClientSideDetectionHost>(
89        web_contents(), db_manager_.get()));
90    extractor_.reset(
91        new BrowserFeatureExtractor(web_contents(), host_.get()));
92    num_pending_ = 0;
93    browse_info_.reset(new BrowseInfo);
94  }
95
96  virtual void TearDown() {
97    extractor_.reset();
98    host_.reset();
99    db_manager_ = NULL;
100    profile()->DestroyHistoryService();
101    ChromeRenderViewHostTestHarness::TearDown();
102    ASSERT_EQ(0, num_pending_);
103  }
104
105  HistoryService* history_service() {
106    return HistoryServiceFactory::GetForProfile(profile(),
107                                                Profile::EXPLICIT_ACCESS);
108  }
109
110  void SetRedirectChain(const std::vector<GURL>& redirect_chain,
111                        bool new_host) {
112    browse_info_->url_redirects = redirect_chain;
113    if (new_host) {
114      browse_info_->host_redirects = redirect_chain;
115    }
116  }
117
118  // Wrapper around NavigateAndCommit that also sets the redirect chain to
119  // a sane value.
120  void SimpleNavigateAndCommit(const GURL& url) {
121    std::vector<GURL> redirect_chain;
122    redirect_chain.push_back(url);
123    SetRedirectChain(redirect_chain, true);
124    NavigateAndCommit(url, GURL(), ui::PAGE_TRANSITION_LINK);
125  }
126
127  // This is similar to NavigateAndCommit that is in WebContentsTester, but
128  // allows us to specify the referrer and page_transition_type.
129  void NavigateAndCommit(const GURL& url,
130                         const GURL& referrer,
131                         ui::PageTransition type) {
132    web_contents()->GetController().LoadURL(
133        url, content::Referrer(referrer, blink::WebReferrerPolicyDefault),
134        type, std::string());
135
136    static int page_id = 0;
137    content::RenderFrameHost* rfh =
138        WebContentsTester::For(web_contents())->GetPendingMainFrame();
139    if (!rfh) {
140      rfh = web_contents()->GetMainFrame();
141    }
142    WebContentsTester::For(web_contents())->ProceedWithCrossSiteNavigation();
143    WebContentsTester::For(web_contents())->TestDidNavigateWithReferrer(
144        rfh, ++page_id, url,
145        content::Referrer(referrer, blink::WebReferrerPolicyDefault), type);
146  }
147
148  bool ExtractFeatures(ClientPhishingRequest* request) {
149    StartExtractFeatures(request);
150    base::MessageLoop::current()->Run();
151    EXPECT_EQ(1U, success_.count(request));
152    return success_.count(request) ? success_[request] : false;
153  }
154
155  void StartExtractFeatures(ClientPhishingRequest* request) {
156    success_.erase(request);
157    ++num_pending_;
158    extractor_->ExtractFeatures(
159        browse_info_.get(),
160        request,
161        base::Bind(&BrowserFeatureExtractorTest::ExtractFeaturesDone,
162                   base::Unretained(this)));
163  }
164
165  void GetFeatureMap(const ClientPhishingRequest& request,
166                     std::map<std::string, double>* features) {
167    for (int i = 0; i < request.non_model_feature_map_size(); ++i) {
168      const ClientPhishingRequest::Feature& feature =
169          request.non_model_feature_map(i);
170      EXPECT_EQ(0U, features->count(feature.name()));
171      (*features)[feature.name()] = feature.value();
172    }
173  }
174
175  void ExtractMalwareFeatures(ClientMalwareRequest* request) {
176    // Feature extraction takes ownership of the request object
177    // and passes it along to the done callback in the end.
178    StartExtractMalwareFeatures(request);
179    base::MessageLoopForUI::current()->Run();
180    EXPECT_EQ(1U, success_.count(request));
181    EXPECT_TRUE(success_[request]);
182  }
183
184  void StartExtractMalwareFeatures(ClientMalwareRequest* request) {
185    success_.erase(request);
186    ++num_pending_;
187    // We temporarily give up ownership of request to ExtractMalwareFeatures
188    // but we'll regain ownership of it in ExtractMalwareFeaturesDone.
189    extractor_->ExtractMalwareFeatures(
190        browse_info_.get(),
191        request,
192        base::Bind(&BrowserFeatureExtractorTest::ExtractMalwareFeaturesDone,
193                   base::Unretained(this)));
194  }
195
196  void GetMalwareUrls(
197      const ClientMalwareRequest& request,
198      std::map<std::string, std::set<std::string> >* urls) {
199    for (int i = 0; i < request.bad_ip_url_info_size(); ++i) {
200      const ClientMalwareRequest::UrlInfo& urlinfo =
201          request.bad_ip_url_info(i);
202      (*urls)[urlinfo.ip()].insert(urlinfo.url());
203    }
204  }
205
206  int num_pending_;  // Number of pending feature extractions.
207  scoped_ptr<BrowserFeatureExtractor> extractor_;
208  std::map<void*, bool> success_;
209  scoped_ptr<BrowseInfo> browse_info_;
210  scoped_ptr<StrictMock<MockClientSideDetectionHost> > host_;
211  scoped_refptr<StrictMock<MockSafeBrowsingDatabaseManager> > db_manager_;
212
213 private:
214  void ExtractFeaturesDone(bool success,
215                           scoped_ptr<ClientPhishingRequest> request) {
216    EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::UI));
217    ASSERT_EQ(0U, success_.count(request.get()));
218    // The pointer doesn't really belong to us.  It belongs to
219    // the test case which passed it to ExtractFeatures above.
220    success_[request.release()] = success;
221    if (--num_pending_ == 0) {
222      base::MessageLoop::current()->Quit();
223    }
224  }
225
226  void ExtractMalwareFeaturesDone(
227      bool success,
228      scoped_ptr<ClientMalwareRequest> request) {
229    EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::UI));
230    ASSERT_EQ(0U, success_.count(request.get()));
231    // The pointer doesn't really belong to us.  It belongs to
232    // the test case which passed it to ExtractMalwareFeatures above.
233    success_[request.release()] = success;
234    if (--num_pending_ == 0) {
235      base::MessageLoopForUI::current()->Quit();
236    }
237  }
238};
239
240TEST_F(BrowserFeatureExtractorTest, UrlNotInHistory) {
241  ClientPhishingRequest request;
242  SimpleNavigateAndCommit(GURL("http://www.google.com"));
243  request.set_url("http://www.google.com/");
244  request.set_client_score(0.5);
245  EXPECT_FALSE(ExtractFeatures(&request));
246}
247
248TEST_F(BrowserFeatureExtractorTest, RequestNotInitialized) {
249  ClientPhishingRequest request;
250  request.set_url("http://www.google.com/");
251  // Request is missing the score value.
252  SimpleNavigateAndCommit(GURL("http://www.google.com"));
253  EXPECT_FALSE(ExtractFeatures(&request));
254}
255
256TEST_F(BrowserFeatureExtractorTest, UrlInHistory) {
257  history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
258                             base::Time::Now(),
259                             history::SOURCE_BROWSED);
260  history_service()->AddPage(GURL("https://www.foo.com/gaa.html"),
261                             base::Time::Now(),
262                             history::SOURCE_BROWSED);  // same host HTTPS.
263  history_service()->AddPage(GURL("http://www.foo.com/gaa.html"),
264                             base::Time::Now(),
265                             history::SOURCE_BROWSED);  // same host HTTP.
266  history_service()->AddPage(GURL("http://bar.foo.com/gaa.html"),
267                             base::Time::Now(),
268                             history::SOURCE_BROWSED);  // different host.
269  history_service()->AddPage(GURL("http://www.foo.com/bar.html?a=b"),
270                             base::Time::Now() - base::TimeDelta::FromHours(23),
271                             NULL, 0, GURL(), history::RedirectList(),
272                             ui::PAGE_TRANSITION_LINK,
273                             history::SOURCE_BROWSED, false);
274  history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
275                             base::Time::Now() - base::TimeDelta::FromHours(25),
276                             NULL, 0, GURL(), history::RedirectList(),
277                             ui::PAGE_TRANSITION_TYPED,
278                             history::SOURCE_BROWSED, false);
279  history_service()->AddPage(GURL("https://www.foo.com/goo.html"),
280                             base::Time::Now() - base::TimeDelta::FromDays(5),
281                             NULL, 0, GURL(), history::RedirectList(),
282                             ui::PAGE_TRANSITION_TYPED,
283                             history::SOURCE_BROWSED, false);
284
285  SimpleNavigateAndCommit(GURL("http://www.foo.com/bar.html"));
286
287  ClientPhishingRequest request;
288  request.set_url("http://www.foo.com/bar.html");
289  request.set_client_score(0.5);
290  EXPECT_TRUE(ExtractFeatures(&request));
291  std::map<std::string, double> features;
292  GetFeatureMap(request, &features);
293
294  EXPECT_EQ(12U, features.size());
295  EXPECT_DOUBLE_EQ(2.0, features[features::kUrlHistoryVisitCount]);
296  EXPECT_DOUBLE_EQ(1.0,
297                   features[features::kUrlHistoryVisitCountMoreThan24hAgo]);
298  EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryTypedCount]);
299  EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]);
300  EXPECT_DOUBLE_EQ(4.0, features[features::kHttpHostVisitCount]);
301  EXPECT_DOUBLE_EQ(2.0, features[features::kHttpsHostVisitCount]);
302  EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]);
303  EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpsHostVisitMoreThan24hAgo]);
304
305  request.Clear();
306  request.set_url("http://bar.foo.com/gaa.html");
307  request.set_client_score(0.5);
308  EXPECT_TRUE(ExtractFeatures(&request));
309  features.clear();
310  GetFeatureMap(request, &features);
311  // We have less features because we didn't Navigate to this page, so we don't
312  // have Referrer, IsFirstNavigation, HasSSLReferrer, etc.
313  EXPECT_EQ(7U, features.size());
314  EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryVisitCount]);
315  EXPECT_DOUBLE_EQ(0.0,
316                   features[features::kUrlHistoryVisitCountMoreThan24hAgo]);
317  EXPECT_DOUBLE_EQ(0.0, features[features::kUrlHistoryTypedCount]);
318  EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]);
319  EXPECT_DOUBLE_EQ(1.0, features[features::kHttpHostVisitCount]);
320  EXPECT_DOUBLE_EQ(0.0, features[features::kHttpsHostVisitCount]);
321  EXPECT_DOUBLE_EQ(0.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]);
322  EXPECT_FALSE(features.count(features::kFirstHttpsHostVisitMoreThan24hAgo));
323}
324
325TEST_F(BrowserFeatureExtractorTest, MultipleRequestsAtOnce) {
326  history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
327                             base::Time::Now(),
328                             history::SOURCE_BROWSED);
329  SimpleNavigateAndCommit(GURL("http:/www.foo.com/bar.html"));
330  ClientPhishingRequest request;
331  request.set_url("http://www.foo.com/bar.html");
332  request.set_client_score(0.5);
333  StartExtractFeatures(&request);
334
335  SimpleNavigateAndCommit(GURL("http://www.foo.com/goo.html"));
336  ClientPhishingRequest request2;
337  request2.set_url("http://www.foo.com/goo.html");
338  request2.set_client_score(1.0);
339  StartExtractFeatures(&request2);
340
341  base::MessageLoop::current()->Run();
342  EXPECT_TRUE(success_[&request]);
343  // Success is false because the second URL is not in the history and we are
344  // not able to distinguish between a missing URL in the history and an error.
345  EXPECT_FALSE(success_[&request2]);
346}
347
348TEST_F(BrowserFeatureExtractorTest, BrowseFeatures) {
349  history_service()->AddPage(GURL("http://www.foo.com/"),
350                             base::Time::Now(),
351                             history::SOURCE_BROWSED);
352  history_service()->AddPage(GURL("http://www.foo.com/page.html"),
353                             base::Time::Now(),
354                             history::SOURCE_BROWSED);
355  history_service()->AddPage(GURL("http://www.bar.com/"),
356                             base::Time::Now(),
357                             history::SOURCE_BROWSED);
358  history_service()->AddPage(GURL("http://www.bar.com/other_page.html"),
359                             base::Time::Now(),
360                             history::SOURCE_BROWSED);
361  history_service()->AddPage(GURL("http://www.baz.com/"),
362                             base::Time::Now(),
363                             history::SOURCE_BROWSED);
364
365  ClientPhishingRequest request;
366  request.set_url("http://www.foo.com/");
367  request.set_client_score(0.5);
368  std::vector<GURL> redirect_chain;
369  redirect_chain.push_back(GURL("http://somerandomwebsite.com/"));
370  redirect_chain.push_back(GURL("http://www.foo.com/"));
371  SetRedirectChain(redirect_chain, true);
372  browse_info_->http_status_code = 200;
373  NavigateAndCommit(GURL("http://www.foo.com/"),
374                    GURL("http://google.com/"),
375                    ui::PageTransitionFromInt(
376                        ui::PAGE_TRANSITION_AUTO_BOOKMARK |
377                        ui::PAGE_TRANSITION_FORWARD_BACK));
378
379  EXPECT_TRUE(ExtractFeatures(&request));
380  std::map<std::string, double> features;
381  GetFeatureMap(request, &features);
382
383  EXPECT_EQ(1.0,
384            features[base::StringPrintf("%s=%s",
385                                        features::kReferrer,
386                                        "http://google.com/")]);
387  EXPECT_EQ(1.0,
388            features[base::StringPrintf("%s[0]=%s",
389                                        features::kRedirect,
390                                        "http://somerandomwebsite.com/")]);
391  // We shouldn't have a feature for the last redirect in the chain, since it
392  // should always be the URL that we navigated to.
393  EXPECT_EQ(0.0,
394            features[base::StringPrintf("%s[1]=%s",
395                                        features::kRedirect,
396                                        "http://foo.com/")]);
397  EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
398  EXPECT_EQ(2.0, features[features::kPageTransitionType]);
399  EXPECT_EQ(1.0, features[features::kIsFirstNavigation]);
400  EXPECT_EQ(200.0, features[features::kHttpStatusCode]);
401
402  request.Clear();
403  request.set_url("http://www.foo.com/page.html");
404  request.set_client_score(0.5);
405  redirect_chain.clear();
406  redirect_chain.push_back(GURL("http://www.foo.com/redirect"));
407  redirect_chain.push_back(GURL("http://www.foo.com/second_redirect"));
408  redirect_chain.push_back(GURL("http://www.foo.com/page.html"));
409  SetRedirectChain(redirect_chain, false);
410  browse_info_->http_status_code = 404;
411  NavigateAndCommit(GURL("http://www.foo.com/page.html"),
412                    GURL("http://www.foo.com"),
413                    ui::PageTransitionFromInt(
414                        ui::PAGE_TRANSITION_TYPED |
415                        ui::PAGE_TRANSITION_CHAIN_START |
416                        ui::PAGE_TRANSITION_CLIENT_REDIRECT));
417
418  EXPECT_TRUE(ExtractFeatures(&request));
419  features.clear();
420  GetFeatureMap(request, &features);
421
422  EXPECT_EQ(1,
423            features[base::StringPrintf("%s=%s",
424                                        features::kReferrer,
425                                        "http://www.foo.com/")]);
426  EXPECT_EQ(1.0,
427            features[base::StringPrintf("%s[0]=%s",
428                                        features::kRedirect,
429                                        "http://www.foo.com/redirect")]);
430  EXPECT_EQ(1.0,
431            features[base::StringPrintf("%s[1]=%s",
432                                        features::kRedirect,
433                                        "http://www.foo.com/second_redirect")]);
434  EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
435  EXPECT_EQ(1.0, features[features::kPageTransitionType]);
436  EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
437  EXPECT_EQ(1.0,
438            features[base::StringPrintf("%s%s=%s",
439                                        features::kHostPrefix,
440                                        features::kReferrer,
441                                        "http://google.com/")]);
442  EXPECT_EQ(1.0,
443            features[base::StringPrintf("%s%s[0]=%s",
444                                        features::kHostPrefix,
445                                        features::kRedirect,
446                                        "http://somerandomwebsite.com/")]);
447  EXPECT_EQ(2.0,
448            features[base::StringPrintf("%s%s",
449                                        features::kHostPrefix,
450                                        features::kPageTransitionType)]);
451  EXPECT_EQ(1.0,
452            features[base::StringPrintf("%s%s",
453                                        features::kHostPrefix,
454                                        features::kIsFirstNavigation)]);
455  EXPECT_EQ(404.0, features[features::kHttpStatusCode]);
456
457  request.Clear();
458  request.set_url("http://www.bar.com/");
459  request.set_client_score(0.5);
460  redirect_chain.clear();
461  redirect_chain.push_back(GURL("http://www.foo.com/page.html"));
462  redirect_chain.push_back(GURL("http://www.bar.com/"));
463  SetRedirectChain(redirect_chain, true);
464  NavigateAndCommit(GURL("http://www.bar.com/"),
465                    GURL("http://www.foo.com/page.html"),
466                    ui::PageTransitionFromInt(
467                        ui::PAGE_TRANSITION_LINK |
468                        ui::PAGE_TRANSITION_CHAIN_END |
469                        ui::PAGE_TRANSITION_CLIENT_REDIRECT));
470
471  EXPECT_TRUE(ExtractFeatures(&request));
472  features.clear();
473  GetFeatureMap(request, &features);
474
475  EXPECT_EQ(1.0,
476            features[base::StringPrintf("%s=%s",
477                                        features::kReferrer,
478                                        "http://www.foo.com/page.html")]);
479  EXPECT_EQ(1.0,
480            features[base::StringPrintf("%s[0]=%s",
481                                        features::kRedirect,
482                                        "http://www.foo.com/page.html")]);
483  EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
484  EXPECT_EQ(0.0, features[features::kPageTransitionType]);
485  EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
486
487  // Should not have host features.
488  EXPECT_EQ(0U,
489            features.count(base::StringPrintf("%s%s",
490                                              features::kHostPrefix,
491                                              features::kPageTransitionType)));
492  EXPECT_EQ(0U,
493            features.count(base::StringPrintf("%s%s",
494                                              features::kHostPrefix,
495                                              features::kIsFirstNavigation)));
496
497  request.Clear();
498  request.set_url("http://www.bar.com/other_page.html");
499  request.set_client_score(0.5);
500  redirect_chain.clear();
501  redirect_chain.push_back(GURL("http://www.bar.com/other_page.html"));
502  SetRedirectChain(redirect_chain, false);
503  NavigateAndCommit(GURL("http://www.bar.com/other_page.html"),
504                    GURL("http://www.bar.com/"),
505                    ui::PAGE_TRANSITION_LINK);
506
507  EXPECT_TRUE(ExtractFeatures(&request));
508  features.clear();
509  GetFeatureMap(request, &features);
510
511  EXPECT_EQ(1.0,
512            features[base::StringPrintf("%s=%s",
513                                        features::kReferrer,
514                                        "http://www.bar.com/")]);
515  EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
516  EXPECT_EQ(0.0, features[features::kPageTransitionType]);
517  EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
518  EXPECT_EQ(1.0,
519            features[base::StringPrintf("%s%s=%s",
520                                        features::kHostPrefix,
521                                        features::kReferrer,
522                                        "http://www.foo.com/page.html")]);
523  EXPECT_EQ(1.0,
524            features[base::StringPrintf("%s%s[0]=%s",
525                                        features::kHostPrefix,
526                                        features::kRedirect,
527                                        "http://www.foo.com/page.html")]);
528  EXPECT_EQ(0.0,
529            features[base::StringPrintf("%s%s",
530                                        features::kHostPrefix,
531                                        features::kPageTransitionType)]);
532  EXPECT_EQ(0.0,
533            features[base::StringPrintf("%s%s",
534                                        features::kHostPrefix,
535                                        features::kIsFirstNavigation)]);
536  request.Clear();
537  request.set_url("http://www.baz.com/");
538  request.set_client_score(0.5);
539  redirect_chain.clear();
540  redirect_chain.push_back(GURL("https://bankofamerica.com"));
541  redirect_chain.push_back(GURL("http://www.baz.com/"));
542  SetRedirectChain(redirect_chain, true);
543  NavigateAndCommit(GURL("http://www.baz.com"),
544                    GURL("https://bankofamerica.com"),
545                    ui::PAGE_TRANSITION_GENERATED);
546
547  EXPECT_TRUE(ExtractFeatures(&request));
548  features.clear();
549  GetFeatureMap(request, &features);
550
551  EXPECT_EQ(1.0,
552            features[base::StringPrintf("%s[0]=%s",
553                                        features::kRedirect,
554                                        features::kSecureRedirectValue)]);
555  EXPECT_EQ(1.0, features[features::kHasSSLReferrer]);
556  EXPECT_EQ(5.0, features[features::kPageTransitionType]);
557  // Should not have redirect or host features.
558  EXPECT_EQ(0U,
559            features.count(base::StringPrintf("%s%s",
560                                              features::kHostPrefix,
561                                              features::kPageTransitionType)));
562  EXPECT_EQ(0U,
563            features.count(base::StringPrintf("%s%s",
564                                              features::kHostPrefix,
565                                              features::kIsFirstNavigation)));
566  EXPECT_EQ(5.0, features[features::kPageTransitionType]);
567}
568
569TEST_F(BrowserFeatureExtractorTest, SafeBrowsingFeatures) {
570  SimpleNavigateAndCommit(GURL("http://www.foo.com/malware.html"));
571  ClientPhishingRequest request;
572  request.set_url("http://www.foo.com/malware.html");
573  request.set_client_score(0.5);
574
575  browse_info_->unsafe_resource.reset(
576      new SafeBrowsingUIManager::UnsafeResource);
577  browse_info_->unsafe_resource->url = GURL("http://www.malware.com/");
578  browse_info_->unsafe_resource->original_url = GURL("http://www.good.com/");
579  browse_info_->unsafe_resource->is_subresource = true;
580  browse_info_->unsafe_resource->threat_type = SB_THREAT_TYPE_URL_MALWARE;
581
582  ExtractFeatures(&request);
583  std::map<std::string, double> features;
584  GetFeatureMap(request, &features);
585  EXPECT_TRUE(features.count(base::StringPrintf(
586      "%s%s",
587      features::kSafeBrowsingMaliciousUrl,
588      "http://www.malware.com/")));
589  EXPECT_TRUE(features.count(base::StringPrintf(
590      "%s%s",
591       features::kSafeBrowsingOriginalUrl,
592        "http://www.good.com/")));
593  EXPECT_DOUBLE_EQ(1.0, features[features::kSafeBrowsingIsSubresource]);
594  EXPECT_DOUBLE_EQ(2.0, features[features::kSafeBrowsingThreatType]);
595}
596
597TEST_F(BrowserFeatureExtractorTest, MalwareFeatures) {
598  ClientMalwareRequest request;
599  request.set_url("http://www.foo.com/");
600
601  std::vector<IPUrlInfo> bad_urls;
602  bad_urls.push_back(
603      IPUrlInfo("http://bad.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
604  bad_urls.push_back(
605      IPUrlInfo("http://evil.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
606  browse_info_->ips.insert(std::make_pair("193.5.163.8", bad_urls));
607  browse_info_->ips.insert(std::make_pair("92.92.92.92", bad_urls));
608  std::vector<IPUrlInfo> good_urls;
609  good_urls.push_back(
610      IPUrlInfo("http://ok.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
611  browse_info_->ips.insert(std::make_pair("23.94.78.1", good_urls));
612  EXPECT_CALL(*db_manager_, MatchMalwareIP("193.5.163.8"))
613      .WillOnce(Return(true));
614  EXPECT_CALL(*db_manager_, MatchMalwareIP("92.92.92.92"))
615      .WillOnce(Return(true));
616  EXPECT_CALL(*db_manager_, MatchMalwareIP("23.94.78.1"))
617      .WillOnce(Return(false));
618
619  ExtractMalwareFeatures(&request);
620  EXPECT_EQ(4, request.bad_ip_url_info_size());
621  std::map<std::string, std::set<std::string> > result_urls;
622  GetMalwareUrls(request, &result_urls);
623
624  EXPECT_EQ(2U, result_urls.size());
625  EXPECT_TRUE(result_urls.count("193.5.163.8"));
626  std::set<std::string> urls = result_urls["193.5.163.8"];
627  EXPECT_EQ(2U, urls.size());
628  EXPECT_TRUE(urls.find("http://bad.com") != urls.end());
629  EXPECT_TRUE(urls.find("http://evil.com") != urls.end());
630  EXPECT_TRUE(result_urls.count("92.92.92.92"));
631  urls = result_urls["92.92.92.92"];
632  EXPECT_EQ(2U, urls.size());
633  EXPECT_TRUE(urls.find("http://bad.com") != urls.end());
634  EXPECT_TRUE(urls.find("http://evil.com") != urls.end());
635}
636
637TEST_F(BrowserFeatureExtractorTest, MalwareFeatures_ExceedLimit) {
638  ClientMalwareRequest request;
639  request.set_url("http://www.foo.com/");
640
641  std::vector<IPUrlInfo> bad_urls;
642  bad_urls.push_back(
643      IPUrlInfo("http://bad.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
644  std::vector<std::string> ips;
645  for (int i = 0; i < 7; ++i) {  // Add 7 ips
646    std::string ip = base::StringPrintf("%d.%d.%d.%d", i, i, i, i);
647    ips.push_back(ip);
648    browse_info_->ips.insert(std::make_pair(ip, bad_urls));
649
650    // First ip is good but all the others are bad.
651    EXPECT_CALL(*db_manager_, MatchMalwareIP(ip)).WillOnce(Return(i > 0));
652  }
653
654  ExtractMalwareFeatures(&request);
655  // The number of IP matched url we store is capped at 5 IPs per request.
656  EXPECT_EQ(5, request.bad_ip_url_info_size());
657}
658
659}  // namespace safe_browsing
660