scorer_unittest.cc revision 9ab5563a3196760eb381d102cbb2bc0f7abc6a50
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "chrome/renderer/safe_browsing/scorer.h"
6
7#include "base/containers/hash_tables.h"
8#include "base/files/file_path.h"
9#include "base/files/scoped_temp_dir.h"
10#include "base/format_macros.h"
11#include "base/memory/scoped_ptr.h"
12#include "base/message_loop/message_loop.h"
13#include "base/threading/thread.h"
14#include "chrome/common/safe_browsing/client_model.pb.h"
15#include "chrome/renderer/safe_browsing/features.h"
16#include "testing/gmock/include/gmock/gmock.h"
17#include "testing/gtest/include/gtest/gtest.h"
18
19namespace safe_browsing {
20
21class PhishingScorerTest : public ::testing::Test {
22 protected:
23  virtual void SetUp() {
24    // Setup a simple model.  Note that the scorer does not care about
25    // how features are encoded so we use readable strings here to make
26    // the test simpler to follow.
27    model_.Clear();
28    model_.add_hashes("feature1");
29    model_.add_hashes("feature2");
30    model_.add_hashes("feature3");
31    model_.add_hashes("token one");
32    model_.add_hashes("token two");
33
34    ClientSideModel::Rule* rule;
35    rule = model_.add_rule();
36    rule->set_weight(0.5);
37
38    rule = model_.add_rule();
39    rule->add_feature(0);  // feature1
40    rule->set_weight(2.0);
41
42    rule = model_.add_rule();
43    rule->add_feature(0);  // feature1
44    rule->add_feature(1);  // feature2
45    rule->set_weight(3.0);
46
47    model_.add_page_term(3);  // token one
48    model_.add_page_term(4);  // token two
49
50    // These will be murmur3 hashes, but for this test it's not necessary
51    // that the hashes correspond to actual words.
52    model_.add_page_word(1000U);
53    model_.add_page_word(2000U);
54    model_.add_page_word(3000U);
55
56    model_.set_max_words_per_term(2);
57    model_.set_murmur_hash_seed(12345U);
58  }
59
60  ClientSideModel model_;
61};
62
63TEST_F(PhishingScorerTest, HasValidModel) {
64  scoped_ptr<Scorer> scorer;
65  scorer.reset(Scorer::Create(model_.SerializeAsString()));
66  EXPECT_TRUE(scorer.get() != NULL);
67
68  // Invalid model string.
69  scorer.reset(Scorer::Create("bogus string"));
70  EXPECT_FALSE(scorer.get());
71
72  // Mode is missing a required field.
73  model_.clear_max_words_per_term();
74  scorer.reset(Scorer::Create(model_.SerializePartialAsString()));
75  EXPECT_FALSE(scorer.get());
76}
77
78TEST_F(PhishingScorerTest, PageTerms) {
79  scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
80  ASSERT_TRUE(scorer.get());
81  base::hash_set<std::string> expected_page_terms;
82  expected_page_terms.insert("token one");
83  expected_page_terms.insert("token two");
84  EXPECT_THAT(scorer->page_terms(),
85              ::testing::ContainerEq(expected_page_terms));
86}
87
88TEST_F(PhishingScorerTest, PageWords) {
89  scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
90  ASSERT_TRUE(scorer.get());
91  base::hash_set<uint32> expected_page_words;
92  expected_page_words.insert(1000U);
93  expected_page_words.insert(2000U);
94  expected_page_words.insert(3000U);
95  EXPECT_THAT(scorer->page_words(),
96              ::testing::ContainerEq(expected_page_words));
97  EXPECT_EQ(2U, scorer->max_words_per_term());
98  EXPECT_EQ(12345U, scorer->murmurhash3_seed());
99}
100
101TEST_F(PhishingScorerTest, ComputeScore) {
102  scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
103  ASSERT_TRUE(scorer.get());
104
105  // An empty feature map should match the empty rule.
106  FeatureMap features;
107  // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1)
108  // => 0.62245933120185459
109  EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
110  // Same if the feature does not match any rule.
111  EXPECT_TRUE(features.AddBooleanFeature("not existing feature"));
112  EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
113
114  // Feature 1 matches which means that the logodds will be:
115  //   0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8
116  //   => p = 0.6899744811276125
117  EXPECT_TRUE(features.AddRealFeature("feature1", 0.15));
118  EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features));
119
120  // Now, both feature 1 and feature 2 match.  Expected logodds:
121  //   0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) +
122  //   3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2) weight = 9.8
123  //   => p = 0.99999627336071584
124  EXPECT_TRUE(features.AddBooleanFeature("feature2"));
125  EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features));
126}
127}  // namespace safe_browsing
128