1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/renderer/safe_browsing/features.h"
6
7#include "base/logging.h"
8#include "base/metrics/histogram.h"
9
10namespace safe_browsing {
11
12const size_t FeatureMap::kMaxFeatureMapSize = 10000;
13
14FeatureMap::FeatureMap() {}
15FeatureMap::~FeatureMap() {}
16
17bool FeatureMap::AddBooleanFeature(const std::string& name) {
18  return AddRealFeature(name, 1.0);
19}
20
21bool FeatureMap::AddRealFeature(const std::string& name, double value) {
22  if (features_.size() >= kMaxFeatureMapSize) {
23    // If we hit this case, it indicates that either kMaxFeatureMapSize is
24    // too small, or there is a bug causing too many features to be added.
25    // In this case, we'll log to a histogram so we can see that this is
26    // happening, and make phishing classification fail silently.
27    LOG(ERROR) << "Not adding feature: " << name << " because the "
28               << "feature map is too large.";
29    UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1);
30    return false;
31  }
32  // We only expect features in the range [0.0, 1.0], so fail if the feature is
33  // outside this range.
34  if (value < 0.0 || value > 1.0) {
35    LOG(ERROR) << "Not adding feature: " << name << " because the value "
36               << value << " is not in the range [0.0, 1.0].";
37    UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1);
38    return false;
39  }
40
41  features_[name] = value;
42  return true;
43}
44
45void FeatureMap::Clear() {
46  features_.clear();
47}
48
// String names of the individual features, grouped by the kind of signal
// they describe.
// NOTE(review): the names ending in '=' look like prefixes to which a token
// (TLD, domain, path component, term, ...) is appended by the caller --
// confirm against the code that uses them.
namespace features {

// ---- URL: host ----
const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress";
const char kUrlTldToken[] = "UrlTld=";
const char kUrlDomainToken[] = "UrlDomain=";
const char kUrlOtherHostToken[] = "UrlOtherHostToken=";

// ---- URL: host, aggregated over the "other host" tokens ----
const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1";
const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3";

// ---- URL: path ----
const char kUrlPathToken[] = "UrlPathToken=";

// ---- DOM: HTML forms and their input elements ----
const char kPageHasForms[] = "PageHasForms";
const char kPageActionOtherDomainFreq[] = "PageActionOtherDomainFreq";
const char kPageHasTextInputs[] = "PageHasTextInputs";
const char kPageHasPswdInputs[] = "PageHasPswdInputs";
const char kPageHasRadioInputs[] = "PageHasRadioInputs";
const char kPageHasCheckInputs[] = "PageHasCheckInputs";

// ---- DOM: HTML links ----
const char kPageExternalLinksFreq[] = "PageExternalLinksFreq";
const char kPageLinkDomain[] = "PageLinkDomain=";
const char kPageSecureLinksFreq[] = "PageSecureLinksFreq";

// ---- DOM: HTML script tags ----
const char kPageNumScriptTagsGTOne[] = "PageNumScriptTags>1";
const char kPageNumScriptTagsGTSix[] = "PageNumScriptTags>6";

// ---- DOM: everything else ----
const char kPageImgOtherDomainFreq[] = "PageImgOtherDomainFreq";

// ---- Page terms ----
const char kPageTerm[] = "PageTerm=";

}  // namespace features
87}  // namespace safe_browsing
88