1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/renderer/safe_browsing/features.h" 6 7#include "base/logging.h" 8#include "base/metrics/histogram.h" 9 10namespace safe_browsing { 11 12const size_t FeatureMap::kMaxFeatureMapSize = 10000; 13 14FeatureMap::FeatureMap() {} 15FeatureMap::~FeatureMap() {} 16 17bool FeatureMap::AddBooleanFeature(const std::string& name) { 18 return AddRealFeature(name, 1.0); 19} 20 21bool FeatureMap::AddRealFeature(const std::string& name, double value) { 22 if (features_.size() >= kMaxFeatureMapSize) { 23 // If we hit this case, it indicates that either kMaxFeatureMapSize is 24 // too small, or there is a bug causing too many features to be added. 25 // In this case, we'll log to a histogram so we can see that this is 26 // happening, and make phishing classification fail silently. 27 LOG(ERROR) << "Not adding feature: " << name << " because the " 28 << "feature map is too large."; 29 UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1); 30 return false; 31 } 32 // We only expect features in the range [0.0, 1.0], so fail if the feature is 33 // outside this range. 34 if (value < 0.0 || value > 1.0) { 35 LOG(ERROR) << "Not adding feature: " << name << " because the value " 36 << value << " is not in the range [0.0, 1.0]."; 37 UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1); 38 return false; 39 } 40 41 features_[name] = value; 42 return true; 43} 44 45void FeatureMap::Clear() { 46 features_.clear(); 47} 48 49namespace features { 50// URL host features 51const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress"; 52const char kUrlTldToken[] = "UrlTld="; 53const char kUrlDomainToken[] = "UrlDomain="; 54const char kUrlOtherHostToken[] = "UrlOtherHostToken="; 55 56// URL host aggregate features 57const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1"; 58const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3"; 59 60// URL path features 61const char kUrlPathToken[] = "UrlPathToken="; 62 63// DOM HTML form features 64const char kPageHasForms[] = "PageHasForms"; 65const char kPageActionOtherDomainFreq[] = "PageActionOtherDomainFreq"; 66const char kPageHasTextInputs[] = "PageHasTextInputs"; 67const char kPageHasPswdInputs[] = "PageHasPswdInputs"; 68const char kPageHasRadioInputs[] = "PageHasRadioInputs"; 69const char kPageHasCheckInputs[] = "PageHasCheckInputs"; 70 71// DOM HTML link features 72const char kPageExternalLinksFreq[] = "PageExternalLinksFreq"; 73const char kPageLinkDomain[] = "PageLinkDomain="; 74const char kPageSecureLinksFreq[] = "PageSecureLinksFreq"; 75 76// DOM HTML script features 77const char kPageNumScriptTagsGTOne[] = "PageNumScriptTags>1"; 78const char kPageNumScriptTagsGTSix[] = "PageNumScriptTags>6"; 79 80// Other DOM HTML features 81const char kPageImgOtherDomainFreq[] = "PageImgOtherDomainFreq"; 82 83// Page term features 84const char kPageTerm[] = "PageTerm="; 85 86} // namespace features 87} // namespace safe_browsing 88