// ssl_error_classification.cc revision 6e8cce623b6e4fe0c9e4af605d675dd9d0338c38
1// Copyright 2014 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include <vector> 6 7#include "chrome/browser/ssl/ssl_error_classification.h" 8 9#include "base/build_time.h" 10#include "base/metrics/field_trial.h" 11#include "base/metrics/histogram.h" 12#include "base/strings/string_split.h" 13#include "base/strings/utf_string_conversions.h" 14#include "base/time/time.h" 15#include "chrome/browser/ssl/ssl_error_info.h" 16#include "net/base/net_util.h" 17#include "net/base/registry_controlled_domains/registry_controlled_domain.h" 18#include "net/cert/x509_cert_types.h" 19#include "net/cert/x509_certificate.h" 20#include "url/gurl.h" 21 22using base::Time; 23using base::TimeTicks; 24using base::TimeDelta; 25 26#if defined(OS_WIN) 27#include "base/win/windows_version.h" 28#endif 29 30namespace { 31 32// Events for UMA. Do not reorder or change! 33enum SSLInterstitialCause { 34 CLOCK_PAST, 35 CLOCK_FUTURE, 36 WWW_SUBDOMAIN_MATCH, 37 SUBDOMAIN_MATCH, 38 SUBDOMAIN_INVERSE_MATCH, 39 SUBDOMAIN_OUTSIDE_WILDCARD, 40 HOST_NAME_NOT_KNOWN_TLD, 41 LIKELY_MULTI_TENANT_HOSTING, 42 UNUSED_INTERSTITIAL_CAUSE_ENTRY, 43}; 44 45// Scores/weights which will be constant through all the SSL error types. 
46static const float kServerWeight = 0.5f; 47static const float kClientWeight = 0.5f; 48 49void RecordSSLInterstitialCause(bool overridable, SSLInterstitialCause event) { 50 if (overridable) { 51 UMA_HISTOGRAM_ENUMERATION("interstitial.ssl.cause.overridable", event, 52 UNUSED_INTERSTITIAL_CAUSE_ENTRY); 53 } else { 54 UMA_HISTOGRAM_ENUMERATION("interstitial.ssl.cause.nonoverridable", event, 55 UNUSED_INTERSTITIAL_CAUSE_ENTRY); 56 } 57} 58 59int GetLevensteinDistance(const std::string& str1, 60 const std::string& str2) { 61 if (str1 == str2) 62 return 0; 63 if (str1.size() == 0) 64 return str2.size(); 65 if (str2.size() == 0) 66 return str1.size(); 67 std::vector<int> kFirstRow(str2.size() + 1, 0); 68 std::vector<int> kSecondRow(str2.size() + 1, 0); 69 70 for (size_t i = 0; i < kFirstRow.size(); ++i) 71 kFirstRow[i] = i; 72 for (size_t i = 0; i < str1.size(); ++i) { 73 kSecondRow[0] = i + 1; 74 for (size_t j = 0; j < str2.size(); ++j) { 75 int cost = str1[i] == str2[j] ? 0 : 1; 76 kSecondRow[j+1] = std::min(std::min( 77 kSecondRow[j] + 1, kFirstRow[j + 1] + 1), kFirstRow[j] + cost); 78 } 79 for (size_t j = 0; j < kFirstRow.size(); j++) 80 kFirstRow[j] = kSecondRow[j]; 81 } 82 return kSecondRow[str2.size()]; 83} 84 85} // namespace 86 87SSLErrorClassification::SSLErrorClassification( 88 const base::Time& current_time, 89 const GURL& url, 90 const net::X509Certificate& cert) 91 : current_time_(current_time), 92 request_url_(url), 93 cert_(cert) { } 94 95SSLErrorClassification::~SSLErrorClassification() { } 96 97float SSLErrorClassification::InvalidDateSeverityScore( 98 int cert_error) const { 99 SSLErrorInfo::ErrorType type = 100 SSLErrorInfo::NetErrorToErrorType(cert_error); 101 DCHECK(type == SSLErrorInfo::CERT_DATE_INVALID); 102 // Client-side characteristics. Check whether or not the system's clock is 103 // wrong and whether or not the user has already encountered this error 104 // before. 
105 float severity_date_score = 0.0f; 106 107 static const float kCertificateExpiredWeight = 0.3f; 108 static const float kNotYetValidWeight = 0.2f; 109 110 static const float kSystemClockWeight = 0.75f; 111 static const float kSystemClockWrongWeight = 0.1f; 112 static const float kSystemClockRightWeight = 1.0f; 113 114 if (IsUserClockInThePast(current_time_) || 115 IsUserClockInTheFuture(current_time_)) { 116 severity_date_score += kClientWeight * kSystemClockWeight * 117 kSystemClockWrongWeight; 118 } else { 119 severity_date_score += kClientWeight * kSystemClockWeight * 120 kSystemClockRightWeight; 121 } 122 // TODO(radhikabhar): (crbug.com/393262) Check website settings. 123 124 // Server-side characteristics. Check whether the certificate has expired or 125 // is not yet valid. If the certificate has expired then factor the time which 126 // has passed since expiry. 127 if (cert_.HasExpired()) { 128 severity_date_score += kServerWeight * kCertificateExpiredWeight * 129 CalculateScoreTimePassedSinceExpiry(); 130 } 131 if (current_time_ < cert_.valid_start()) 132 severity_date_score += kServerWeight * kNotYetValidWeight; 133 return severity_date_score; 134} 135 136float SSLErrorClassification::InvalidCommonNameSeverityScore( 137 int cert_error) const { 138 SSLErrorInfo::ErrorType type = 139 SSLErrorInfo::NetErrorToErrorType(cert_error); 140 DCHECK(type == SSLErrorInfo::CERT_COMMON_NAME_INVALID); 141 float severity_name_score = 0.0f; 142 143 static const float kWWWDifferenceWeight = 0.3f; 144 static const float kNameUnderAnyNamesWeight = 0.2f; 145 static const float kAnyNamesUnderNameWeight = 1.0f; 146 static const float kLikelyMultiTenantHostingWeight = 0.1f; 147 148 std::string host_name = request_url_.host(); 149 if (IsHostNameKnownTLD(host_name)) { 150 Tokens host_name_tokens = Tokenize(host_name); 151 if (IsWWWSubDomainMatch()) 152 severity_name_score += kServerWeight * kWWWDifferenceWeight; 153 if (IsSubDomainOutsideWildcard(host_name_tokens)) 154 
severity_name_score += kServerWeight * kWWWDifferenceWeight; 155 156 std::vector<std::string> dns_names; 157 cert_.GetDNSNames(&dns_names); 158 std::vector<Tokens> dns_name_tokens = GetTokenizedDNSNames(dns_names); 159 if (NameUnderAnyNames(host_name_tokens, dns_name_tokens)) 160 severity_name_score += kServerWeight * kNameUnderAnyNamesWeight; 161 // Inverse case is more likely to be a MITM attack. 162 if (AnyNamesUnderName(dns_name_tokens, host_name_tokens)) 163 severity_name_score += kServerWeight * kAnyNamesUnderNameWeight; 164 if (IsCertLikelyFromMultiTenantHosting()) 165 severity_name_score += kServerWeight * kLikelyMultiTenantHostingWeight; 166 } 167 return severity_name_score; 168} 169 170void SSLErrorClassification::RecordUMAStatistics(bool overridable, 171 int cert_error) { 172 SSLErrorInfo::ErrorType type = 173 SSLErrorInfo::NetErrorToErrorType(cert_error); 174 switch (type) { 175 case SSLErrorInfo::CERT_DATE_INVALID: { 176 if (IsUserClockInThePast(base::Time::NowFromSystemTime())) 177 RecordSSLInterstitialCause(overridable, CLOCK_PAST); 178 if (IsUserClockInTheFuture(base::Time::NowFromSystemTime())) 179 RecordSSLInterstitialCause(overridable, CLOCK_FUTURE); 180 break; 181 } 182 case SSLErrorInfo::CERT_COMMON_NAME_INVALID: { 183 std::string host_name = request_url_.host(); 184 if (IsHostNameKnownTLD(host_name)) { 185 Tokens host_name_tokens = Tokenize(host_name); 186 if (IsWWWSubDomainMatch()) 187 RecordSSLInterstitialCause(overridable, WWW_SUBDOMAIN_MATCH); 188 if (IsSubDomainOutsideWildcard(host_name_tokens)) 189 RecordSSLInterstitialCause(overridable, SUBDOMAIN_OUTSIDE_WILDCARD); 190 std::vector<std::string> dns_names; 191 cert_.GetDNSNames(&dns_names); 192 std::vector<Tokens> dns_name_tokens = GetTokenizedDNSNames(dns_names); 193 if (NameUnderAnyNames(host_name_tokens, dns_name_tokens)) 194 RecordSSLInterstitialCause(overridable, SUBDOMAIN_MATCH); 195 if (AnyNamesUnderName(dns_name_tokens, host_name_tokens)) 196 
RecordSSLInterstitialCause(overridable, SUBDOMAIN_INVERSE_MATCH); 197 if (IsCertLikelyFromMultiTenantHosting()) 198 RecordSSLInterstitialCause(overridable, LIKELY_MULTI_TENANT_HOSTING); 199 } else { 200 RecordSSLInterstitialCause(overridable, HOST_NAME_NOT_KNOWN_TLD); 201 } 202 break; 203 } 204 default: { 205 break; 206 } 207 } 208} 209 210base::TimeDelta SSLErrorClassification::TimePassedSinceExpiry() const { 211 base::TimeDelta delta = current_time_ - cert_.valid_expiry(); 212 return delta; 213} 214 215float SSLErrorClassification::CalculateScoreTimePassedSinceExpiry() const { 216 base::TimeDelta delta = TimePassedSinceExpiry(); 217 int64 time_passed = delta.InDays(); 218 const int64 kHighThreshold = 7; 219 const int64 kLowThreshold = 4; 220 static const float kHighThresholdWeight = 0.4f; 221 static const float kMediumThresholdWeight = 0.3f; 222 static const float kLowThresholdWeight = 0.2f; 223 if (time_passed >= kHighThreshold) 224 return kHighThresholdWeight; 225 else if (time_passed >= kLowThreshold) 226 return kMediumThresholdWeight; 227 else 228 return kLowThresholdWeight; 229} 230 231bool SSLErrorClassification::IsUserClockInThePast(const base::Time& time_now) { 232 base::Time build_time = base::GetBuildTime(); 233 if (time_now < build_time - base::TimeDelta::FromDays(2)) 234 return true; 235 return false; 236} 237 238bool SSLErrorClassification::IsUserClockInTheFuture( 239 const base::Time& time_now) { 240 base::Time build_time = base::GetBuildTime(); 241 if (time_now > build_time + base::TimeDelta::FromDays(365)) 242 return true; 243 return false; 244} 245 246bool SSLErrorClassification::IsWindowsVersionSP3OrLower() { 247#if defined(OS_WIN) 248 const base::win::OSInfo* os_info = base::win::OSInfo::GetInstance(); 249 base::win::OSInfo::ServicePack service_pack = os_info->service_pack(); 250 if (os_info->version() < base::win::VERSION_VISTA && service_pack.major < 3) 251 return true; 252#endif 253 return false; 254} 255 256bool 
SSLErrorClassification::IsHostNameKnownTLD(const std::string& host_name) { 257 size_t tld_length = 258 net::registry_controlled_domains::GetRegistryLength( 259 host_name, 260 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 261 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); 262 if (tld_length == 0 || tld_length == std::string::npos) 263 return false; 264 return true; 265} 266 267std::vector<SSLErrorClassification::Tokens> SSLErrorClassification:: 268GetTokenizedDNSNames(const std::vector<std::string>& dns_names) { 269 std::vector<std::vector<std::string>> dns_name_tokens; 270 for (size_t i = 0; i < dns_names.size(); ++i) { 271 std::vector<std::string> dns_name_token_single; 272 if (dns_names[i].empty() || dns_names[i].find('\0') != std::string::npos 273 || !(IsHostNameKnownTLD(dns_names[i]))) { 274 dns_name_token_single.push_back(std::string()); 275 } else { 276 dns_name_token_single = Tokenize(dns_names[i]); 277 } 278 dns_name_tokens.push_back(dns_name_token_single); 279 } 280 return dns_name_tokens; 281} 282 283size_t SSLErrorClassification::FindSubDomainDifference( 284 const Tokens& potential_subdomain, const Tokens& parent) const { 285 // A check to ensure that the number of tokens in the tokenized_parent is 286 // less than the tokenized_potential_subdomain. 287 if (parent.size() >= potential_subdomain.size()) 288 return 0; 289 290 size_t tokens_match = 0; 291 size_t diff_size = potential_subdomain.size() - parent.size(); 292 for (size_t i = 0; i < parent.size(); ++i) { 293 if (parent[i] == potential_subdomain[i + diff_size]) 294 tokens_match++; 295 } 296 if (tokens_match == parent.size()) 297 return diff_size; 298 return 0; 299} 300 301SSLErrorClassification::Tokens SSLErrorClassification:: 302Tokenize(const std::string& name) { 303 Tokens name_tokens; 304 base::SplitStringDontTrim(name, '.', &name_tokens); 305 return name_tokens; 306} 307 308// We accept the inverse case for www for historical reasons. 
309bool SSLErrorClassification::IsWWWSubDomainMatch() const { 310 std::string host_name = request_url_.host(); 311 if (IsHostNameKnownTLD(host_name)) { 312 std::vector<std::string> dns_names; 313 cert_.GetDNSNames(&dns_names); 314 bool result = false; 315 // Need to account for all possible domains given in the SSL certificate. 316 for (size_t i = 0; i < dns_names.size(); ++i) { 317 if (dns_names[i].empty() || dns_names[i].find('\0') != std::string::npos 318 || dns_names[i].length() == host_name.length() 319 || !(IsHostNameKnownTLD(dns_names[i]))) { 320 result = result || false; 321 } else if (dns_names[i].length() > host_name.length()) { 322 result = result || 323 net::StripWWW(base::ASCIIToUTF16(dns_names[i])) == 324 base::ASCIIToUTF16(host_name); 325 } else { 326 result = result || 327 net::StripWWW(base::ASCIIToUTF16(host_name)) == 328 base::ASCIIToUTF16(dns_names[i]); 329 } 330 } 331 return result; 332 } 333 return false; 334} 335 336bool SSLErrorClassification::NameUnderAnyNames( 337 const Tokens& child, 338 const std::vector<Tokens>& potential_parents) const { 339 bool result = false; 340 // Need to account for all the possible domains given in the SSL certificate. 341 for (size_t i = 0; i < potential_parents.size(); ++i) { 342 if (potential_parents[i].empty() || 343 potential_parents[i].size() >= child.size()) { 344 result = result || false; 345 } else { 346 size_t domain_diff = FindSubDomainDifference(child, 347 potential_parents[i]); 348 if (domain_diff == 1 && child[0] != "www") 349 result = result || true; 350 } 351 } 352 return result; 353} 354 355bool SSLErrorClassification::AnyNamesUnderName( 356 const std::vector<Tokens>& potential_children, 357 const Tokens& parent) const { 358 bool result = false; 359 // Need to account for all the possible domains given in the SSL certificate. 
360 for (size_t i = 0; i < potential_children.size(); ++i) { 361 if (potential_children[i].empty() || 362 potential_children[i].size() <= parent.size()) { 363 result = result || false; 364 } else { 365 size_t domain_diff = FindSubDomainDifference(potential_children[i], 366 parent); 367 if (domain_diff == 1 && potential_children[i][0] != "www") 368 result = result || true; 369 } 370 } 371 return result; 372} 373 374bool SSLErrorClassification::IsSubDomainOutsideWildcard( 375 const Tokens& host_name_tokens) const { 376 std::string host_name = request_url_.host(); 377 std::vector<std::string> dns_names; 378 cert_.GetDNSNames(&dns_names); 379 bool result = false; 380 381 // This method requires that the host name be longer than the dns name on 382 // the certificate. 383 for (size_t i = 0; i < dns_names.size(); ++i) { 384 const std::string& name = dns_names[i]; 385 if (name.length() < 2 || name.length() >= host_name.length() || 386 name.find('\0') != std::string::npos || 387 !IsHostNameKnownTLD(name) 388 || name[0] != '*' || name[1] != '.') { 389 continue; 390 } 391 392 // Move past the "*.". 393 std::string extracted_dns_name = name.substr(2); 394 if (FindSubDomainDifference( 395 host_name_tokens, Tokenize(extracted_dns_name)) == 2) { 396 return true; 397 } 398 } 399 return result; 400} 401 402bool SSLErrorClassification::IsCertLikelyFromMultiTenantHosting() const { 403 std::string host_name = request_url_.host(); 404 std::vector<std::string> dns_names; 405 std::vector<std::string> dns_names_domain; 406 cert_.GetDNSNames(&dns_names); 407 size_t dns_names_size = dns_names.size(); 408 409 // If there is only 1 DNS name then it is definitely not a shared certificate. 410 if (dns_names_size == 0 || dns_names_size == 1) 411 return false; 412 413 // Check to see if all the domains in the SAN field in the SSL certificate are 414 // the same or not. 
415 for (size_t i = 0; i < dns_names_size; ++i) { 416 dns_names_domain.push_back( 417 net::registry_controlled_domains:: 418 GetDomainAndRegistry( 419 dns_names[i], 420 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES)); 421 } 422 for (size_t i = 1; i < dns_names_domain.size(); ++i) { 423 if (dns_names_domain[i] != dns_names_domain[0]) 424 return false; 425 } 426 427 // If the number of DNS names is more than 5 then assume that it is a shared 428 // certificate. 429 static const int kDistinctNameThreshold = 5; 430 if (dns_names_size > kDistinctNameThreshold) 431 return true; 432 433 // Heuristic - The edit distance between all the strings should be at least 5 434 // for it to be counted as a shared SSLCertificate. If even one pair of 435 // strings edit distance is below 5 then the certificate is no longer 436 // considered as a shared certificate. Include the host name in the URL also 437 // while comparing. 438 dns_names.push_back(host_name); 439 static const int kMinimumEditDsitance = 5; 440 for (size_t i = 0; i < dns_names_size; ++i) { 441 for (size_t j = i + 1; j < dns_names_size; ++j) { 442 int edit_distance = GetLevensteinDistance(dns_names[i], dns_names[j]); 443 if (edit_distance < kMinimumEditDsitance) 444 return false; 445 } 446 } 447 return true; 448} 449