ssl_error_classification.cc revision 6e8cce623b6e4fe0c9e4af605d675dd9d0338c38
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <vector>
6
7#include "chrome/browser/ssl/ssl_error_classification.h"
8
9#include "base/build_time.h"
10#include "base/metrics/field_trial.h"
11#include "base/metrics/histogram.h"
12#include "base/strings/string_split.h"
13#include "base/strings/utf_string_conversions.h"
14#include "base/time/time.h"
15#include "chrome/browser/ssl/ssl_error_info.h"
16#include "net/base/net_util.h"
17#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
18#include "net/cert/x509_cert_types.h"
19#include "net/cert/x509_certificate.h"
20#include "url/gurl.h"
21
22using base::Time;
23using base::TimeTicks;
24using base::TimeDelta;
25
26#if defined(OS_WIN)
27#include "base/win/windows_version.h"
28#endif
29
30namespace {
31
// Causes reported to the "interstitial.ssl.cause.*" UMA histograms. The
// numeric values are recorded, so existing entries must never be reordered,
// renumbered or removed.
enum SSLInterstitialCause {
  CLOCK_PAST = 0,
  CLOCK_FUTURE = 1,
  WWW_SUBDOMAIN_MATCH = 2,
  SUBDOMAIN_MATCH = 3,
  SUBDOMAIN_INVERSE_MATCH = 4,
  SUBDOMAIN_OUTSIDE_WILDCARD = 5,
  HOST_NAME_NOT_KNOWN_TLD = 6,
  LIKELY_MULTI_TENANT_HOSTING = 7,
  // Used as the histogram boundary; keep it as the last entry.
  UNUSED_INTERSTITIAL_CAUSE_ENTRY,
};
44
// Relative weights given to the server-side and client-side signals. They
// are the same for every SSL error type scored in this file.
const float kServerWeight = 0.5f;
const float kClientWeight = 0.5f;
48
49void RecordSSLInterstitialCause(bool overridable, SSLInterstitialCause event) {
50  if (overridable) {
51    UMA_HISTOGRAM_ENUMERATION("interstitial.ssl.cause.overridable", event,
52                              UNUSED_INTERSTITIAL_CAUSE_ENTRY);
53  } else {
54    UMA_HISTOGRAM_ENUMERATION("interstitial.ssl.cause.nonoverridable", event,
55                              UNUSED_INTERSTITIAL_CAUSE_ENTRY);
56  }
57}
58
// Returns the Levenshtein (edit) distance between |str1| and |str2|: the
// minimum number of single-character insertions, deletions and substitutions
// needed to turn one string into the other. Comparison is byte-wise.
//
// Only two rows of the dynamic-programming table are kept, so memory use is
// proportional to |str2|.size().
int GetLevensteinDistance(const std::string& str1,
                          const std::string& str2) {
  if (str1 == str2)
    return 0;
  if (str1.empty())
    return static_cast<int>(str2.size());
  if (str2.empty())
    return static_cast<int>(str1.size());

  const size_t columns = str2.size() + 1;
  // |previous_row[j]| is the distance between the first i characters of
  // |str1| and the first j characters of |str2|. For i == 0 that is j.
  std::vector<int> previous_row(columns, 0);
  std::vector<int> current_row(columns, 0);
  for (size_t j = 0; j < columns; ++j)
    previous_row[j] = static_cast<int>(j);

  for (size_t i = 0; i < str1.size(); ++i) {
    current_row[0] = static_cast<int>(i + 1);
    for (size_t j = 0; j < str2.size(); ++j) {
      const int insertion = current_row[j] + 1;
      const int deletion = previous_row[j + 1] + 1;
      const int substitution =
          previous_row[j] + (str1[i] == str2[j] ? 0 : 1);
      current_row[j + 1] =
          std::min(std::min(insertion, deletion), substitution);
    }
    // Constant-time swap instead of copying the row element by element. The
    // stale contents left in |current_row| are fully overwritten next round.
    previous_row.swap(current_row);
  }
  // After the final swap the last computed row lives in |previous_row|.
  return previous_row[str2.size()];
}
84
85} // namespace
86
87SSLErrorClassification::SSLErrorClassification(
88    const base::Time& current_time,
89    const GURL& url,
90    const net::X509Certificate& cert)
91  : current_time_(current_time),
92    request_url_(url),
93    cert_(cert) { }
94
95SSLErrorClassification::~SSLErrorClassification() { }
96
97float SSLErrorClassification::InvalidDateSeverityScore(
98    int cert_error) const {
99  SSLErrorInfo::ErrorType type =
100      SSLErrorInfo::NetErrorToErrorType(cert_error);
101  DCHECK(type == SSLErrorInfo::CERT_DATE_INVALID);
102  // Client-side characteristics. Check whether or not the system's clock is
103  // wrong and whether or not the user has already encountered this error
104  // before.
105  float severity_date_score = 0.0f;
106
107  static const float kCertificateExpiredWeight = 0.3f;
108  static const float kNotYetValidWeight = 0.2f;
109
110  static const float kSystemClockWeight = 0.75f;
111  static const float kSystemClockWrongWeight = 0.1f;
112  static const float kSystemClockRightWeight = 1.0f;
113
114  if (IsUserClockInThePast(current_time_)  ||
115      IsUserClockInTheFuture(current_time_)) {
116    severity_date_score += kClientWeight * kSystemClockWeight *
117        kSystemClockWrongWeight;
118  } else {
119    severity_date_score += kClientWeight * kSystemClockWeight *
120        kSystemClockRightWeight;
121  }
122  // TODO(radhikabhar): (crbug.com/393262) Check website settings.
123
124  // Server-side characteristics. Check whether the certificate has expired or
125  // is not yet valid. If the certificate has expired then factor the time which
126  // has passed since expiry.
127  if (cert_.HasExpired()) {
128    severity_date_score += kServerWeight * kCertificateExpiredWeight *
129        CalculateScoreTimePassedSinceExpiry();
130  }
131  if (current_time_ < cert_.valid_start())
132    severity_date_score += kServerWeight * kNotYetValidWeight;
133  return severity_date_score;
134}
135
136float SSLErrorClassification::InvalidCommonNameSeverityScore(
137    int cert_error) const {
138  SSLErrorInfo::ErrorType type =
139      SSLErrorInfo::NetErrorToErrorType(cert_error);
140  DCHECK(type == SSLErrorInfo::CERT_COMMON_NAME_INVALID);
141  float severity_name_score = 0.0f;
142
143  static const float kWWWDifferenceWeight = 0.3f;
144  static const float kNameUnderAnyNamesWeight = 0.2f;
145  static const float kAnyNamesUnderNameWeight = 1.0f;
146  static const float kLikelyMultiTenantHostingWeight = 0.1f;
147
148  std::string host_name = request_url_.host();
149  if (IsHostNameKnownTLD(host_name)) {
150    Tokens host_name_tokens = Tokenize(host_name);
151    if (IsWWWSubDomainMatch())
152      severity_name_score += kServerWeight * kWWWDifferenceWeight;
153    if (IsSubDomainOutsideWildcard(host_name_tokens))
154      severity_name_score += kServerWeight * kWWWDifferenceWeight;
155
156    std::vector<std::string> dns_names;
157    cert_.GetDNSNames(&dns_names);
158    std::vector<Tokens> dns_name_tokens = GetTokenizedDNSNames(dns_names);
159    if (NameUnderAnyNames(host_name_tokens, dns_name_tokens))
160      severity_name_score += kServerWeight * kNameUnderAnyNamesWeight;
161    // Inverse case is more likely to be a MITM attack.
162    if (AnyNamesUnderName(dns_name_tokens, host_name_tokens))
163      severity_name_score += kServerWeight * kAnyNamesUnderNameWeight;
164    if (IsCertLikelyFromMultiTenantHosting())
165      severity_name_score += kServerWeight * kLikelyMultiTenantHostingWeight;
166  }
167  return severity_name_score;
168}
169
170void SSLErrorClassification::RecordUMAStatistics(bool overridable,
171                                                 int cert_error) {
172  SSLErrorInfo::ErrorType type =
173      SSLErrorInfo::NetErrorToErrorType(cert_error);
174  switch (type) {
175    case SSLErrorInfo::CERT_DATE_INVALID: {
176      if (IsUserClockInThePast(base::Time::NowFromSystemTime()))
177        RecordSSLInterstitialCause(overridable, CLOCK_PAST);
178      if (IsUserClockInTheFuture(base::Time::NowFromSystemTime()))
179        RecordSSLInterstitialCause(overridable, CLOCK_FUTURE);
180      break;
181    }
182    case SSLErrorInfo::CERT_COMMON_NAME_INVALID: {
183      std::string host_name = request_url_.host();
184      if (IsHostNameKnownTLD(host_name)) {
185        Tokens host_name_tokens = Tokenize(host_name);
186        if (IsWWWSubDomainMatch())
187          RecordSSLInterstitialCause(overridable, WWW_SUBDOMAIN_MATCH);
188        if (IsSubDomainOutsideWildcard(host_name_tokens))
189          RecordSSLInterstitialCause(overridable, SUBDOMAIN_OUTSIDE_WILDCARD);
190        std::vector<std::string> dns_names;
191        cert_.GetDNSNames(&dns_names);
192        std::vector<Tokens> dns_name_tokens = GetTokenizedDNSNames(dns_names);
193        if (NameUnderAnyNames(host_name_tokens, dns_name_tokens))
194          RecordSSLInterstitialCause(overridable, SUBDOMAIN_MATCH);
195        if (AnyNamesUnderName(dns_name_tokens, host_name_tokens))
196          RecordSSLInterstitialCause(overridable, SUBDOMAIN_INVERSE_MATCH);
197        if (IsCertLikelyFromMultiTenantHosting())
198          RecordSSLInterstitialCause(overridable, LIKELY_MULTI_TENANT_HOSTING);
199      } else {
200         RecordSSLInterstitialCause(overridable, HOST_NAME_NOT_KNOWN_TLD);
201      }
202      break;
203    }
204    default: {
205      break;
206    }
207  }
208}
209
210base::TimeDelta SSLErrorClassification::TimePassedSinceExpiry() const {
211  base::TimeDelta delta = current_time_ - cert_.valid_expiry();
212  return delta;
213}
214
215float SSLErrorClassification::CalculateScoreTimePassedSinceExpiry() const {
216  base::TimeDelta delta = TimePassedSinceExpiry();
217  int64 time_passed = delta.InDays();
218  const int64 kHighThreshold = 7;
219  const int64 kLowThreshold = 4;
220  static const float kHighThresholdWeight = 0.4f;
221  static const float kMediumThresholdWeight = 0.3f;
222  static const float kLowThresholdWeight = 0.2f;
223  if (time_passed >= kHighThreshold)
224    return kHighThresholdWeight;
225  else if (time_passed >= kLowThreshold)
226    return kMediumThresholdWeight;
227  else
228    return kLowThresholdWeight;
229}
230
231bool SSLErrorClassification::IsUserClockInThePast(const base::Time& time_now) {
232  base::Time build_time = base::GetBuildTime();
233  if (time_now < build_time - base::TimeDelta::FromDays(2))
234    return true;
235  return false;
236}
237
238bool SSLErrorClassification::IsUserClockInTheFuture(
239    const base::Time& time_now) {
240  base::Time build_time = base::GetBuildTime();
241  if (time_now > build_time + base::TimeDelta::FromDays(365))
242    return true;
243  return false;
244}
245
246bool SSLErrorClassification::IsWindowsVersionSP3OrLower() {
247#if defined(OS_WIN)
248  const base::win::OSInfo* os_info = base::win::OSInfo::GetInstance();
249  base::win::OSInfo::ServicePack service_pack = os_info->service_pack();
250  if (os_info->version() < base::win::VERSION_VISTA && service_pack.major < 3)
251    return true;
252#endif
253  return false;
254}
255
256bool SSLErrorClassification::IsHostNameKnownTLD(const std::string& host_name) {
257  size_t tld_length =
258      net::registry_controlled_domains::GetRegistryLength(
259          host_name,
260          net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
261          net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
262  if (tld_length == 0 || tld_length == std::string::npos)
263    return false;
264  return true;
265}
266
267std::vector<SSLErrorClassification::Tokens> SSLErrorClassification::
268GetTokenizedDNSNames(const std::vector<std::string>& dns_names) {
269  std::vector<std::vector<std::string>> dns_name_tokens;
270  for (size_t i = 0; i < dns_names.size(); ++i) {
271    std::vector<std::string> dns_name_token_single;
272    if (dns_names[i].empty() || dns_names[i].find('\0') != std::string::npos
273        || !(IsHostNameKnownTLD(dns_names[i]))) {
274      dns_name_token_single.push_back(std::string());
275    } else {
276      dns_name_token_single = Tokenize(dns_names[i]);
277    }
278    dns_name_tokens.push_back(dns_name_token_single);
279  }
280  return dns_name_tokens;
281}
282
283size_t SSLErrorClassification::FindSubDomainDifference(
284    const Tokens& potential_subdomain, const Tokens& parent) const {
285  // A check to ensure that the number of tokens in the tokenized_parent is
286  // less than the tokenized_potential_subdomain.
287  if (parent.size() >= potential_subdomain.size())
288    return 0;
289
290  size_t tokens_match = 0;
291  size_t diff_size = potential_subdomain.size() - parent.size();
292  for (size_t i = 0; i < parent.size(); ++i) {
293    if (parent[i] == potential_subdomain[i + diff_size])
294      tokens_match++;
295  }
296  if (tokens_match == parent.size())
297    return diff_size;
298  return 0;
299}
300
301SSLErrorClassification::Tokens SSLErrorClassification::
302Tokenize(const std::string& name) {
303  Tokens name_tokens;
304  base::SplitStringDontTrim(name, '.', &name_tokens);
305  return name_tokens;
306}
307
308// We accept the inverse case for www for historical reasons.
309bool SSLErrorClassification::IsWWWSubDomainMatch() const {
310  std::string host_name = request_url_.host();
311  if (IsHostNameKnownTLD(host_name)) {
312    std::vector<std::string> dns_names;
313    cert_.GetDNSNames(&dns_names);
314    bool result = false;
315    // Need to account for all possible domains given in the SSL certificate.
316    for (size_t i = 0; i < dns_names.size(); ++i) {
317      if (dns_names[i].empty() || dns_names[i].find('\0') != std::string::npos
318          || dns_names[i].length() == host_name.length()
319          || !(IsHostNameKnownTLD(dns_names[i]))) {
320        result = result || false;
321      } else if (dns_names[i].length() > host_name.length()) {
322        result = result ||
323            net::StripWWW(base::ASCIIToUTF16(dns_names[i])) ==
324            base::ASCIIToUTF16(host_name);
325      } else {
326        result = result ||
327            net::StripWWW(base::ASCIIToUTF16(host_name)) ==
328            base::ASCIIToUTF16(dns_names[i]);
329      }
330    }
331    return result;
332  }
333  return false;
334}
335
336bool SSLErrorClassification::NameUnderAnyNames(
337    const Tokens& child,
338    const std::vector<Tokens>& potential_parents) const {
339  bool result = false;
340  // Need to account for all the possible domains given in the SSL certificate.
341  for (size_t i = 0; i < potential_parents.size(); ++i) {
342    if (potential_parents[i].empty() ||
343        potential_parents[i].size() >= child.size()) {
344      result = result || false;
345    } else {
346      size_t domain_diff = FindSubDomainDifference(child,
347                                                   potential_parents[i]);
348      if (domain_diff == 1 &&  child[0] != "www")
349        result = result || true;
350    }
351  }
352  return result;
353}
354
355bool SSLErrorClassification::AnyNamesUnderName(
356    const std::vector<Tokens>& potential_children,
357    const Tokens& parent) const {
358  bool result = false;
359  // Need to account for all the possible domains given in the SSL certificate.
360  for (size_t i = 0; i < potential_children.size(); ++i) {
361    if (potential_children[i].empty() ||
362        potential_children[i].size() <= parent.size()) {
363      result = result || false;
364    } else {
365      size_t domain_diff = FindSubDomainDifference(potential_children[i],
366                                                   parent);
367      if (domain_diff == 1 &&  potential_children[i][0] != "www")
368        result = result || true;
369    }
370  }
371  return result;
372}
373
374bool SSLErrorClassification::IsSubDomainOutsideWildcard(
375    const Tokens& host_name_tokens) const {
376  std::string host_name = request_url_.host();
377  std::vector<std::string> dns_names;
378  cert_.GetDNSNames(&dns_names);
379  bool result = false;
380
381  // This method requires that the host name be longer than the dns name on
382  // the certificate.
383  for (size_t i = 0; i < dns_names.size(); ++i) {
384    const std::string& name = dns_names[i];
385    if (name.length() < 2 || name.length() >= host_name.length() ||
386        name.find('\0') != std::string::npos ||
387        !IsHostNameKnownTLD(name)
388        || name[0] != '*' || name[1] != '.') {
389      continue;
390    }
391
392    // Move past the "*.".
393    std::string extracted_dns_name = name.substr(2);
394    if (FindSubDomainDifference(
395        host_name_tokens, Tokenize(extracted_dns_name)) == 2) {
396      return true;
397    }
398  }
399  return result;
400}
401
402bool SSLErrorClassification::IsCertLikelyFromMultiTenantHosting() const {
403  std::string host_name = request_url_.host();
404  std::vector<std::string> dns_names;
405  std::vector<std::string> dns_names_domain;
406  cert_.GetDNSNames(&dns_names);
407  size_t dns_names_size = dns_names.size();
408
409  // If there is only 1 DNS name then it is definitely not a shared certificate.
410  if (dns_names_size == 0 || dns_names_size == 1)
411    return false;
412
413  // Check to see if all the domains in the SAN field in the SSL certificate are
414  // the same or not.
415  for (size_t i = 0; i < dns_names_size; ++i) {
416    dns_names_domain.push_back(
417        net::registry_controlled_domains::
418        GetDomainAndRegistry(
419            dns_names[i],
420            net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES));
421  }
422  for (size_t i = 1; i < dns_names_domain.size(); ++i) {
423    if (dns_names_domain[i] != dns_names_domain[0])
424      return false;
425  }
426
427  // If the number of DNS names is more than 5 then assume that it is a shared
428  // certificate.
429  static const int kDistinctNameThreshold = 5;
430  if (dns_names_size > kDistinctNameThreshold)
431    return true;
432
433  // Heuristic - The edit distance between all the strings should be at least 5
434  // for it to be counted as a shared SSLCertificate. If even one pair of
435  // strings edit distance is below 5 then the certificate is no longer
436  // considered as a shared certificate. Include the host name in the URL also
437  // while comparing.
438  dns_names.push_back(host_name);
439  static const int kMinimumEditDsitance = 5;
440  for (size_t i = 0; i < dns_names_size; ++i) {
441    for (size_t j = i + 1; j < dns_names_size; ++j) {
442      int edit_distance = GetLevensteinDistance(dns_names[i], dns_names[j]);
443      if (edit_distance < kMinimumEditDsitance)
444        return false;
445    }
446  }
447  return true;
448}
449