1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "webkit/glue/site_isolation_metrics.h"
6
7#include <set>
8
9#include "base/hash_tables.h"
10#include "base/metrics/histogram.h"
11#include "net/base/mime_sniffer.h"
12#include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
13#include "third_party/WebKit/Source/WebKit/chromium/public/WebSecurityOrigin.h"
14#include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h"
15#include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
16#include "third_party/WebKit/Source/WebKit/chromium/public/WebURLRequest.h"
17#include "third_party/WebKit/Source/WebKit/chromium/public/WebURLResponse.h"
18
19using WebKit::WebFrame;
20using WebKit::WebSecurityOrigin;
21using WebKit::WebString;
22using WebKit::WebURL;
23using WebKit::WebURLRequest;
24using WebKit::WebURLResponse;
25
26namespace webkit_glue {
27
// Maps a request identifier to the target type recorded for it by
// SiteIsolationMetrics::AddRequest().
typedef base::hash_map<unsigned, WebURLRequest::TargetType> TargetTypeMap;
// Maps a MIME type string to its index in kCrossOriginMimeTypesToLog.
typedef base::hash_map<std::string, int> MimeTypeMap;
// Holds the spec strings of response URLs that are waiting to be sniffed.
typedef std::set<std::string> CrossOriginTextHtmlResponseSet;
31
32static TargetTypeMap* GetTargetTypeMap() {
33  static TargetTypeMap target_type_map_;
34  return &target_type_map_;
35}
36
// MIME types whose cross-origin, non-frame responses are counted.
// Copied from net/base/mime_util.cc, supported_non_image_types[]
//
// GetMimeTypeMap() associates each entry with its position in this array, and
// that position is the value logged to the MIME type histograms, so
// reordering or inserting entries changes what each histogram bucket means.
//
// NOTE(review): responses are matched against these strings by exact map
// lookup, so the bare "text/" entry presumably only matches a response whose
// MIME type is literally "text/" -- confirm whether prefix matching was
// intended.
static const char* const kCrossOriginMimeTypesToLog[] = {
  "text/cache-manifest",
  "text/html",
  "text/xml",
  "text/xsl",
  "text/plain",
  "text/vnd.chromium.ftp-dir",
  "text/",
  "text/css",
  "image/svg+xml",
  "application/xml",
  "application/xhtml+xml",
  "application/rss+xml",
  "application/atom+xml",
  "application/json",
  "application/x-x509-user-cert",
  "multipart/x-mixed-replace",
  "(NONE)"  // Keep track of missing MIME types as well
};
57
58static MimeTypeMap* GetMimeTypeMap() {
59  static MimeTypeMap mime_type_map_;
60  if (!mime_type_map_.size()) {
61    for (size_t i = 0; i < arraysize(kCrossOriginMimeTypesToLog); ++i)
62      mime_type_map_[kCrossOriginMimeTypesToLog[i]] = i;
63  }
64  return &mime_type_map_;
65}
66
67// This is set is used to keep track of the response urls that we want to
68// sniff, since we will have to wait for the payload to arrive.
69static CrossOriginTextHtmlResponseSet* GetCrossOriginTextHtmlResponseSet() {
70  static CrossOriginTextHtmlResponseSet cross_origin_text_html_response_set_;
71  return &cross_origin_text_html_response_set_;
72}
73
74static void LogVerifiedTextHtmlResponse() {
75  UMA_HISTOGRAM_COUNTS(
76      "SiteIsolation.CrossSiteNonFrameResponse_verified_texthtml_BLOCK", 1);
77}
78
79static void LogMislabeledTextHtmlResponse() {
80  UMA_HISTOGRAM_COUNTS(
81      "SiteIsolation.CrossSiteNonFrameResponse_mislabeled_texthtml", 1);
82}
83
84void SiteIsolationMetrics::AddRequest(unsigned identifier,
85    WebURLRequest::TargetType target_type) {
86  TargetTypeMap& target_type_map = *GetTargetTypeMap();
87  target_type_map[identifier] = target_type;
88}
89
90// Check whether the given response is allowed due to access control headers.
91// This is basically a copy of the logic of passesAccessControlCheck() in
92// WebCore/loader/CrossOriginAccessControl.cpp.
93bool SiteIsolationMetrics::AllowedByAccessControlHeader(
94    WebFrame* frame, const WebURLResponse& response) {
95  WebString access_control_origin = response.httpHeaderField(
96      WebString::fromUTF8("Access-Control-Allow-Origin"));
97  WebSecurityOrigin security_origin =
98      WebSecurityOrigin::createFromString(access_control_origin);
99  return access_control_origin == WebString::fromUTF8("*") ||
100         frame->securityOrigin().canAccess(security_origin);
101}
102
103// We want to log any cross-site request that we don't think a renderer should
104// be allowed to make. We can safely ignore frame requests (since we'd like
105// those to be in a separate renderer) and plugin requests, even if they are
106// cross-origin.
107//
108// For comparison, we keep counts of:
109//  - All requests made by a renderer
110//  - All cross-site requests
111//
112// Then, for cross-site non-frame/plugin requests, we keep track of:
113//  - Counts for MIME types of interest
114//  - Counts of those MIME types that carry CORS headers
115//  - Counts of mislabeled text/html responses (without CORS)
116// As well as those we would block:
117//  - Counts of verified text/html responses (without CORS)
118//  - Counts of XML/JSON responses (without CORS)
119//
120// This will let us say what percentage of requests we would end up blocking.
121void SiteIsolationMetrics::LogMimeTypeForCrossOriginRequest(
122    WebFrame* frame, unsigned identifier, const WebURLResponse& response) {
123  UMA_HISTOGRAM_COUNTS("SiteIsolation.Requests", 1);
124
125  TargetTypeMap& target_type_map = *GetTargetTypeMap();
126  TargetTypeMap::iterator iter  = target_type_map.find(identifier);
127  if (iter != target_type_map.end()) {
128    WebURLRequest::TargetType target_type = iter->second;
129    target_type_map.erase(iter);
130
131    // Focus on cross-site requests.
132    if (!frame->securityOrigin().canAccess(
133            WebSecurityOrigin::create(response.url()))) {
134      UMA_HISTOGRAM_COUNTS("SiteIsolation.CrossSiteRequests", 1);
135
136      // Now focus on non-frame, non-plugin requests.
137      if (target_type != WebURLRequest::TargetIsMainFrame &&
138          target_type != WebURLRequest::TargetIsSubframe &&
139          target_type != WebURLRequest::TargetIsObject) {
140        // If it is part of a MIME type we might block, log the MIME type.
141        std::string mime_type = response.mimeType().utf8();
142        MimeTypeMap mime_type_map = *GetMimeTypeMap();
143        // Also track it if it lacks a MIME type.
144        // TODO(creis): 304 responses have no MIME type, so we don't handle
145        // them correctly.  Can we look up their MIME type from the cache?
146        if (mime_type == "")
147          mime_type = "(NONE)";
148        MimeTypeMap::iterator mime_type_iter = mime_type_map.find(mime_type);
149        if (mime_type_iter != mime_type_map.end()) {
150          UMA_HISTOGRAM_ENUMERATION(
151              "SiteIsolation.CrossSiteNonFrameResponse_MIME_Type",
152              mime_type_iter->second,
153              arraysize(kCrossOriginMimeTypesToLog));
154
155          // We also check access control headers, in case this
156          // cross-origin request has been explicitly permitted.
157          if (AllowedByAccessControlHeader(frame, response)) {
158            UMA_HISTOGRAM_ENUMERATION(
159                "SiteIsolation.CrossSiteNonFrameResponse_With_CORS_MIME_Type",
160                mime_type_iter->second,
161                arraysize(kCrossOriginMimeTypesToLog));
162          } else {
163            // Without access control headers, we might block this request.
164            // Sometimes resources are mislabled as text/html, though, and we
165            // should only block them if we can verify that.  To do so, we sniff
166            // the content once we have some of the payload.
167            if (mime_type == "text/html") {
168              // Remember the response until we can sniff its contents.
169              GetCrossOriginTextHtmlResponseSet()->insert(
170                  response.url().spec());
171            } else if (mime_type == "text/xml" ||
172                       mime_type == "text/xsl" ||
173                       mime_type == "application/xml" ||
174                       mime_type == "application/xhtml+xml" ||
175                       mime_type == "application/rss+xml" ||
176                       mime_type == "application/atom+xml" ||
177                       mime_type == "application/json") {
178              // We will also block XML and JSON MIME types for cross-site
179              // non-frame requests without CORS headers.
180              UMA_HISTOGRAM_COUNTS(
181                  "SiteIsolation.CrossSiteNonFrameResponse_xml_or_json_BLOCK",
182                  1);
183            }
184          }
185        }
186      }
187    }
188  }
189}
190
191void SiteIsolationMetrics::SniffCrossOriginHTML(const WebURL& response_url,
192                                                const char* data,
193                                                int len) {
194  if (!response_url.isValid())
195    return;
196
197  // Look up the URL to see if it is a text/html request we are tracking.
198  CrossOriginTextHtmlResponseSet& cross_origin_text_html_response_set =
199      *GetCrossOriginTextHtmlResponseSet();
200  CrossOriginTextHtmlResponseSet::iterator request_iter =
201      cross_origin_text_html_response_set.find(response_url.spec());
202  if (request_iter != cross_origin_text_html_response_set.end()) {
203    // Log whether it actually looks like HTML.
204    std::string sniffed_mime_type;
205    bool successful = net::SniffMimeType(data, len, response_url,
206                                         "", &sniffed_mime_type);
207    if (successful && sniffed_mime_type == "text/html")
208      LogVerifiedTextHtmlResponse();
209    else
210      LogMislabeledTextHtmlResponse();
211    cross_origin_text_html_response_set.erase(request_iter);
212  }
213}
214
215void SiteIsolationMetrics::RemoveCompletedResponse(
216    const WebURL& response_url) {
217  if (!response_url.isValid())
218    return;
219
220  // Ensure we don't leave responses in the set after they've completed.
221  CrossOriginTextHtmlResponseSet& cross_origin_text_html_response_set =
222      *GetCrossOriginTextHtmlResponseSet();
223  CrossOriginTextHtmlResponseSet::iterator request_iter =
224      cross_origin_text_html_response_set.find(response_url.spec());
225  if (request_iter != cross_origin_text_html_response_set.end()) {
226    LogMislabeledTextHtmlResponse();
227    cross_origin_text_html_response_set.erase(request_iter);
228  }
229}
230
231}  // namespace webkit_glue
232