1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/prerender/prerender_util.h" 6 7#include "base/logging.h" 8#include "base/metrics/histogram.h" 9#include "base/metrics/sparse_histogram.h" 10#include "base/strings/string_util.h" 11#include "content/public/browser/resource_request_info.h" 12#include "content/public/common/resource_type.h" 13#include "net/http/http_response_headers.h" 14#include "net/url_request/url_request.h" 15#include "url/url_canon.h" 16#include "url/url_parse.h" 17#include "url/url_util.h" 18 19using content::ResourceType; 20 21namespace prerender { 22 23namespace { 24 25const char kModPagespeedHeader[] = "X-Mod-Pagespeed"; 26const char kPageSpeedHeader[] = "X-Page-Speed"; 27const char kPagespeedServerHistogram[] = 28 "Prerender.PagespeedHeader.ServerCounts"; 29const char kPagespeedVersionHistogram[] = 30 "Prerender.PagespeedHeader.VersionCounts"; 31 32enum PagespeedHeaderServerType { 33 PAGESPEED_TOTAL_RESPONSES = 0, 34 PAGESPEED_MOD_PAGESPEED_SERVER = 1, 35 PAGESPEED_NGX_PAGESPEED_SERVER = 2, 36 PAGESPEED_PAGESPEED_SERVICE_SERVER = 3, 37 PAGESPEED_UNKNOWN_SERVER = 4, 38 PAGESPEED_SERVER_MAXIMUM = 5 39}; 40 41// Private function to parse the PageSpeed version number and encode it in 42// buckets 2 through 99: if it is in the format a.b.c.d-e the bucket will be 43// 2 + 2 * (max(c, 10) - 10) + (d > 1 ? 1 : 0); if it is not in this format 44// we return zero. 45int GetXModPagespeedBucketFromVersion(const std::string& version) { 46 int a, b, c, d, e; 47 int num_parsed = sscanf(version.c_str(), "%d.%d.%d.%d-%d", 48 &a, &b, &c, &d, &e); 49 int output = 0; 50 if (num_parsed == 5) { 51 output = 2; 52 if (c > 10) 53 output += 2 * (c - 10); 54 if (d > 1) 55 output++; 56 if (output < 2 || output > 99) 57 output = 0; 58 } 59 return output; 60} 61 62// Private function to parse the X-Page-Speed header value and determine 63// whether it is in the PageSpeed Service format, namely m_n_dc were m_n is 64// a version number and dc is an encoded 2-character value. 65bool IsPageSpeedServiceVersionNumber(const std::string& version) { 66 int a, b; 67 char c, d, e; // e is to detect EOL as we check that it /isn't/ converted. 68 int num_parsed = sscanf(version.c_str(), "%d_%d_%c%c%c", &a, &b, &c, &d, &e); 69 return (num_parsed == 4); 70} 71 72enum PrerenderSchemeCancelReason { 73 PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL, 74 PRERENDER_SCHEME_CANCEL_REASON_DATA, 75 PRERENDER_SCHEME_CANCEL_REASON_BLOB, 76 PRERENDER_SCHEME_CANCEL_REASON_FILE, 77 PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM, 78 PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET, 79 PRERENDER_SCHEME_CANCEL_REASON_FTP, 80 PRERENDER_SCHEME_CANCEL_REASON_CHROME, 81 PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION, 82 PRERENDER_SCHEME_CANCEL_REASON_ABOUT, 83 PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN, 84 PRERENDER_SCHEME_CANCEL_REASON_MAX, 85}; 86 87void ReportPrerenderSchemeCancelReason(PrerenderSchemeCancelReason reason) { 88 UMA_HISTOGRAM_ENUMERATION( 89 "Prerender.SchemeCancelReason", reason, 90 PRERENDER_SCHEME_CANCEL_REASON_MAX); 91} 92 93} // namespace 94 95const char kChromeNavigateExtraDataKey[] = "chrome_navigate"; 96 97bool MaybeGetQueryStringBasedAliasURL( 98 const GURL& url, GURL* alias_url) { 99 DCHECK(alias_url); 100 url::Parsed parsed; 101 url::ParseStandardURL(url.spec().c_str(), url.spec().length(), &parsed); 102 url::Component query = parsed.query; 103 url::Component key, value; 104 while (url::ExtractQueryKeyValue(url.spec().c_str(), &query, &key, &value)) { 105 if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "url", key.len)) 106 continue; 107 // We found a url= query string component. 108 if (value.len < 1) 109 continue; 110 url::RawCanonOutputW<1024> decoded_url; 111 url::DecodeURLEscapeSequences(url.spec().c_str() + value.begin, value.len, 112 &decoded_url); 113 GURL new_url(base::string16(decoded_url.data(), decoded_url.length())); 114 if (!new_url.is_empty() && new_url.is_valid()) { 115 *alias_url = new_url; 116 return true; 117 } 118 return false; 119 } 120 return false; 121} 122 123uint8 GetQueryStringBasedExperiment(const GURL& url) { 124 url::Parsed parsed; 125 url::ParseStandardURL(url.spec().c_str(), url.spec().length(), &parsed); 126 url::Component query = parsed.query; 127 url::Component key, value; 128 while (url::ExtractQueryKeyValue(url.spec().c_str(), &query, &key, &value)) { 129 if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "lpe", key.len)) 130 continue; 131 132 // We found a lpe= query string component. 133 if (value.len != 1) 134 continue; 135 uint8 exp = *(url.spec().c_str() + value.begin) - '0'; 136 if (exp < 1 || exp > 9) 137 continue; 138 return exp; 139 } 140 return kNoExperiment; 141} 142 143bool IsGoogleDomain(const GURL& url) { 144 return StartsWithASCII(url.host(), std::string("www.google."), true); 145} 146 147bool IsGoogleSearchResultURL(const GURL& url) { 148 if (!IsGoogleDomain(url)) 149 return false; 150 return (url.path().empty() || 151 StartsWithASCII(url.path(), std::string("/search"), true) || 152 (url.path() == "/") || 153 StartsWithASCII(url.path(), std::string("/webhp"), true)); 154} 155 156bool IsNoSwapInExperiment(uint8 experiment_id) { 157 // Currently, experiments 5 and 6 fall in this category. 158 return experiment_id == 5 || experiment_id == 6; 159} 160 161bool IsControlGroupExperiment(uint8 experiment_id) { 162 // Currently, experiments 7 and 8 fall in this category. 163 return experiment_id == 7 || experiment_id == 8; 164} 165 166void GatherPagespeedData(const ResourceType resource_type, 167 const GURL& request_url, 168 const net::HttpResponseHeaders* response_headers) { 169 if (resource_type != content::RESOURCE_TYPE_MAIN_FRAME || 170 !request_url.SchemeIsHTTPOrHTTPS()) 171 return; 172 173 // bucket 0 counts every response seen. 174 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram, 175 PAGESPEED_TOTAL_RESPONSES, 176 PAGESPEED_SERVER_MAXIMUM); 177 if (!response_headers) 178 return; 179 180 void* iter = NULL; 181 std::string name; 182 std::string value; 183 while (response_headers->EnumerateHeaderLines(&iter, &name, &value)) { 184 if (name == kModPagespeedHeader) { 185 // Bucket 1 counts occurences of the X-Mod-Pagespeed header. 186 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram, 187 PAGESPEED_MOD_PAGESPEED_SERVER, 188 PAGESPEED_SERVER_MAXIMUM); 189 if (!value.empty()) { 190 // If the header value is in the X-Mod-Pagespeed version number format 191 // then increment the appropriate bucket, otherwise increment bucket 1, 192 // which is the catch-all "unknown version number" bucket. 193 int bucket = GetXModPagespeedBucketFromVersion(value); 194 if (bucket > 0) { 195 UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket); 196 } else { 197 UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, 1); 198 } 199 } 200 break; 201 } else if (name == kPageSpeedHeader) { 202 // X-Page-Speed header versions are either in the X-Mod-Pagespeed format, 203 // indicating an nginx installation, or they're in the PageSpeed Service 204 // format, indicating a PSS installation, or in some other format, 205 // indicating an unknown installation [possibly IISpeed]. 206 if (!value.empty()) { 207 int bucket = GetXModPagespeedBucketFromVersion(value); 208 if (bucket > 0) { 209 // Bucket 2 counts occurences of the X-Page-Speed header with a 210 // value in the X-Mod-Pagespeed version number format. We also 211 // count these responses in the version histogram. 212 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram, 213 PAGESPEED_NGX_PAGESPEED_SERVER, 214 PAGESPEED_SERVER_MAXIMUM); 215 UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket); 216 } else if (IsPageSpeedServiceVersionNumber(value)) { 217 // Bucket 3 counts occurences of the X-Page-Speed header with a 218 // value in the PageSpeed Service version number format. 219 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram, 220 PAGESPEED_PAGESPEED_SERVICE_SERVER, 221 PAGESPEED_SERVER_MAXIMUM); 222 } else { 223 // Bucket 4 counts occurences of all other values. 224 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram, 225 PAGESPEED_UNKNOWN_SERVER, 226 PAGESPEED_SERVER_MAXIMUM); 227 } 228 } 229 break; 230 } 231 } 232} 233 234void URLRequestResponseStarted(net::URLRequest* request) { 235 const content::ResourceRequestInfo* info = 236 content::ResourceRequestInfo::ForRequest(request); 237 GatherPagespeedData(info->GetResourceType(), 238 request->url(), 239 request->response_headers()); 240} 241 242void ReportPrerenderExternalURL() { 243 ReportPrerenderSchemeCancelReason( 244 PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL); 245} 246 247void ReportUnsupportedPrerenderScheme(const GURL& url) { 248 if (url.SchemeIs("data")) { 249 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_DATA); 250 } else if (url.SchemeIs("blob")) { 251 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_BLOB); 252 } else if (url.SchemeIsFile()) { 253 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FILE); 254 } else if (url.SchemeIsFileSystem()) { 255 ReportPrerenderSchemeCancelReason( 256 PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM); 257 } else if (url.SchemeIs("ws") || url.SchemeIs("wss")) { 258 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET); 259 } else if (url.SchemeIs("ftp")) { 260 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FTP); 261 } else if (url.SchemeIs("chrome")) { 262 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_CHROME); 263 } else if (url.SchemeIs("chrome-extension")) { 264 ReportPrerenderSchemeCancelReason( 265 PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION); 266 } else if (url.SchemeIs("about")) { 267 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_ABOUT); 268 } else { 269 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN); 270 } 271} 272 273} // namespace prerender 274