1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/prerender/prerender_util.h" 6 7#include "base/logging.h" 8#include "base/metrics/histogram.h" 9#include "base/metrics/sparse_histogram.h" 10#include "base/strings/string_util.h" 11#include "content/public/browser/resource_request_info.h" 12#include "net/http/http_response_headers.h" 13#include "net/url_request/url_request.h" 14#include "url/url_canon.h" 15#include "url/url_parse.h" 16#include "url/url_util.h" 17#include "webkit/common/resource_type.h" 18 19namespace prerender { 20 21namespace { 22 23const char kModPagespeedHeader[] = "X-Mod-Pagespeed"; 24const char kPageSpeedHeader[] = "X-Page-Speed"; 25const char kPagespeedServerHistogram[] = 26 "Prerender.PagespeedHeader.ServerCounts"; 27const char kPagespeedVersionHistogram[] = 28 "Prerender.PagespeedHeader.VersionCounts"; 29 30enum PagespeedHeaderServerType { 31 PAGESPEED_TOTAL_RESPONSES = 0, 32 PAGESPEED_MOD_PAGESPEED_SERVER = 1, 33 PAGESPEED_NGX_PAGESPEED_SERVER = 2, 34 PAGESPEED_PAGESPEED_SERVICE_SERVER = 3, 35 PAGESPEED_UNKNOWN_SERVER = 4, 36 PAGESPEED_SERVER_MAXIMUM = 5 37}; 38 39// Private function to parse the PageSpeed version number and encode it in 40// buckets 2 through 99: if it is in the format a.b.c.d-e the bucket will be 41// 2 + 2 * (max(c, 10) - 10) + (d > 1 ? 1 : 0); if it is not in this format 42// we return zero. 43int GetXModPagespeedBucketFromVersion(const std::string& version) { 44 int a, b, c, d, e; 45 int num_parsed = sscanf(version.c_str(), "%d.%d.%d.%d-%d", 46 &a, &b, &c, &d, &e); 47 int output = 0; 48 if (num_parsed == 5) { 49 output = 2; 50 if (c > 10) 51 output += 2 * (c - 10); 52 if (d > 1) 53 output++; 54 if (output < 2 || output > 99) 55 output = 0; 56 } 57 return output; 58} 59 60// Private function to parse the X-Page-Speed header value and determine 61// whether it is in the PageSpeed Service format, namely m_n_dc were m_n is 62// a version number and dc is an encoded 2-character value. 63bool IsPageSpeedServiceVersionNumber(const std::string& version) { 64 int a, b; 65 char c, d, e; // e is to detect EOL as we check that it /isn't/ converted. 66 int num_parsed = sscanf(version.c_str(), "%d_%d_%c%c%c", &a, &b, &c, &d, &e); 67 return (num_parsed == 4); 68} 69 70enum PrerenderSchemeCancelReason { 71 PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL, 72 PRERENDER_SCHEME_CANCEL_REASON_DATA, 73 PRERENDER_SCHEME_CANCEL_REASON_BLOB, 74 PRERENDER_SCHEME_CANCEL_REASON_FILE, 75 PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM, 76 PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET, 77 PRERENDER_SCHEME_CANCEL_REASON_FTP, 78 PRERENDER_SCHEME_CANCEL_REASON_CHROME, 79 PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION, 80 PRERENDER_SCHEME_CANCEL_REASON_ABOUT, 81 PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN, 82 PRERENDER_SCHEME_CANCEL_REASON_MAX, 83}; 84 85void ReportPrerenderSchemeCancelReason(PrerenderSchemeCancelReason reason) { 86 UMA_HISTOGRAM_ENUMERATION( 87 "Prerender.SchemeCancelReason", reason, 88 PRERENDER_SCHEME_CANCEL_REASON_MAX); 89} 90 91} // namespace 92 93const char kChromeNavigateExtraDataKey[] = "chrome_navigate"; 94 95bool MaybeGetQueryStringBasedAliasURL( 96 const GURL& url, GURL* alias_url) { 97 DCHECK(alias_url); 98 url_parse::Parsed parsed; 99 url_parse::ParseStandardURL(url.spec().c_str(), url.spec().length(), 100 &parsed); 101 url_parse::Component query = parsed.query; 102 url_parse::Component key, value; 103 while (url_parse::ExtractQueryKeyValue(url.spec().c_str(), &query, &key, 104 &value)) { 105 if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "url", key.len)) 106 continue; 107 // We found a url= query string component. 108 if (value.len < 1) 109 continue; 110 url_canon::RawCanonOutputW<1024> decoded_url; 111 url_util::DecodeURLEscapeSequences(url.spec().c_str() + value.begin, 112 value.len, &decoded_url); 113 GURL new_url(base::string16(decoded_url.data(), decoded_url.length())); 114 if (!new_url.is_empty() && new_url.is_valid()) { 115 *alias_url = new_url; 116 return true; 117 } 118 return false; 119 } 120 return false; 121} 122 123uint8 GetQueryStringBasedExperiment(const GURL& url) { 124 url_parse::Parsed parsed; 125 url_parse::ParseStandardURL(url.spec().c_str(), url.spec().length(), 126 &parsed); 127 url_parse::Component query = parsed.query; 128 url_parse::Component key, value; 129 while (url_parse::ExtractQueryKeyValue(url.spec().c_str(), &query, &key, 130 &value)) { 131 if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "lpe", key.len)) 132 continue; 133 134 // We found a lpe= query string component. 135 if (value.len != 1) 136 continue; 137 uint8 exp = *(url.spec().c_str() + value.begin) - '0'; 138 if (exp < 1 || exp > 9) 139 continue; 140 return exp; 141 } 142 return kNoExperiment; 143} 144 145bool IsGoogleDomain(const GURL& url) { 146 return StartsWithASCII(url.host(), std::string("www.google."), true); 147} 148 149bool IsGoogleSearchResultURL(const GURL& url) { 150 if (!IsGoogleDomain(url)) 151 return false; 152 return (url.path().empty() || 153 StartsWithASCII(url.path(), std::string("/search"), true) || 154 (url.path() == "/") || 155 StartsWithASCII(url.path(), std::string("/webhp"), true)); 156} 157 158bool IsNoSwapInExperiment(uint8 experiment_id) { 159 // Currently, experiments 5 and 6 fall in this category. 160 return experiment_id == 5 || experiment_id == 6; 161} 162 163bool IsControlGroupExperiment(uint8 experiment_id) { 164 // Currently, experiments 7 and 8 fall in this category. 165 return experiment_id == 7 || experiment_id == 8; 166} 167 168void GatherPagespeedData(const ResourceType::Type resource_type, 169 const GURL& request_url, 170 const net::HttpResponseHeaders* response_headers) { 171 if (resource_type != ResourceType::MAIN_FRAME || 172 !request_url.SchemeIsHTTPOrHTTPS()) 173 return; 174 175 // bucket 0 counts every response seen. 176 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram, 177 PAGESPEED_TOTAL_RESPONSES, 178 PAGESPEED_SERVER_MAXIMUM); 179 if (!response_headers) 180 return; 181 182 void* iter = NULL; 183 std::string name; 184 std::string value; 185 while (response_headers->EnumerateHeaderLines(&iter, &name, &value)) { 186 if (name == kModPagespeedHeader) { 187 // Bucket 1 counts occurences of the X-Mod-Pagespeed header. 188 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram, 189 PAGESPEED_MOD_PAGESPEED_SERVER, 190 PAGESPEED_SERVER_MAXIMUM); 191 if (!value.empty()) { 192 // If the header value is in the X-Mod-Pagespeed version number format 193 // then increment the appropriate bucket, otherwise increment bucket 1, 194 // which is the catch-all "unknown version number" bucket. 195 int bucket = GetXModPagespeedBucketFromVersion(value); 196 if (bucket > 0) { 197 UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket); 198 } else { 199 UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, 1); 200 } 201 } 202 break; 203 } else if (name == kPageSpeedHeader) { 204 // X-Page-Speed header versions are either in the X-Mod-Pagespeed format, 205 // indicating an nginx installation, or they're in the PageSpeed Service 206 // format, indicating a PSS installation, or in some other format, 207 // indicating an unknown installation [possibly IISpeed]. 208 if (!value.empty()) { 209 int bucket = GetXModPagespeedBucketFromVersion(value); 210 if (bucket > 0) { 211 // Bucket 2 counts occurences of the X-Page-Speed header with a 212 // value in the X-Mod-Pagespeed version number format. We also 213 // count these responses in the version histogram. 214 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram, 215 PAGESPEED_NGX_PAGESPEED_SERVER, 216 PAGESPEED_SERVER_MAXIMUM); 217 UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket); 218 } else if (IsPageSpeedServiceVersionNumber(value)) { 219 // Bucket 3 counts occurences of the X-Page-Speed header with a 220 // value in the PageSpeed Service version number format. 221 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram, 222 PAGESPEED_PAGESPEED_SERVICE_SERVER, 223 PAGESPEED_SERVER_MAXIMUM); 224 } else { 225 // Bucket 4 counts occurences of all other values. 226 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram, 227 PAGESPEED_UNKNOWN_SERVER, 228 PAGESPEED_SERVER_MAXIMUM); 229 } 230 } 231 break; 232 } 233 } 234} 235 236void URLRequestResponseStarted(net::URLRequest* request) { 237 const content::ResourceRequestInfo* info = 238 content::ResourceRequestInfo::ForRequest(request); 239 GatherPagespeedData(info->GetResourceType(), 240 request->url(), 241 request->response_headers()); 242} 243 244void ReportPrerenderExternalURL() { 245 ReportPrerenderSchemeCancelReason( 246 PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL); 247} 248 249void ReportUnsupportedPrerenderScheme(const GURL& url) { 250 if (url.SchemeIs("data")) { 251 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_DATA); 252 } else if (url.SchemeIs("blob")) { 253 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_BLOB); 254 } else if (url.SchemeIsFile()) { 255 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FILE); 256 } else if (url.SchemeIsFileSystem()) { 257 ReportPrerenderSchemeCancelReason( 258 PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM); 259 } else if (url.SchemeIs("ws") || url.SchemeIs("wss")) { 260 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET); 261 } else if (url.SchemeIs("ftp")) { 262 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FTP); 263 } else if (url.SchemeIs("chrome")) { 264 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_CHROME); 265 } else if (url.SchemeIs("chrome-extension")) { 266 ReportPrerenderSchemeCancelReason( 267 PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION); 268 } else if (url.SchemeIs("about")) { 269 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_ABOUT); 270 } else { 271 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN); 272 } 273} 274 275} // namespace prerender 276