1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/prerender/prerender_util.h"
6
7#include "base/logging.h"
8#include "base/metrics/histogram.h"
9#include "base/metrics/sparse_histogram.h"
10#include "base/strings/string_util.h"
11#include "content/public/browser/resource_request_info.h"
12#include "net/http/http_response_headers.h"
13#include "net/url_request/url_request.h"
14#include "url/url_canon.h"
15#include "url/url_parse.h"
16#include "url/url_util.h"
17#include "webkit/common/resource_type.h"
18
19namespace prerender {
20
21namespace {
22
// Names of the HTTP response headers that GatherPagespeedData() looks for
// while enumerating a response's header lines.
const char kModPagespeedHeader[] = "X-Mod-Pagespeed";
const char kPageSpeedHeader[] = "X-Page-Speed";
// Names of the UMA histograms that GatherPagespeedData() records into: the
// first is an enumeration over PagespeedHeaderServerType, the second is a
// sparse histogram of version buckets.
const char kPagespeedServerHistogram[] =
    "Prerender.PagespeedHeader.ServerCounts";
const char kPagespeedVersionHistogram[] =
    "Prerender.PagespeedHeader.VersionCounts";
29
// Buckets of the kPagespeedServerHistogram enumeration histogram recorded by
// GatherPagespeedData().
enum PagespeedHeaderServerType {
  // Recorded once for every HTTP(S) main-frame response that is examined.
  PAGESPEED_TOTAL_RESPONSES = 0,
  // The response carried an X-Mod-Pagespeed header.
  PAGESPEED_MOD_PAGESPEED_SERVER = 1,
  // The response carried an X-Page-Speed header whose value parses as an
  // X-Mod-Pagespeed style version number.
  PAGESPEED_NGX_PAGESPEED_SERVER = 2,
  // The response carried an X-Page-Speed header whose value is in the
  // PageSpeed Service version format.
  PAGESPEED_PAGESPEED_SERVICE_SERVER = 3,
  // The response carried an X-Page-Speed header with any other non-empty
  // value.
  PAGESPEED_UNKNOWN_SERVER = 4,
  // Boundary value handed to UMA_HISTOGRAM_ENUMERATION; never recorded as a
  // sample.
  PAGESPEED_SERVER_MAXIMUM = 5
};
38
// Private function to parse the PageSpeed version number and encode it in
// buckets 2 through 99: if it is in the format a.b.c.d-e the bucket will be
// 2 + 2 * (max(c, 10) - 10) + (d > 1 ? 1 : 0); if it is not in this format,
// or the bucket computed that way would exceed 99, we return zero.
//
// Only |c| and |d| influence the result; |a|, |b| and |e| merely have to be
// present for the value to count as a version number. Text following the
// fifth number is ignored by sscanf and therefore does not affect the result.
int GetXModPagespeedBucketFromVersion(const std::string& version) {
  // Largest |c| that still maps into the 2..99 range:
  // 2 + 2 * (58 - 10) + 1 == 99.
  const int kMaxEncodableC = 58;

  int a = 0, b = 0, c = 0, d = 0, e = 0;
  const int num_parsed = sscanf(version.c_str(), "%d.%d.%d.%d-%d",
                                &a, &b, &c, &d, &e);
  if (num_parsed != 5)
    return 0;

  // Reject oversized |c| up front. Such values would land above bucket 99
  // anyway, and checking before the multiplication keeps 2 * (c - 10) from
  // overflowing a signed int (undefined behaviour) on hostile header values.
  if (c > kMaxEncodableC)
    return 0;

  int output = 2;
  if (c > 10)
    output += 2 * (c - 10);
  if (d > 1)
    output++;
  // With c <= kMaxEncodableC the result is always within [2, 99].
  return output;
}
59
// Private function that decides whether an X-Page-Speed header value is in
// the PageSpeed Service format, namely m_n_dc where m_n is a version number
// and dc is an encoded 2-character value. The value must end right after
// those two characters.
bool IsPageSpeedServiceVersionNumber(const std::string& version) {
  // Two numbers plus exactly two characters must convert.
  const int kExpectedConversions = 4;

  int first_number = 0;
  int second_number = 0;
  char first_char = '\0';
  char second_char = '\0';
  // Sentinel: if this one converts too, the value has trailing characters and
  // is therefore not in the expected format.
  char trailing_char = '\0';

  const int converted = sscanf(version.c_str(), "%d_%d_%c%c%c",
                               &first_number, &second_number,
                               &first_char, &second_char, &trailing_char);
  return converted == kExpectedConversions;
}
69
// Samples recorded in the "Prerender.SchemeCancelReason" histogram by
// ReportPrerenderSchemeCancelReason(). Apart from EXTERNAL_PROTOCOL, which is
// reported by ReportPrerenderExternalURL(), each value corresponds to a URL
// scheme tested in ReportUnsupportedPrerenderScheme().
// NOTE(review): values are implicitly numbered, so inserting or reordering
// entries would presumably shift already-recorded buckets -- confirm before
// changing anything but the tail.
enum PrerenderSchemeCancelReason {
  PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL,
  PRERENDER_SCHEME_CANCEL_REASON_DATA,
  PRERENDER_SCHEME_CANCEL_REASON_BLOB,
  PRERENDER_SCHEME_CANCEL_REASON_FILE,
  PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM,
  // Covers both the "ws" and "wss" schemes.
  PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET,
  PRERENDER_SCHEME_CANCEL_REASON_FTP,
  PRERENDER_SCHEME_CANCEL_REASON_CHROME,
  PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION,
  PRERENDER_SCHEME_CANCEL_REASON_ABOUT,
  // Any scheme not matched by the cases above.
  PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN,
  // Boundary value handed to UMA_HISTOGRAM_ENUMERATION; never recorded as a
  // sample.
  PRERENDER_SCHEME_CANCEL_REASON_MAX,
};
84
85void ReportPrerenderSchemeCancelReason(PrerenderSchemeCancelReason reason) {
86  UMA_HISTOGRAM_ENUMERATION(
87      "Prerender.SchemeCancelReason", reason,
88      PRERENDER_SCHEME_CANCEL_REASON_MAX);
89}
90
91}  // namespace
92
// Defined at prerender namespace scope rather than in the anonymous namespace
// above. NOTE(review): nothing in this file reads it; presumably it is
// declared in prerender_util.h for use as a lookup key by other files --
// confirm against callers.
const char kChromeNavigateExtraDataKey[] = "chrome_navigate";
94
95bool MaybeGetQueryStringBasedAliasURL(
96    const GURL& url, GURL* alias_url) {
97  DCHECK(alias_url);
98  url_parse::Parsed parsed;
99  url_parse::ParseStandardURL(url.spec().c_str(), url.spec().length(),
100                              &parsed);
101  url_parse::Component query = parsed.query;
102  url_parse::Component key, value;
103  while (url_parse::ExtractQueryKeyValue(url.spec().c_str(), &query, &key,
104                                         &value)) {
105    if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "url", key.len))
106      continue;
107    // We found a url= query string component.
108    if (value.len < 1)
109      continue;
110    url_canon::RawCanonOutputW<1024> decoded_url;
111    url_util::DecodeURLEscapeSequences(url.spec().c_str() + value.begin,
112                                       value.len, &decoded_url);
113    GURL new_url(base::string16(decoded_url.data(), decoded_url.length()));
114    if (!new_url.is_empty() && new_url.is_valid()) {
115      *alias_url = new_url;
116      return true;
117    }
118    return false;
119  }
120  return false;
121}
122
123uint8 GetQueryStringBasedExperiment(const GURL& url) {
124  url_parse::Parsed parsed;
125  url_parse::ParseStandardURL(url.spec().c_str(), url.spec().length(),
126                              &parsed);
127  url_parse::Component query = parsed.query;
128  url_parse::Component key, value;
129  while (url_parse::ExtractQueryKeyValue(url.spec().c_str(), &query, &key,
130                                         &value)) {
131    if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "lpe", key.len))
132      continue;
133
134    // We found a lpe= query string component.
135    if (value.len != 1)
136      continue;
137    uint8 exp = *(url.spec().c_str() + value.begin) - '0';
138    if (exp < 1 || exp > 9)
139      continue;
140    return exp;
141  }
142  return kNoExperiment;
143}
144
145bool IsGoogleDomain(const GURL& url) {
146  return StartsWithASCII(url.host(), std::string("www.google."), true);
147}
148
149bool IsGoogleSearchResultURL(const GURL& url) {
150  if (!IsGoogleDomain(url))
151    return false;
152  return (url.path().empty() ||
153          StartsWithASCII(url.path(), std::string("/search"), true) ||
154          (url.path() == "/") ||
155          StartsWithASCII(url.path(), std::string("/webhp"), true));
156}
157
158bool IsNoSwapInExperiment(uint8 experiment_id) {
159  // Currently, experiments 5 and 6 fall in this category.
160  return experiment_id == 5 || experiment_id == 6;
161}
162
163bool IsControlGroupExperiment(uint8 experiment_id) {
164  // Currently, experiments 7 and 8 fall in this category.
165  return experiment_id == 7 || experiment_id == 8;
166}
167
168void GatherPagespeedData(const ResourceType::Type resource_type,
169                         const GURL& request_url,
170                         const net::HttpResponseHeaders* response_headers) {
171  if (resource_type != ResourceType::MAIN_FRAME ||
172      !request_url.SchemeIsHTTPOrHTTPS())
173    return;
174
175  // bucket 0 counts every response seen.
176  UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
177                            PAGESPEED_TOTAL_RESPONSES,
178                            PAGESPEED_SERVER_MAXIMUM);
179  if (!response_headers)
180    return;
181
182  void* iter = NULL;
183  std::string name;
184  std::string value;
185  while (response_headers->EnumerateHeaderLines(&iter, &name, &value)) {
186    if (name == kModPagespeedHeader) {
187      // Bucket 1 counts occurences of the X-Mod-Pagespeed header.
188      UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
189                                PAGESPEED_MOD_PAGESPEED_SERVER,
190                                PAGESPEED_SERVER_MAXIMUM);
191      if (!value.empty()) {
192        // If the header value is in the X-Mod-Pagespeed version number format
193        // then increment the appropriate bucket, otherwise increment bucket 1,
194        // which is the catch-all "unknown version number" bucket.
195        int bucket = GetXModPagespeedBucketFromVersion(value);
196        if (bucket > 0) {
197          UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket);
198        } else {
199          UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, 1);
200        }
201      }
202      break;
203    } else if (name == kPageSpeedHeader) {
204      // X-Page-Speed header versions are either in the X-Mod-Pagespeed format,
205      // indicating an nginx installation, or they're in the PageSpeed Service
206      // format, indicating a PSS installation, or in some other format,
207      // indicating an unknown installation [possibly IISpeed].
208      if (!value.empty()) {
209        int bucket = GetXModPagespeedBucketFromVersion(value);
210        if (bucket > 0) {
211          // Bucket 2 counts occurences of the X-Page-Speed header with a
212          // value in the X-Mod-Pagespeed version number format. We also
213          // count these responses in the version histogram.
214          UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
215                                    PAGESPEED_NGX_PAGESPEED_SERVER,
216                                    PAGESPEED_SERVER_MAXIMUM);
217          UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket);
218        } else if (IsPageSpeedServiceVersionNumber(value)) {
219          // Bucket 3 counts occurences of the X-Page-Speed header with a
220          // value in the PageSpeed Service version number format.
221          UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
222                                    PAGESPEED_PAGESPEED_SERVICE_SERVER,
223                                    PAGESPEED_SERVER_MAXIMUM);
224        } else {
225          // Bucket 4 counts occurences of all other values.
226          UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
227                                    PAGESPEED_UNKNOWN_SERVER,
228                                    PAGESPEED_SERVER_MAXIMUM);
229        }
230      }
231      break;
232    }
233  }
234}
235
236void URLRequestResponseStarted(net::URLRequest* request) {
237  const content::ResourceRequestInfo* info =
238      content::ResourceRequestInfo::ForRequest(request);
239  GatherPagespeedData(info->GetResourceType(),
240                      request->url(),
241                      request->response_headers());
242}
243
244void ReportPrerenderExternalURL() {
245  ReportPrerenderSchemeCancelReason(
246      PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL);
247}
248
249void ReportUnsupportedPrerenderScheme(const GURL& url) {
250  if (url.SchemeIs("data")) {
251    ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_DATA);
252  } else if (url.SchemeIs("blob")) {
253    ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_BLOB);
254  } else if (url.SchemeIsFile()) {
255    ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FILE);
256  } else if (url.SchemeIsFileSystem()) {
257    ReportPrerenderSchemeCancelReason(
258        PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM);
259  } else if (url.SchemeIs("ws") || url.SchemeIs("wss")) {
260    ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET);
261  } else if (url.SchemeIs("ftp")) {
262    ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FTP);
263  } else if (url.SchemeIs("chrome")) {
264    ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_CHROME);
265  } else if (url.SchemeIs("chrome-extension")) {
266    ReportPrerenderSchemeCancelReason(
267        PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION);
268  } else if (url.SchemeIs("about")) {
269    ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_ABOUT);
270  } else {
271    ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN);
272  }
273}
274
275}  // namespace prerender
276