1effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch#!/usr/bin/env python
2effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# Copyright 2014 The Chromium Authors. All rights reserved.
3effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# Use of this source code is governed by a BSD-style license that can be
4effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# found in the LICENSE file.
5effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
6effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# Tool for seeing the real world impact of a patch.
7effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch#
8effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# Layout Tests can tell you whether something has changed, but this can help
9effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# you determine whether a subtle/controversial change is beneficial or not.
10effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch#
11effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# It dumps the rendering of a large number of sites, both with and without a
12effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# patch being evaluated, then sorts them by greatest difference in rendering,
13effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# such that a human reviewer can quickly review the most impacted sites,
14effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# rather than having to manually try sites to see if anything changes.
15effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch#
16effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# In future it might be possible to extend this to other kinds of differences,
17effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch# e.g. page load times.
18effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
19effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport argparse
20effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom argparse import RawTextHelpFormatter
21effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom contextlib import closing
22effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport datetime
23effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport errno
24effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom distutils.spawn import find_executable
25effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom operator import itemgetter
26effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport multiprocessing
27effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport os
28effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport re
29effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom cStringIO import StringIO
30effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport subprocess
31effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport sys
32effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport textwrap
33effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport time
34effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom urllib2 import urlopen
35effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom urlparse import urlparse
36effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport webbrowser
37effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom zipfile import ZipFile
38effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
39effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom nsfw_urls import nsfw_urls
40effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
# Run configuration. |action| is compared against 'compare' and also used as
# the screenshot sub-directory name; presumably these are all filled in from
# the command line elsewhere in this file -- TODO confirm.
action = None
allow_js = False
additional_content_shell_flags = ""
num_sites = 100

# Filesystem locations, resolved by SetupPathsAndOut().
chromium_src_root = chromium_out_dir = output_dir = ""
image_diff = content_shell = ""

# Sample of site urls, filled in by PickSampleUrls().
urls = []

# Held by the download workers while they print, so messages don't interleave.
print_lock = multiprocessing.Lock()
52effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
53effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def MakeDirsIfNotExist(dir):
  """Creates directory |dir|, including any missing parent directories.

  It is not an error for |dir| to already exist as a directory.

  Raises:
    OSError: if the directory could not be created, including the case where
        |dir| already exists but is not a directory.
  """
  try:
    os.makedirs(dir)
  except OSError as e:
    # EEXIST is also reported when |dir| names an existing regular file; only
    # swallow the error when a directory is really there afterwards.
    if e.errno != errno.EEXIST or not os.path.isdir(dir):
      raise
60effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
61effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def SetupPathsAndOut():
  """Resolves the checkout, build-output and tool paths into module globals.

  The source root is taken to be two directories above this script. The first
  of "out_linux" / "out" that exists under it becomes the out directory, and
  the script's own output directory is created under its Release folder.

  Returns:
    False if no out directory was found, True otherwise.
  """
  global chromium_src_root, chromium_out_dir, output_dir
  global image_diff, content_shell
  script_dir = os.path.dirname(__file__)
  chromium_src_root = os.path.abspath(
      os.path.join(script_dir, os.pardir, os.pardir))

  # Users of cr build into out_linux, so that name is tried before plain out.
  for candidate_name in ("out_linux", "out"):
    candidate = os.path.join(chromium_src_root, candidate_name)
    if os.path.exists(candidate):
      chromium_out_dir = candidate
      break
  if not chromium_out_dir:
    return False

  release_dir = os.path.join(chromium_out_dir, "Release")

  output_dir = os.path.join(release_dir, "real_world_impact")
  MakeDirsIfNotExist(output_dir)

  image_diff = os.path.join(release_dir, "image_diff")

  # The content shell binary has a different name/location on each platform;
  # on any other platform |content_shell| is left as it was.
  if sys.platform == 'darwin':
    shell_binary = "Content Shell.app/Contents/MacOS/Content Shell"
  elif sys.platform.startswith('linux'):
    shell_binary = "content_shell"
  elif sys.platform.startswith('win'):
    shell_binary = "content_shell.exe"
  else:
    shell_binary = None
  if shell_binary is not None:
    content_shell = os.path.join(release_dir, shell_binary)
  return True
95effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
96effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def CheckPrerequisites():
  """Checks that wget, image_diff and content_shell can all be found.

  For the first one that is missing, prints what is missing and how to fix it.

  Returns:
    True if everything was found, False otherwise.
  """
  if not find_executable("wget"):
    print("wget not found! Install wget and re-run this.")
    return False

  # (path to check, message when missing, what the user should do about it)
  required_binaries = (
      (image_diff,
       "image_diff not found (%s)!",
       "Build the image_diff target and re-run this."),
      (content_shell,
       "Content shell not found (%s)!",
       "Build Release/content_shell and re-run this."),
  )
  for binary_path, missing_message, remedy in required_binaries:
    if not os.path.exists(binary_path):
      print(missing_message % binary_path)
      print(remedy)
      return False
  return True
110effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
111effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def PickSampleUrls():
  """Fills the global |urls| with the sample of sites to test.

  If a urls file for this |num_sites| already exists it is reused, so that
  "before N", "after N" and "compare N" all see the same sample. Otherwise the
  sample is taken from the first |num_sites| entries of the Alexa list, which
  is downloaded first if it is not on disk yet. Urls listed in bad_urls.txt
  are always left out.

  Returns:
    False if |action| is 'compare' but no urls file exists for |num_sites|,
    True otherwise.
  """
  global urls
  data_dir = os.path.join(output_dir, "data")
  MakeDirsIfNotExist(data_dir)

  # Download Alexa top 1,000,000 sites
  # TODO(johnme): Should probably update this when it gets too stale...
  csv_path = os.path.join(data_dir, "top-1m.csv")
  if not os.path.exists(csv_path):
    print("Downloading list of top 1,000,000 sites from Alexa...")
    csv_url = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
    with closing(urlopen(csv_url)) as stream:
      zipped_csv = StringIO(stream.read())
    ZipFile(zipped_csv).extract("top-1m.csv", data_dir)

  bad_urls = set()
  bad_urls_path = os.path.join(data_dir, "bad_urls.txt")
  if os.path.exists(bad_urls_path):
    with open(bad_urls_path) as f:
      bad_urls.update(f.read().splitlines())

  urls_path = os.path.join(data_dir, "%06d_urls.txt" % num_sites)
  if os.path.exists(urls_path):
    # A sample of this size was already selected; reuse it.
    with open(urls_path) as f:
      urls = [u for u in f.read().splitlines() if u not in bad_urls]
    return True

  if action == 'compare':
    print("Error: you must run 'before %d' and 'after %d' before "
          "running 'compare %d'" % (num_sites, num_sites, num_sites))
    return False
  print("Picking %d sample urls..." % num_sites)

  # TODO(johnme): For now this just gets the top num_sites entries. In future
  # this should pick a weighted random sample. For example, it could fit a
  # power-law distribution, which is a good model of website popularity
  # (http://www.useit.com/alertbox/9704b.html).
  urls = []
  with open(csv_path) as f:
    for index, entry in enumerate(f):
      # Only the first num_sites lines are considered, whether or not each of
      # them ends up in the sample.
      if index >= num_sites:
        break
      hostname = entry.strip().split(',')[1]
      if '/' in hostname:  # Skip Alexa 1,000,000 entries that have paths.
        continue
      url = "http://%s/" % hostname
      if url not in bad_urls:
        urls.append(url)
  # Don't write these to disk yet; SaveWorkingUrls does that once we have
  # tried to download them and seen which ones fail.
  return True
166effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
167effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def SaveWorkingUrls():
  """Writes |urls|, one per line, to the urls file for this |num_sites|.

  Does nothing if that file already exists.
  """
  # TODO(johnme): Update the list if a url that used to work goes offline.
  urls_filename = "%06d_urls.txt" % num_sites
  urls_path = os.path.join(output_dir, "data", urls_filename)
  if os.path.exists(urls_path):
    return
  with open(urls_path, 'w') as f:
    f.write("".join(u + '\n' for u in urls))
174effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
175effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def PrintElapsedTime(elapsed, detail=""):
  """Prints |elapsed| (in seconds) as minutes and seconds, then |detail|.

  The time is rounded to a tenth of a second before it is split up.
  """
  tenths = round(elapsed * 10)
  total_seconds = tenths / 10.0
  minutes = total_seconds / 60  # %d below drops the fractional part.
  seconds = total_seconds % 60
  print("Took %dm%.1fs %s" % (minutes, seconds, detail))
181effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
182effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def DownloadStaticCopyTask(url):
  """Downloads |url| and its page requisites with wget.

  The copy is stored under <output_dir>/data/<hostname>/. Prints whether the
  download worked while holding |print_lock|.

  Returns:
    True if an index.html for the site exists once wget has finished, False
    if it does not or if the download was interrupted from the keyboard.
  """
  hostname = urlparse(url).hostname
  host_dir = os.path.join(output_dir, "data", hostname)
  user_agent = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/32.0.1700.14 Safari/537.36")
  # Use wget for now, as does a reasonable job of spidering page dependencies
  # (e.g. CSS, JS, images).
  wget_command = [
      "wget",
      "--execute", "robots=off",
      "--user-agent=" + user_agent,
      "--page-requisites",
      "--span-hosts",
      "--adjust-extension",
      "--convert-links",
      "--directory-prefix=" + host_dir,
      "--force-directories",
      "--default-page=index.html",
      "--no-check-certificate",
      "--timeout=5",  # 5s timeout
      "--tries=2",
      "--quiet",
      url,
  ]

  downloaded = False
  try:
    # wget's exit status is ignored on purpose: some sites have issues with
    # their subresources yet still produce a renderable index.html.
    subprocess.call(wget_command)
  except KeyboardInterrupt:
    pass
  else:
    index_path = os.path.join(host_dir, hostname, "index.html")
    downloaded = os.path.exists(index_path)

  with print_lock:
    if downloaded:
      print("Downloaded: %s" % url)
    else:
      print("Failed to download: %s" % url)
  return downloaded
225effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
226effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def DownloadStaticCopies():
  """Downloads every site in |urls| that has no index.html on disk yet.

  Downloads run in a pool of 20 worker processes. Urls that fail are appended
  to data/bad_urls.txt and removed from the global |urls|. Finally the
  remaining urls are handed to SaveWorkingUrls().
  """
  global urls
  new_urls = []
  for url in urls:
    hostname = urlparse(url).hostname
    download_path = os.path.join(output_dir, "data", hostname, hostname,
                                 "index.html")
    if not os.path.exists(download_path):
      new_urls.append(url)

  if new_urls:
    print("Downloading static copies of %d sites..." % len(new_urls))
    start_time = time.time()

    pool = multiprocessing.Pool(20)
    try:
      results = pool.map(DownloadStaticCopyTask, new_urls)
    finally:
      # map() has either returned every result or raised, so the workers have
      # nothing left to do; release them instead of leaving 20 idle processes
      # around for the rest of the run.
      pool.terminate()
      pool.join()

    failed_urls = [u for u, ok in zip(new_urls, results) if not ok]
    if failed_urls:
      bad_urls_path = os.path.join(output_dir, "data", "bad_urls.txt")
      with open(bad_urls_path, 'a') as f:
        f.writelines(u + '\n' for u in failed_urls)
      failed_urls_set = set(failed_urls)
      urls = [u for u in urls if u not in failed_urls_set]

    PrintElapsedTime(time.time() - start_time)

  SaveWorkingUrls()
253effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
254effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def _StripActiveContent(html):
  """Returns |html| without script/plugin/media/frame elements and without
  onload/onerror attributes."""
  # These aren't intended to be XSS safe :)
  block_tags = (r'<\s*(script|object|video|audio|iframe|frameset|frame)'
                r'\b.*?<\s*\/\s*\1\s*>')
  # The double-quoted alternative has to consume its closing quote as well,
  # otherwise <body onload="x()"> is turned into the malformed <body">.
  block_attrs = r'\s(onload|onerror)\s*=\s*(\'[^\']*\'|"[^"]*"|\S*)'
  html = re.sub(block_tags, '', html, flags=re.I|re.S)
  return re.sub(block_attrs, '', html, flags=re.I)


def RunDrtTask(url):
  """Renders the static copy of |url| in content shell and saves a screenshot.

  Unless |allow_js| is set, the page is rendered from index-nojs.html, a copy
  of index.html with active content stripped; that copy is created on first
  use. The PNG found in content shell's output is written to
  <output_dir>/<action>/<hostname>.png.

  Returns:
    A (seconds taken, url) tuple on success, or False if the downloaded page
    is empty or content shell's output contains no PNG.
  """
  url_parts = urlparse(url)
  host_dir = os.path.join(output_dir, "data", url_parts.hostname)
  html_path = os.path.join(host_dir, url_parts.hostname, "index.html")

  if not allow_js:
    nojs_path = os.path.join(host_dir, url_parts.hostname, "index-nojs.html")
    if not os.path.exists(nojs_path):
      with open(html_path) as f:
        html = f.read()
      if not html:
        return False
      with open(nojs_path, 'w') as f:
        f.write(_StripActiveContent(html))
    html_path = nojs_path

  start_time = time.time()

  with open(os.devnull, "w") as fnull:
    p = subprocess.Popen([content_shell,
                          "--dump-render-tree",
                          additional_content_shell_flags,
                          # The single quote is not a typo, it's a separator!
                          html_path + "'--pixel-test"
                         ],
                         shell=False,
                         stdout=subprocess.PIPE,
                         stderr=fnull)
    # communicate() reads all of stdout and also waits for the child, so the
    # process is reaped rather than left behind as a zombie.
    result, _ = p.communicate()

  # The screenshot is embedded in the output; locate it by the PNG signature
  # and the trailing 8 bytes of the IEND chunk.
  PNG_START = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"
  PNG_END = b"\x49\x45\x4E\x44\xAE\x42\x60\x82"
  try:
    start = result.index(PNG_START)
    end = result.rindex(PNG_END) + 8
  except ValueError:
    return False

  png_path = os.path.join(output_dir, action, url_parts.hostname + ".png")
  MakeDirsIfNotExist(os.path.dirname(png_path))
  with open(png_path, 'wb') as f:
    f.write(result[start:end])
  elapsed_time = (time.time() - start_time, url)
  return elapsed_time
304effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
305effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def RunDrt():
  """Takes a screenshot of every page in |urls|, in parallel.

  Each url is handed to RunDrtTask in a pool of worker processes, one url per
  task. Afterwards prints the total time taken and, if at least one page was
  rendered, which page was the slowest.
  """
  print("Taking screenshots of %d pages..." % len(urls))
  start_time = time.time()

  pool = multiprocessing.Pool()
  try:
    results = pool.map(RunDrtTask, urls, 1)
  finally:
    # map() has either returned every result or raised; release the workers.
    pool.terminate()
    pool.join()

  # RunDrtTask returns False for pages it could not render. max() raises
  # ValueError on an empty sequence, so cope with no page having succeeded.
  timings = [t for t in results if t]
  if timings:
    max_time, url = max(timings)
    elapsed_detail = "(slowest: %.2fs on %s)" % (max_time, url)
  else:
    elapsed_detail = "(no pages were rendered successfully)"
  PrintElapsedTime(time.time() - start_time, elapsed_detail)
315effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
316effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def CompareResultsTask(url):
  """Scores how much the before and after screenshots of |url| differ.

  Returns:
    A (score, url, image) tuple, where:
      - score is -100 if neither screenshot exists and 200 if only one does;
        in both cases image is a red placeholder data: url.
      - score is 0, and image the before screenshot, if image_diff exits
        with status 0.
      - otherwise score is histogram diff + exact diff / 8 (at least 0.001)
        and image is the diff PNG produced by image_diff.

  Raises:
    Exception: if image_diff's output is not in the expected format.
  """
  png_name = urlparse(url).hostname + ".png"
  before_path = os.path.join(output_dir, "before", png_name)
  after_path = os.path.join(output_dir, "after", png_name)
  diff_path = os.path.join(output_dir, "diff", png_name)
  MakeDirsIfNotExist(os.path.join(output_dir, "diff"))

  # TODO(johnme): Don't hardcode "real_world_impact".
  red_path = ("data:image/gif;base64,R0lGODlhAQABAPAAAP8AAP///yH5BAAAAAAALAAAAA"
              "ABAAEAAAICRAEAOw==")

  have_before = os.path.exists(before_path)
  have_after = os.path.exists(after_path)
  if have_before != have_after:
    # TODO(johnme): Make this more informative.
    return (200, url, red_path)
  if not have_before:
    # Neither screenshot exists.
    # TODO(johnme): Make this more informative.
    return (-100, url, red_path)

  # Get percentage difference.
  histogram_command = [image_diff, "--histogram", before_path, after_path]
  p = subprocess.Popen(histogram_command, shell=False, stdout=subprocess.PIPE)
  output = p.communicate()[0]
  if p.returncode == 0:
    return (0, url, before_path)

  diff_pattern = (r'histogram diff: (\d+\.\d{2})% (?:passed|failed)\n'
                  r'exact diff: (\d+\.\d{2})% (?:passed|failed)')
  diff_match = re.match(diff_pattern, output)
  if not diff_match:
    raise Exception("image_diff output format changed")
  histogram_diff, exact_diff = (float(g) for g in diff_match.groups())
  combined_diff = max(histogram_diff + exact_diff / 8, 0.001)

  # Produce diff PNG.
  subprocess.call([image_diff, "--diff", before_path, after_path, diff_path])
  return (combined_diff, url, diff_path)
356effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
357effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def CompareResults():
  """Diff every sampled page and publish the results as an HTML report.

  Runs CompareResultsTask over the module-level `urls` list in a pool of
  worker processes, orders the resulting (diff, url, image_path) tuples so
  that the largest difference comes first, writes "diff.html" into
  `output_dir`, and finally opens that file in a new web browser tab.

  Pages whose diff value is exactly 0 are listed without a screenshot; pages
  found in `nsfw_urls` have their screenshot wrapped in a collapsed <details>
  element.
  """
  # Parenthesised single-argument form: identical output under the Python 2
  # print statement, and also valid as a Python 3 function call.
  print("Running image_diff on %d pages..." % len(urls))
  start_time = time.time()

  # Own the pool explicitly instead of using an anonymous temporary, so the
  # worker processes are always reaped (even if a task raises) rather than
  # relying on garbage collection. map() blocks until every result is in, so
  # terminating afterwards cannot discard any work.
  pool = multiprocessing.Pool()
  try:
    results = pool.map(CompareResultsTask, urls)
  finally:
    pool.terminate()
    pool.join()
  # Most impacted pages first.
  results.sort(key=itemgetter(0), reverse=True)

  PrintElapsedTime(time.time() - start_time)

  now = datetime.datetime.today().strftime("%a %Y-%m-%d %H:%M")
  # Page header: the script block implements hover toggling between the
  # before/after screenshots and the 1/2/3 keyboard shortcuts by rewriting
  # the folder name inside each image URL.
  html_start = textwrap.dedent("""\
  <!DOCTYPE html>
  <html>
  <head>
  <title>Real World Impact report %s</title>
  <script>
    var togglingImg = null;
    var toggleTimer = null;

    var before = true;
    function toggle() {
      var newFolder = before ? "before" : "after";
      togglingImg.src = togglingImg.src.replace(/before|after|diff/, newFolder);
      before = !before;
      toggleTimer = setTimeout(toggle, 300);
    }

    function startToggle(img) {
      before = true;
      togglingImg = img;
      if (!img.origSrc)
        img.origSrc = img.src;
      toggle();
    }
    function stopToggle(img) {
      clearTimeout(toggleTimer);
      img.src = img.origSrc;
    }

    document.onkeydown = function(e) {
      e = e || window.event;
      var keyCode = e.keyCode || e.which;
      var newFolder;
      switch (keyCode) {
        case 49: //'1'
          newFolder = "before"; break;
        case 50: //'2'
          newFolder = "after"; break;
        case 51: //'3'
          newFolder = "diff"; break;
        default:
          return;
      }
      var imgs = document.getElementsByTagName("img");
      for (var i = 0; i < imgs.length; i++) {
        imgs[i].src = imgs[i].src.replace(/before|after|diff/, newFolder);
      }
    };
  </script>
  <style>
    h1 {
      font-family: sans;
    }
    h2 {
      font-family: monospace;
      white-space: pre;
    }
    .nsfw-spacer {
      height: 50vh;
    }
    .nsfw-warning {
      background: yellow;
      border: 10px solid red;
    }
    .info {
      font-size: 1.2em;
      font-style: italic;
    }
    body:not(.details-supported) details {
      display: none;
    }
  </style>
  </head>
  <body>
    <script>
    if ('open' in document.createElement('details'))
      document.body.className = "details-supported";
    </script>
    <!--<div class="nsfw-spacer"></div>-->
    <p class="nsfw-warning">Warning: sites below are taken from the Alexa top %d
    and may be NSFW.</p>
    <!--<div class="nsfw-spacer"></div>-->
    <h1>Real World Impact report %s</h1>
    <p class="info">Press 1, 2 and 3 to switch between before, after and diff
    screenshots respectively; or hover over the images to rapidly alternate
    between before and after.</p>
  """ % (now, num_sites, now))

  # Row for a page that rendered identically: heading only, no screenshot.
  html_same_row = """\
  <h2>No difference on <a href="%s">%s</a>.</h2>
  """

  # Row for a page that changed: heading plus a hover-toggling screenshot.
  html_diff_row = """\
  <h2>%7.3f%% difference on <a href="%s">%s</a>:</h2>
  <img src="%s" width="800" height="600"
       onmouseover="startToggle(this)" onmouseout="stopToggle(this)">
  """

  # Same as html_diff_row, but the screenshot starts out collapsed.
  html_nsfw_diff_row = """\
  <h2>%7.3f%% difference on <a href="%s">%s</a>:</h2>
  <details>
    <summary>This site may be NSFW. Click to expand/collapse.</summary>
    <img src="%s" width="800" height="600"
         onmouseover="startToggle(this)" onmouseout="stopToggle(this)">
  </details>
  """

  html_end = textwrap.dedent("""\
  </body>
  </html>""")

  html_path = os.path.join(output_dir, "diff.html")
  with open(html_path, 'w') as f:
    f.write(html_start)
    for (diff_float, url, diff_path) in results:
      # Image paths are written relative to the report's own directory.
      diff_path = os.path.relpath(diff_path, output_dir)
      if diff_float == 0:
        f.write(html_same_row % (url, url))
      elif url in nsfw_urls:
        f.write(html_nsfw_diff_row % (diff_float, url, url, diff_path))
      else:
        f.write(html_diff_row % (diff_float, url, url, diff_path))
    f.write(html_end)

  webbrowser.open_new_tab("file://" + html_path)
493effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
494effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
def main(argv):
  """Parse the command line and run the requested action.

  Args:
    argv: Full argument vector. argv[0] is the program name shown in the
      usage examples; argv[1:] holds the options and positional arguments.

  Returns:
    1 if SetupPathsAndOut, CheckPrerequisites or PickSampleUrls reports
    failure (returns a falsy value), otherwise 0.

  Side effects:
    Updates the module-level `action`, and, when the corresponding argument
    was supplied with a truthy value, `num_sites`, `allow_js` and
    `additional_content_shell_flags`.
  """
  global num_sites, action, allow_js, additional_content_shell_flags

  parser = argparse.ArgumentParser(
      formatter_class=RawTextHelpFormatter,
      description="Compare the real world impact of a content shell change.",
      epilog=textwrap.dedent("""\
          Example usage:
            1. Build content_shell in out/Release without any changes.
            2. Run: %s before [num sites to test (default %d)].
            3. Either:
                 a. Apply your controversial patch and rebuild content_shell.
                 b. Pass --additional_flags="--enable_your_flag" in step 4.
            4. Run: %s after [num sites to test (default %d)].
            5. Run: %s compare [num sites to test (default %d)].
               This will open the results in your web browser.
          """ % (argv[0], num_sites, argv[0], num_sites, argv[0], num_sites)))
  parser.add_argument("--allow_js", help="Don't disable Javascript",
                      action="store_true")
  parser.add_argument("--additional_flags",
                      help="Additional flags to pass to content shell")
  parser.add_argument("action",
                      help=textwrap.dedent("""\
                        Action to perform.
                          download - Just download the sites.
                          before - Run content shell and record 'before' result.
                          after - Run content shell and record 'after' result.
                          compare - Compare before and after results.
                      """),
                      choices=["download", "before", "after", "compare"])
  parser.add_argument("num_sites",
                      help="Number of sites (default %s)" % num_sites,
                      type=int, default=num_sites, nargs='?')
  # Parse the vector we were handed rather than implicitly re-reading
  # sys.argv, so the argv parameter is actually honoured. For the script
  # entry point (main(sys.argv)) this is identical to the default.
  args = parser.parse_args(argv[1:])

  action = args.action

  # Only truthy values override the module-level defaults; in particular a
  # num_sites of 0 leaves the existing default in place.
  if args.num_sites:
    num_sites = args.num_sites

  if args.allow_js:
    allow_js = args.allow_js

  if args.additional_flags:
    additional_content_shell_flags = args.additional_flags

  # Each setup step must succeed before any action runs; `or` short-circuits
  # so later steps are skipped once one fails.
  if not SetupPathsAndOut() or not CheckPrerequisites() or not PickSampleUrls():
    return 1

  if action == 'compare':
    CompareResults()
  else:
    # 'download', 'before' and 'after' all start by downloading the sites;
    # only 'before' and 'after' go on to run content shell.
    DownloadStaticCopies()
    if action != 'download':
      RunDrt()
  return 0
551effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
552effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
if __name__ == '__main__':
  # Run the tool only when executed as a script, and hand main()'s return
  # value to the interpreter as the process exit status.
  exit_status = main(sys.argv)
  sys.exit(exit_status)