#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Tool for seeing the real world impact of a patch.
#
# Layout Tests can tell you whether something has changed, but this can help
# you determine whether a subtle/controversial change is beneficial or not.
#
# It dumps the rendering of a large number of sites, both with and without a
# patch being evaluated, then sorts them by greatest difference in rendering,
# such that a human reviewer can quickly review the most impacted sites,
# rather than having to manually try sites to see if anything changes.
#
# In future it might be possible to extend this to other kinds of differences,
# e.g. page load times.
18effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 19effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport argparse 20effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom argparse import RawTextHelpFormatter 21effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom contextlib import closing 22effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport datetime 23effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport errno 24effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom distutils.spawn import find_executable 25effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom operator import itemgetter 26effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport multiprocessing 27effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport os 28effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport re 29effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom cStringIO import StringIO 30effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport subprocess 31effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport sys 32effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport textwrap 33effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport time 34effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom urllib2 import urlopen 35effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom urlparse import urlparse 36effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochimport webbrowser 37effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom zipfile import ZipFile 38effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 39effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom nsfw_urls import nsfw_urls 40effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 41effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochaction = None 42effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochallow_js = False 43effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochadditional_content_shell_flags = "" 44effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochchromium_src_root = "" 
chromium_out_dir = ""
image_diff = ""
content_shell = ""
output_dir = ""
num_sites = 100
urls = []
print_lock = multiprocessing.Lock()

# PNG file signature and the trailing bytes of the IEND chunk; used to cut the
# image out of content_shell's mixed text/binary output.
_PNG_START = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"
_PNG_END = b"\x49\x45\x4E\x44\xAE\x42\x60\x82"

# Shape of the two-line report printed by `image_diff --histogram`.
_IMAGE_DIFF_RE = re.compile(
    r'histogram diff: (\d+\.\d{2})% (?:passed|failed)\n'
    r'exact diff: (\d+\.\d{2})% (?:passed|failed)')


def MakeDirsIfNotExist(dir):
    """Create directory `dir` (and any missing parents) if it is not there yet.

    Raises:
      OSError: if creation fails for any reason other than the directory
        already existing. A path that exists but is not a directory is
        reported as an error rather than silently accepted.
    """
    try:
        os.makedirs(dir)
    except OSError as e:
        # EEXIST is also reported when a regular file occupies the path; only
        # swallow the error when the directory really is there.
        if e.errno != errno.EEXIST or not os.path.isdir(dir):
            raise


def _SitePaths(url):
    """Return (hostname, host_dir, index_html_path) for a sample url."""
    hostname = urlparse(url).hostname
    host_dir = os.path.join(output_dir, "data", hostname)
    return hostname, host_dir, os.path.join(host_dir, hostname, "index.html")


def _PoolMap(task, items, processes=None, chunksize=None):
    """Run `task` over `items` in a worker pool and always release the pool.

    Returns the list of results in the order of `items`.
    """
    pool = multiprocessing.Pool(processes)
    try:
        results = pool.map(task, items, chunksize)
    except BaseException:
        # Includes KeyboardInterrupt: kill the workers instead of waiting.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    return results


def SetupPathsAndOut():
    """Locate the Chromium checkout, its out dir and the tools this script runs.

    Fills in the module globals chromium_src_root, chromium_out_dir,
    output_dir, image_diff and content_shell, and creates output_dir.

    Returns:
      False if neither "out_linux" nor "out" exists under the source root,
      True otherwise.
    """
    global chromium_src_root, chromium_out_dir, output_dir
    global image_diff, content_shell
    chromium_src_root = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                                     os.pardir,
                                                     os.pardir))
    # Find out directory (might be out_linux for users of cr).
    for out_suffix in ["_linux", ""]:
        out_dir = os.path.join(chromium_src_root, "out" + out_suffix)
        if os.path.exists(out_dir):
            chromium_out_dir = out_dir
            break
    if not chromium_out_dir:
        return False

    this_script_name = "real_world_impact"
    release_dir = os.path.join(chromium_out_dir, "Release")
    output_dir = os.path.join(release_dir, this_script_name)
    MakeDirsIfNotExist(output_dir)

    image_diff = os.path.join(release_dir, "image_diff")

    # On any other platform content_shell keeps its previous value.
    if sys.platform == 'darwin':
        content_shell = os.path.join(
            release_dir, "Content Shell.app/Contents/MacOS/Content Shell")
    elif sys.platform.startswith('linux'):
        content_shell = os.path.join(release_dir, "content_shell")
    elif sys.platform.startswith('win'):
        content_shell = os.path.join(release_dir, "content_shell.exe")
    return True


def CheckPrerequisites():
    """Check that wget, image_diff and content_shell are available.

    Prints a hint for the first missing tool.

    Returns:
      True if everything was found, False otherwise.
    """
    if not find_executable("wget"):
        print("wget not found! Install wget and re-run this.")
        return False
    if not os.path.exists(image_diff):
        print("image_diff not found (%s)!" % image_diff)
        print("Build the image_diff target and re-run this.")
        return False
    if not os.path.exists(content_shell):
        print("Content shell not found (%s)!" % content_shell)
        print("Build Release/content_shell and re-run this.")
        return False
    return True


def _SelectTopUrls(csv_lines, count, bad_urls):
    """Turn the first `count` "rank,hostname" entries into a list of urls.

    Entries whose hostname contains a path, or whose url is in `bad_urls`,
    still use up one of the `count` slots but are left out of the result.
    Lines without a hostname field (e.g. blank lines) are ignored.
    """
    selected = []
    remaining = count
    for entry in csv_lines:
        if remaining <= 0:
            break
        fields = entry.strip().split(',')
        if len(fields) < 2:
            continue
        remaining -= 1
        hostname = fields[1]
        if '/' in hostname:  # Skip Alexa 1,000,000 entries that have paths.
            continue
        url = "http://%s/" % hostname
        if url not in bad_urls:
            selected.append(url)
    return selected


def PickSampleUrls():
    """Fill the global `urls` with the sample of sites to process.

    Downloads the Alexa top-1m list into the data directory on first use.
    If a url list for the current num_sites was saved earlier it is reused,
    so "before N" and "after N" run on the same sample; urls listed in
    bad_urls.txt are always filtered out.

    Returns:
      False if action is 'compare' and no saved url list exists for
      num_sites, True otherwise.
    """
    global urls
    data_dir = os.path.join(output_dir, "data")
    MakeDirsIfNotExist(data_dir)

    # Download Alexa top 1,000,000 sites
    # TODO(johnme): Should probably update this when it gets too stale...
    csv_path = os.path.join(data_dir, "top-1m.csv")
    if not os.path.exists(csv_path):
        print("Downloading list of top 1,000,000 sites from Alexa...")
        csv_url = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
        with closing(urlopen(csv_url)) as stream:
            with ZipFile(StringIO(stream.read())) as archive:
                archive.extract("top-1m.csv", data_dir)

    bad_urls_path = os.path.join(data_dir, "bad_urls.txt")
    if os.path.exists(bad_urls_path):
        with open(bad_urls_path) as f:
            bad_urls = set(f.read().splitlines())
    else:
        bad_urls = set()

    # See if we've already selected a sample of size num_sites (this way, if you
    # call this script with arguments "before N" then "after N", where N is the
    # same number, we'll use the same sample, as expected!).
    urls_path = os.path.join(data_dir, "%06d_urls.txt" % num_sites)
    if os.path.exists(urls_path):
        with open(urls_path) as f:
            urls = [u for u in f.read().splitlines() if u not in bad_urls]
        return True

    if action == 'compare':
        print(("Error: you must run 'before %d' and 'after %d' before "
               "running 'compare %d'") % (num_sites, num_sites, num_sites))
        return False
    print("Picking %d sample urls..." % num_sites)

    # TODO(johnme): For now this just gets the top num_sites entries. In future
    # this should pick a weighted random sample. For example, it could fit a
    # power-law distribution, which is a good model of website popularity
    # (http://www.useit.com/alertbox/9704b.html).
    with open(csv_path) as f:
        urls = _SelectTopUrls(f, num_sites, bad_urls)
    # Don't write these to disk yet; we'll do that in SaveWorkingUrls below
    # once we have tried to download them and seen which ones fail.
    return True


def SaveWorkingUrls():
    """Write the global `urls` to the per-num_sites list, unless one exists."""
    # TODO(johnme): Update the list if a url that used to work goes offline.
    urls_path = os.path.join(output_dir, "data", "%06d_urls.txt" % num_sites)
    if not os.path.exists(urls_path):
        with open(urls_path, 'w') as f:
            f.writelines(u + '\n' for u in urls)


def PrintElapsedTime(elapsed, detail=""):
    """Print `elapsed` seconds as "Took <m>m<s>s", followed by `detail`."""
    elapsed = round(elapsed * 10) / 10.0
    m = elapsed / 60
    s = elapsed % 60
    # A single formatted string keeps the output identical under Python 2 and 3
    # (including the space that precedes an empty detail).
    print("Took %dm%.1fs %s" % (m, s, detail))


def DownloadStaticCopyTask(url):
    """Mirror `url` and its page requisites into the data directory with wget.

    Returns:
      True if an index.html for the site exists afterwards, False if it does
      not or if the download was interrupted from the keyboard.
    """
    hostname, host_dir, download_path = _SitePaths(url)
    # Use wget for now, as does a reasonable job of spidering page dependencies
    # (e.g. CSS, JS, images).
    success = True
    try:
        subprocess.check_call(["wget",
                               "--execute", "robots=off",
                               ("--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS "
                                "X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) C"
                                "hrome/32.0.1700.14 Safari/537.36"),
                               "--page-requisites",
                               "--span-hosts",
                               "--adjust-extension",
                               "--convert-links",
                               "--directory-prefix=" + host_dir,
                               "--force-directories",
                               "--default-page=index.html",
                               "--no-check-certificate",
                               "--timeout=5",  # 5s timeout
                               "--tries=2",
                               "--quiet",
                               url])
    except KeyboardInterrupt:
        success = False
    except subprocess.CalledProcessError:
        # Ignoring these for now, as some sites have issues with their
        # subresources yet still produce a renderable index.html
        pass

    if success and not os.path.exists(download_path):
        success = False
    if not success:
        with print_lock:
            print("Failed to download: %s" % url)
        return False
    with print_lock:
        print("Downloaded: %s" % url)
    return True


def DownloadStaticCopies():
    """Download every url in the global `urls` that has no local copy yet.

    Urls that fail are appended to bad_urls.txt and removed from `urls`; the
    surviving list is then handed to SaveWorkingUrls.
    """
    global urls
    new_urls = [u for u in urls if not os.path.exists(_SitePaths(u)[2])]

    if new_urls:
        print("Downloading static copies of %d sites..." % len(new_urls))
        start_time = time.time()

        results = _PoolMap(DownloadStaticCopyTask, new_urls, processes=20)
        failed_urls = [u for u, ok in zip(new_urls, results) if not ok]
        if failed_urls:
            bad_urls_path = os.path.join(output_dir, "data", "bad_urls.txt")
            with open(bad_urls_path, 'a') as f:
                f.writelines(u + '\n' for u in failed_urls)
            failed_urls_set = set(failed_urls)
            urls = [u for u in urls if u not in failed_urls_set]

        PrintElapsedTime(time.time() - start_time)

    SaveWorkingUrls()


def _StripActiveContent(html):
    """Remove script-like elements and onload/onerror attributes from `html`."""
    # These aren't intended to be XSS safe :)
    block_tags = (r'<\s*(script|object|video|audio|iframe|frameset|frame)'
                  r'\b.*?<\s*\/\s*\1\s*>')
    # The double-quoted form must consume its closing quote, and an unquoted
    # value must stop before the tag's closing '>'.
    block_attrs = r'\s(onload|onerror)\s*=\s*(\'[^\']*\'|"[^"]*"|[^\s>]*)'
    html = re.sub(block_tags, '', html, flags=re.I | re.S)
    return re.sub(block_attrs, '', html, flags=re.I)


def _ExtractPng(data):
    """Return the PNG embedded in `data`, or None if there is no complete one."""
    try:
        start = data.index(_PNG_START)
        # Only accept an end marker that comes after the start marker.
        end = data.rindex(_PNG_END, start) + len(_PNG_END)
    except ValueError:
        return None
    return data[start:end]


def RunDrtTask(url):
    """Render the local copy of `url` with content_shell and save the PNG.

    Unless allow_js is set, a copy of the page with scripts, embedded media,
    frames and onload/onerror handlers removed is rendered instead (and is
    written next to the original as index-nojs.html on first use).

    Returns:
      (elapsed_seconds, url) on success; False if the page is empty or no PNG
      was found in content_shell's output.
    """
    hostname, host_dir, html_path = _SitePaths(url)

    if not allow_js:
        nojs_path = os.path.join(host_dir, hostname, "index-nojs.html")
        if not os.path.exists(nojs_path):
            with open(html_path) as f:
                html = f.read()
            if not html:
                return False
            with open(nojs_path, 'w') as f:
                f.write(_StripActiveContent(html))
        html_path = nojs_path

    start_time = time.time()

    with open(os.devnull, "w") as fnull:
        p = subprocess.Popen([content_shell,
                              "--dump-render-tree",
                              additional_content_shell_flags,
                              # The single quote is not a typo, it's a separator!
                              html_path + "'--pixel-test"
                             ],
                             shell=False,
                             stdout=subprocess.PIPE,
                             stderr=fnull)
        # communicate() reads stdout to EOF and also reaps the child process.
        result = p.communicate()[0]

    png = _ExtractPng(result)
    if png is None:
        return False

    png_path = os.path.join(output_dir, action, hostname + ".png")
    MakeDirsIfNotExist(os.path.dirname(png_path))
    with open(png_path, 'wb') as f:
        f.write(png)
    return (time.time() - start_time, url)


def RunDrt():
    """Take a screenshot of every url in `urls` and report the time taken."""
    print("Taking screenshots of %d pages..." % len(urls))
    start_time = time.time()

    results = _PoolMap(RunDrtTask, urls, chunksize=1)

    # Failed tasks return False; max() of an empty sequence would raise.
    timings = [t for t in results if t]
    if timings:
        max_time, url = max(timings)
        elapsed_detail = "(slowest: %.2fs on %s)" % (max_time, url)
    else:
        elapsed_detail = "(no pages were rendered successfully)"
    PrintElapsedTime(time.time() - start_time, elapsed_detail)


def _CombinedDiff(output):
    """Fold image_diff's histogram/exact percentages into one sortable score.

    Raises:
      Exception: if `output` does not look like an image_diff report.
    """
    if isinstance(output, bytes) and not isinstance(output, str):
        # Python 3 pipes yield bytes; the pattern is a text pattern.
        output = output.decode("utf-8", "replace")
    diff_match = _IMAGE_DIFF_RE.match(output)
    if not diff_match:
        raise Exception("image_diff output format changed")
    histogram_diff = float(diff_match.group(1))
    exact_diff = float(diff_match.group(2))
    # Never report exactly 0 here: 0 is reserved for identical images.
    return max(histogram_diff + exact_diff / 8, 0.001)


def CompareResultsTask(url):
    """Compare the "before" and "after" screenshots of `url`.

    Returns:
      A (score, url, image_path) tuple. The score is -100 when neither
      screenshot exists, 200 when only one of them exists, 0 when image_diff
      exits with status 0, and otherwise the combined difference percentage.
      image_path is a red placeholder image for the two missing-screenshot
      cases, the "before" screenshot for score 0, and the generated diff PNG
      otherwise.
    """
    hostname = urlparse(url).hostname
    before_path = os.path.join(output_dir, "before", hostname + ".png")
    after_path = os.path.join(output_dir, "after", hostname + ".png")
    diff_path = os.path.join(output_dir, "diff", hostname + ".png")
    MakeDirsIfNotExist(os.path.join(output_dir, "diff"))

    # TODO(johnme): Don't hardcode "real_world_impact".
    red_path = ("data:image/gif;base64,R0lGODlhAQABAPAAAP8AAP///yH5BAAAAAAALAAAAA"
                "ABAAEAAAICRAEAOw==")

    before_exists = os.path.exists(before_path)
    after_exists = os.path.exists(after_path)
    if not before_exists and not after_exists:
        # TODO(johnme): Make this more informative.
        return (-100, url, red_path)
    if before_exists != after_exists:
        # TODO(johnme): Make this more informative.
        return (200, url, red_path)

    # Get percentage difference.
    p = subprocess.Popen([image_diff, "--histogram",
                          before_path, after_path],
                         shell=False,
                         stdout=subprocess.PIPE)
    output, _ = p.communicate()
    if p.returncode == 0:
        return (0, url, before_path)
    combined_diff = _CombinedDiff(output)

    # Produce diff PNG.
    subprocess.call([image_diff, "--diff", before_path, after_path, diff_path])
    return (combined_diff, url, diff_path)
% len(urls) 360effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch start_time = time.time() 361effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 362effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch results = multiprocessing.Pool().map(CompareResultsTask, urls) 363effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch results.sort(key=itemgetter(0), reverse=True) 364effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 365effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch PrintElapsedTime(time.time() - start_time) 366effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 367effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch now = datetime.datetime.today().strftime("%a %Y-%m-%d %H:%M") 368effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch html_start = textwrap.dedent("""\ 369effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <!DOCTYPE html> 370effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <html> 371effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <head> 372effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <title>Real World Impact report %s</title> 373effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <script> 374effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch var togglingImg = null; 375effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch var toggleTimer = null; 376effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 377effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch var before = true; 378effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch function toggle() { 379effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch var newFolder = before ? 
"before" : "after"; 380effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch togglingImg.src = togglingImg.src.replace(/before|after|diff/, newFolder); 381effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch before = !before; 382effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch toggleTimer = setTimeout(toggle, 300); 383effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 384effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 385effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch function startToggle(img) { 386effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch before = true; 387effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch togglingImg = img; 388effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (!img.origSrc) 389effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch img.origSrc = img.src; 390effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch toggle(); 391effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 392effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch function stopToggle(img) { 393effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch clearTimeout(toggleTimer); 394effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch img.src = img.origSrc; 395effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 396effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 397effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch document.onkeydown = function(e) { 398effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch e = e || window.event; 399effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch var keyCode = e.keyCode || e.which; 400effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch var newFolder; 401effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch switch (keyCode) { 402effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch case 49: //'1' 403effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch newFolder = "before"; break; 404effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch case 50: //'2' 405effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch newFolder = "after"; break; 
406effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch case 51: //'3' 407effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch newFolder = "diff"; break; 408effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch default: 409effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch return; 410effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 411effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch var imgs = document.getElementsByTagName("img"); 412effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch for (var i = 0; i < imgs.length; i++) { 413effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch imgs[i].src = imgs[i].src.replace(/before|after|diff/, newFolder); 414effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 415effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch }; 416effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch </script> 417effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <style> 418effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch h1 { 419effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch font-family: sans; 420effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 421effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch h2 { 422effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch font-family: monospace; 423effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch white-space: pre; 424effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 425effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch .nsfw-spacer { 426effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch height: 50vh; 427effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 428effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch .nsfw-warning { 429effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch background: yellow; 430effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch border: 10px solid red; 431effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 432effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch .info { 433effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch font-size: 1.2em; 
434effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch font-style: italic; 435effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 436effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch body:not(.details-supported) details { 437effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch display: none; 438effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 439effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch </style> 440effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch </head> 441effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <body> 442effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <script> 443effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if ('open' in document.createElement('details')) 444effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch document.body.className = "details-supported"; 445effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch </script> 446effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <!--<div class="nsfw-spacer"></div>--> 447effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <p class="nsfw-warning">Warning: sites below are taken from the Alexa top %d 448effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch and may be NSFW.</p> 449effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <!--<div class="nsfw-spacer"></div>--> 450effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <h1>Real World Impact report %s</h1> 451effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <p class="info">Press 1, 2 and 3 to switch between before, after and diff 452effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch screenshots respectively; or hover over the images to rapidly alternate 453effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch between before and after.</p> 454effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch """ % (now, num_sites, now)) 455effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 456effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch html_same_row = """\ 457effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <h2>No difference on <a 
href="%s">%s</a>.</h2> 458effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch """ 459effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 460effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch html_diff_row = """\ 461effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <h2>%7.3f%% difference on <a href="%s">%s</a>:</h2> 462effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <img src="%s" width="800" height="600" 463effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch onmouseover="startToggle(this)" onmouseout="stopToggle(this)"> 464effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch """ 465effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 466effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch html_nsfw_diff_row = """\ 467effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <h2>%7.3f%% difference on <a href="%s">%s</a>:</h2> 468effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <details> 469effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <summary>This site may be NSFW. Click to expand/collapse.</summary> 470effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch <img src="%s" width="800" height="600" 471effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch onmouseover="startToggle(this)" onmouseout="stopToggle(this)"> 472effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch </details> 473effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch """ 474effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 475effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch html_end = textwrap.dedent("""\ 476effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch </body> 477effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch </html>""") 478effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 479effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch html_path = os.path.join(output_dir, "diff.html") 480effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch with open(html_path, 'w') as f: 481effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch f.write(html_start) 482effb81e5f8246d0db0270817048dc992db66e9fbBen 
Murdoch for (diff_float, url, diff_path) in results: 483effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch diff_path = os.path.relpath(diff_path, output_dir) 484effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if diff_float == 0: 485effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch f.write(html_same_row % (url, url)) 486effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch elif url in nsfw_urls: 487effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch f.write(html_nsfw_diff_row % (diff_float, url, url, diff_path)) 488effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch else: 489effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch f.write(html_diff_row % (diff_float, url, url, diff_path)) 490effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch f.write(html_end) 491effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 492effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch webbrowser.open_new_tab("file://" + html_path) 493effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 494effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 495effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochdef main(argv): 496effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch global num_sites, action, allow_js, additional_content_shell_flags 497effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 498effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch parser = argparse.ArgumentParser( 499effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch formatter_class=RawTextHelpFormatter, 500effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch description="Compare the real world impact of a content shell change.", 501effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch epilog=textwrap.dedent("""\ 502effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch Example usage: 503effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 1. Build content_shell in out/Release without any changes. 504effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 2. Run: %s before [num sites to test (default %d)]. 
505effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 3. Either: 506effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch a. Apply your controversial patch and rebuild content_shell. 507effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch b. Pass --additional_flags="--enable_your_flag" in step 4. 508effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 4. Run: %s after [num sites to test (default %d)]. 509effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 5. Run: %s compare [num sites to test (default %d)]. 510effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch This will open the results in your web browser. 511effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch """ % (argv[0], num_sites, argv[0], num_sites, argv[0], num_sites))) 512effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch parser.add_argument("--allow_js", help="Don't disable Javascript", 513effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch action="store_true") 514effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch parser.add_argument("--additional_flags", 515effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch help="Additional flags to pass to content shell") 516effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch parser.add_argument("action", 517effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch help=textwrap.dedent("""\ 518effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch Action to perform. 519effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch download - Just download the sites. 520effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch before - Run content shell and record 'before' result. 521effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch after - Run content shell and record 'after' result. 522effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch compare - Compare before and after results. 
523effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch """), 524effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch choices=["download", "before", "after", "compare"]) 525effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch parser.add_argument("num_sites", 526effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch help="Number of sites (default %s)" % num_sites, 527effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch type=int, default=num_sites, nargs='?') 528effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch args = parser.parse_args() 529effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 530effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch action = args.action 531effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 532effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (args.num_sites): 533effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch num_sites = args.num_sites 534effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 535effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (args.allow_js): 536effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch allow_js = args.allow_js 537effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 538effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (args.additional_flags): 539effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch additional_content_shell_flags = args.additional_flags 540effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 541effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if not SetupPathsAndOut() or not CheckPrerequisites() or not PickSampleUrls(): 542effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch return 1 543effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 544effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if action == 'compare': 545effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch CompareResults() 546effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch else: 547effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch DownloadStaticCopies() 548effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if 
action != 'download': 549effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch RunDrt() 550effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch return 0 551effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 552effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 553effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochif __name__ == '__main__': 554effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch sys.exit(main(sys.argv))