# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Component for automatically creating masks of changing areas of a website.

Works by repeated invocation of a browser and scraping of the resulting page.
Areas that differ will be added to the auto-generated mask. The mask generator
considers the mask complete when further scrapes fail to produce any differences
in the mask.
"""

import os            # Functions for walking the directory tree
import tempfile      # Get a temporary directory to hold intermediates
import time          # Used for sleep() and naming masks by time

import command_line
import drivers
from PIL import Image
from PIL import ImageChops
import scrapers


def CreateCommand(cmdline):
  """Inserts the command and arguments into a command line for parsing.

  Registers the "maskmaker" command, with ValidateMaskmaker as its validator
  and ExecuteMaskmaker as its executor, then declares every option the command
  accepts along with the constraints between those options.

  Args:
    cmdline: the command-line object to register with; it must provide
      AddCommand().
  """
  cmd = cmdline.AddCommand(
    ["maskmaker"],
    "Automatically generates a mask from a list of URLs",
    ValidateMaskmaker,
    ExecuteMaskmaker)

  # Browser selection
  cmd.AddArgument(
    ["-bp", "--browserpath"], "Full path to browser's executable",
    type="readfile", metaname="PATH")
  cmd.AddArgument(
    ["-b", "--browser"], "Which browser to use", type="string",
    default="chrome")
  cmd.AddArgument(
    ["-bv", "--browserver"], "Version of the browser", metaname="VERSION")

  # Output location
  cmd.AddArgument(
    ["-o", "--outdir"], "Directory to store generated masks", metaname="DIR",
    required=True)

  # URL source: exactly one of a single URL or a file listing URLs
  cmd.AddArgument(
    ["-u", "--url"], "URL to compare")
  cmd.AddArgument(
    ["-l", "--list"], "List of URLs to compare", type="readfile")
  cmd.AddMutualExclusion(["--url", "--list"])

  # Selecting a slice of the URL list; these only make sense with --list
  cmd.AddArgument(
    ["-s", "--startline"], "First line of URL list", type="int")
  cmd.AddArgument(
    ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
  cmd.AddArgument(
    ["-c", "--count"], "Number of lines of URL file to use", type="int")
  cmd.AddDependency("--startline", "--list")
  cmd.AddRequiredGroup(["--url", "--list"])
  cmd.AddDependency("--endline", "--list")
  cmd.AddDependency("--count", "--list")
  cmd.AddMutualExclusion(["--count", "--endline"])
  cmd.AddDependency("--count", "--startline")

  # Timing and termination conditions
  cmd.AddArgument(
    ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
    "finish loading",
    type="int", default=60)
  cmd.AddArgument(
    ["-w", "--wait"],
    "Amount of time (in seconds) to wait between successive scrapes",
    type="int", default=60)
  cmd.AddArgument(
    ["-sc", "--scrapes"],
    "Number of successive scrapes which must result in no change to a mask "
    "before mask creation is considered complete", type="int", default=10)
  cmd.AddArgument(
    ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")
  cmd.AddArgument(["-sd", "--scrapedir"], "Directory to store scrapes")
  cmd.AddArgument(
    ["-gu", "--giveup"],
    "Number of times to scrape before giving up", type="int", default=50)
  # The trailing space after "will be" keeps the two concatenated literals
  # from running together in the help output.
  cmd.AddArgument(
    ["-th", "--threshhold"],
    "Percentage of different pixels (0-100) above which the scrape will be "
    "discarded and the mask not updated.", type="int", default=100)
  cmd.AddArgument(
    ["--er", "--errors"],
    "Number of times a scrape can fail before giving up on the URL.",
    type="int", default=1)


def ValidateMaskmaker(command):
  """Validate the arguments to maskmaker.

  Only --browserpath is checked here: when given, its file extension
  (compared case-insensitively) must be .exe, .com or .bat.

  Args:
    command: the parsed command, indexable by option name.

  Raises:
    command_line.ParseError: if --browserpath is given and does not end in
      one of the accepted executable extensions.
  """
  executables = [".exe", ".com", ".bat"]
  if command["--browserpath"]:
    if os.path.splitext(command["--browserpath"])[1].lower() not in executables:
      raise command_line.ParseError("Browser filename must be an executable")


def ExecuteMaskmaker(command):
  """Performs automatic mask generation.

  Repeatedly scrapes every requested URL and compares each new scrape with
  the oldest stored scrape for that URL (the baseline). Pixels that differ
  are blacked out in the URL's mask. A URL is finished once --scrapes
  consecutive scrapes show no difference outside the mask, and is abandoned
  once it has failed --errors times. The whole run stops after --giveup
  passes. A summary report is printed at the end.

  Args:
    command: the parsed command, indexable by option name.
  """

  # Get the list of URLs to generate masks for
  class MaskmakerURL(object):
    """Helper class for holding information about a URL passed to maskmaker."""
    __slots__ = ['url', 'consecutive_successes', 'errors']
    def __init__(self, url):
      self.url = url
      self.consecutive_successes = 0
      self.errors = 0

  if command["--url"]:
    url_list = [MaskmakerURL(command["--url"])]
  else:
    startline = command["--startline"]
    if command["--count"]:
      endline = startline+command["--count"]
    else:
      endline = command["--endline"]
    # Read the list inside a with-block so the file handle is always closed.
    with open(command["--list"], "r") as url_file:
      url_list = [MaskmakerURL(url.strip())
                  for url in url_file.readlines()[startline:endline]]

  complete_list = []
  error_list = []

  outdir = command["--outdir"]
  scrapes = command["--scrapes"]
  errors = command["--errors"]
  size = command["--size"]
  scrape_pass = 0

  scrapedir = command["--scrapedir"]
  if not scrapedir: scrapedir = tempfile.gettempdir()

  # Get the scraper
  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))

  # Repeatedly iterate through the list of URLs until either every URL has
  # a successful mask or too many errors, or we've exceeded the giveup limit
  while url_list and scrape_pass < command["--giveup"]:
    # Scrape each URL
    for url in url_list:
      print("Processing %r..." % url.url)
      mask_filename = drivers.windowing.URLtoFilename(url.url, outdir, ".bmp")

      # Load the existing mask. This is in a loop so we can try to recover
      # from error conditions
      while True:
        try:
          mask = Image.open(mask_filename)
          if mask.size != size:
            print(" %r already exists and is the wrong size! (%r vs %r)" % (
              mask_filename, mask.size, size))
            # Retry under a name that embeds the requested size, inserted
            # just before the 4-character extension.
            mask_filename = "%s_%r%s" % (
              mask_filename[:-4], size, mask_filename[-4:])
            print(" Trying again as %r..." % mask_filename)
            continue
          break
        except IOError:
          # No usable mask on disk: start from an all-white mask, save it,
          # and let the next loop iteration load it back.
          print(" %r does not exist, creating" % mask_filename)
          mask = Image.new("1", size, 1)
          mask.save(mask_filename)

      # Find the stored scrape path
      mask_scrape_dir = os.path.join(
        scrapedir, os.path.splitext(os.path.basename(mask_filename))[0])
      drivers.windowing.PreparePath(mask_scrape_dir)

      # Find the baseline image: the first stored scrape in sorted order.
      # Scrape names are timestamps, so that is the oldest one.
      mask_scrapes = os.listdir(mask_scrape_dir)
      mask_scrapes.sort()

      if not mask_scrapes:
        print(" No baseline image found, mask will not be updated")
        baseline = None
      else:
        baseline = Image.open(os.path.join(mask_scrape_dir, mask_scrapes[0]))

      mask_scrape_filename = os.path.join(mask_scrape_dir,
                                          time.strftime("%y%m%d-%H%M%S.bmp"))

      # Do the scrape
      result = scraper.Scrape(
        [url.url], mask_scrape_dir, size, (0, 0),
        command["--timeout"], path=command["--browserpath"],
        filename=mask_scrape_filename)

      if result:
        # Return value other than None means an error
        print(" Scrape failed with error '%r'" % result)
        url.errors += 1
        if url.errors >= errors:
          print(" ** Exceeded maximum error count for this URL, giving up")
        # A failed scrape gives nothing to compare, so move to the next URL.
        continue

      # Load the new scrape
      scrape = Image.open(mask_scrape_filename)

      # Calculate the difference between the new scrape and the baseline,
      # subject to the current mask
      if baseline:
        diff = ImageChops.multiply(ImageChops.difference(scrape, baseline),
                                   mask.convert(scrape.mode))

        # If the difference is none, there's nothing to update
        # NOTE(review): comparing against (0, 0) relies on getextrema()
        # returning one (min, max) pair per band, i.e. a multi-band image.
        # For a single-band image it returns a flat pair and this test would
        # never be true - confirm scrapes are always multi-band.
        if max(diff.getextrema()) == (0, 0):
          print(" Scrape identical to baseline, no change in mask")
          url.consecutive_successes += 1
          if url.consecutive_successes >= scrapes:
            print(" ** No change for %r scrapes, done!" % scrapes)
        else:
          # convert the difference to black and white, then change all
          # black pixels (where the scrape and the baseline were identical)
          # to white, all others (where the scrape and the baseline differed)
          # to black.
          #
          # Since the below command is a little unclear, here's how it works.
          #    1. convert("L") converts the RGB image to grayscale
          #    2. point() maps grayscale values (or the individual channels
          #       of an RGB image) to different ones. Because it operates on
          #       individual channels, the grayscale conversion from step 1
          #       is necessary.
          #    3. The "1" second parameter to point() outputs the result as
          #       a monochrome bitmap. If the original RGB image were converted
          #       directly to monochrome, PIL would dither it.
          diff = diff.convert("L").point([255]+[0]*255, "1")

          # count the number of different pixels
          # (takes the count from the first entry of getcolors(); presumably
          # that entry is the black, i.e. differing, pixels - confirm)
          diff_pixels = diff.getcolors()[0][0]

          # is this too much?
          diff_pixel_percent = diff_pixels * 100.0 / (mask.size[0]*mask.size[1])
          if diff_pixel_percent > command["--threshhold"]:
            print(" Scrape differed from baseline by %.2f percent, ignoring"
                  % diff_pixel_percent)
          else:
            print(" Scrape differed in %d pixels, updating mask" % diff_pixels)
            mask = ImageChops.multiply(mask, diff)
            mask.save(mask_filename)

            # reset the number of consecutive "good" scrapes
            url.consecutive_successes = 0

    # Remove URLs whose mask is deemed done
    complete_list.extend(
      [url for url in url_list if url.consecutive_successes >= scrapes])
    error_list.extend(
      [url for url in url_list if url.errors >= errors])
    url_list = [
      url for url in url_list if
      url.consecutive_successes < scrapes and
      url.errors < errors]

    scrape_pass += 1
    print("**Done with scrape pass %d\n" % scrape_pass)

    if scrape_pass >= command["--giveup"]:
      print("**Exceeded giveup threshhold. Giving up.")
    else:
      print("Waiting %d seconds..." % command["--wait"])
      time.sleep(command["--wait"])

  # Summary report. Each print call takes a single pre-formatted string so
  # the output is the same whether print is a statement or a function.
  print("")
  print("*** MASKMAKER COMPLETE ***")
  print("Summary report:")
  print(" %d masks successfully generated" % len(complete_list))
  for url in complete_list:
    print("  %s" % url.url)
  print(" %d masks failed with too many errors" % len(error_list))
  for url in error_list:
    print("  %s" % url.url)
  if scrape_pass >= command["--giveup"]:
    print(" %d masks were not completed before "
          "reaching the giveup threshhold" % len(url_list))
    for url in url_list:
      print("  %s" % url.url)