15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
"""Component for automatically creating masks of changing areas of a website.

Works by repeated invocation of a browser and scraping of the resulting page.
Areas that differ will be added to the auto-generated mask. The mask generator
considers the mask complete when further scrapes fail to produce any differences
in the mask.
"""
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import os            # Functions for walking the directory tree
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import tempfile      # Get a temporary directory to hold intermediates
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import time          # Used for sleep() and naming masks by time
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import command_line
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import drivers
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from PIL import Image
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from PIL import ImageChops
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import scrapers
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def CreateCommand(cmdline):
  """Inserts the command and arguments into a command line for parsing.

  Registers the "maskmaker" command on cmdline, passing ValidateMaskmaker and
  ExecuteMaskmaker as its callbacks, then declares every argument the command
  accepts together with the constraints between those arguments.

  Args:
    cmdline: Command-line object to register with. Only its AddCommand()
      method is called here; the object it returns receives the argument
      declarations.
  """
  cmd = cmdline.AddCommand(
    ["maskmaker"],
    "Automatically generates a mask from a list of URLs",
    ValidateMaskmaker,
    ExecuteMaskmaker)

  # Browser selection.
  cmd.AddArgument(
    ["-bp", "--browserpath"], "Full path to browser's executable",
    type="readfile", metaname="PATH")
  cmd.AddArgument(
    ["-b", "--browser"], "Which browser to use", type="string",
    default="chrome")
  cmd.AddArgument(
    ["-bv", "--browserver"], "Version of the browser", metaname="VERSION")

  # Where the generated masks are written.
  cmd.AddArgument(
    ["-o", "--outdir"], "Directory to store generated masks", metaname="DIR",
    required=True)

  # URL source: exactly one of a single URL or a list file (optionally
  # restricted to a range of its lines).
  cmd.AddArgument(
    ["-u", "--url"], "URL to compare")
  cmd.AddArgument(
    ["-l", "--list"], "List of URLs to compare", type="readfile")
  cmd.AddMutualExclusion(["--url", "--list"])
  cmd.AddArgument(
    ["-s", "--startline"], "First line of URL list", type="int")
  cmd.AddArgument(
    ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
  cmd.AddArgument(
    ["-c", "--count"], "Number of lines of URL file to use", type="int")
  cmd.AddDependency("--startline", "--list")
  cmd.AddRequiredGroup(["--url", "--list"])
  cmd.AddDependency("--endline", "--list")
  cmd.AddDependency("--count", "--list")
  cmd.AddMutualExclusion(["--count", "--endline"])
  cmd.AddDependency("--count", "--startline")

  # Scrape timing and stopping conditions.
  cmd.AddArgument(
    ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
    "finish loading",
    type="int", default=60)
  cmd.AddArgument(
    ["-w", "--wait"],
    "Amount of time (in seconds) to wait between successive scrapes",
    type="int", default=60)
  cmd.AddArgument(
    ["-sc", "--scrapes"],
    "Number of successive scrapes which must result in no change to a mask "
    "before mask creation is considered complete", type="int", default=10)
  cmd.AddArgument(
    ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")
  cmd.AddArgument(["-sd", "--scrapedir"], "Directory to store scrapes")
  cmd.AddArgument(
    ["-gu", "--giveup"],
    "Number of times to scrape before giving up", type="int", default=50)
  # The option name keeps its historical spelling ("threshhold") because
  # ExecuteMaskmaker looks it up under exactly that key. The two literals of
  # the help text must be joined with a space, otherwise the message reads
  # "will bediscarded".
  cmd.AddArgument(
    ["-th", "--threshhold"],
    "Percentage of different pixels (0-100) above which the scrape will be "
    "discarded and the mask not updated.", type="int", default=100)
  # NOTE(review): the short form is spelled "--er" (two dashes), unlike every
  # other short option here; left untouched so existing invocations keep
  # working.
  cmd.AddArgument(
    ["--er", "--errors"],
    "Number of times a scrape can fail before giving up on the URL.",
    type="int", default=1)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def ValidateMaskmaker(command):
  """Checks the arguments given to maskmaker.

  Args:
    command: Parsed command, indexable by argument name. Only
      "--browserpath" is inspected.

  Raises:
    command_line.ParseError: if a browser path was supplied and its file
      extension (compared case-insensitively) is not .exe, .com or .bat.
  """
  browser_path = command["--browserpath"]
  if not browser_path:
    # No explicit browser path was given, so there is nothing to check.
    return

  extension = os.path.splitext(browser_path)[1].lower()
  if extension not in (".exe", ".com", ".bat"):
    raise command_line.ParseError("Browser filename must be an executable")
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def _ReadUrlLines(list_path, startline, endline):
  """Returns lines [startline:endline] of list_path, whitespace-stripped.

  The file is closed before returning. Either bound may be None, in which
  case the slice is open on that side.
  """
  with open(list_path, "r") as url_file:
    return [line.strip() for line in url_file.readlines()[startline:endline]]


def _LoadOrCreateMask(mask_filename, size):
  """Opens the mask stored at mask_filename, creating it when missing.

  If the file cannot be opened, a new mode-"1" image of the requested size
  with every pixel set to 1 is saved under that name and the open is retried.
  If an existing mask has a different size, the size is appended to the file
  name and the lookup is retried under the new name.

  Returns:
    A (mask, mask_filename) pair; mask_filename is the name actually used,
    which may differ from the one passed in.
  """
  while True:
    try:
      mask = Image.open(mask_filename)
    except IOError:
      print("  %r does not exist, creating" % mask_filename)
      Image.new("1", size, 1).save(mask_filename)
      # Loop around to open the file that was just written.
      continue

    if mask.size == size:
      return mask, mask_filename

    print("  %r already exists and is the wrong size! (%r vs %r)" % (
      mask_filename, mask.size, size))
    # The last four characters are kept as the suffix; presumably this is
    # the ".bmp" extension requested from URLtoFilename by the caller.
    mask_filename = "%s_%r%s" % (
      mask_filename[:-4], size, mask_filename[-4:])
    print("  Trying again as %r..." % mask_filename)


def _PrintUrls(urls):
  """Prints the url attribute of each entry, indented by five spaces."""
  for url in urls:
    # Same output as the Python 2 statement `print "    ", url.url`
    # (four spaces, the separator space, then the URL).
    print("     %s" % url.url)


def ExecuteMaskmaker(command):
  """Performs automatic mask generation.

  Repeatedly scrapes every requested URL. Each new scrape is compared with
  the oldest stored scrape for that URL (the baseline); pixels that differ
  are blacked out in the URL's mask, which is saved in --outdir. A URL is
  finished once --scrapes consecutive scrapes show no difference under the
  current mask, and abandoned once it has failed --errors times. The whole
  process stops when no URLs remain or after --giveup passes, and a summary
  is printed.

  Args:
    command: Parsed command, indexable by argument name. Reads "--url",
      "--list", "--startline", "--endline", "--count", "--outdir",
      "--scrapes", "--errors", "--size", "--scrapedir", "--browser",
      "--browserver", "--browserpath", "--timeout", "--threshhold",
      "--giveup" and "--wait".
  """

  class MaskmakerURL(object):
    """Helper class for holding information about a URL passed to maskmaker."""
    __slots__ = ['url', 'consecutive_successes', 'errors']

    def __init__(self, url):
      self.url = url
      self.consecutive_successes = 0
      self.errors = 0

  # Get the list of URLs to generate masks for
  if command["--url"]:
    url_list = [MaskmakerURL(command["--url"])]
  else:
    startline = command["--startline"]
    if command["--count"]:
      endline = startline + command["--count"]
    else:
      endline = command["--endline"]
    url_list = [
      MaskmakerURL(line)
      for line in _ReadUrlLines(command["--list"], startline, endline)]

  complete_list = []
  error_list = []

  # These settings do not change between passes, so read them once.
  outdir = command["--outdir"]
  scrapes = command["--scrapes"]
  errors = command["--errors"]
  size = command["--size"]
  giveup = command["--giveup"]
  wait = command["--wait"]
  threshold = command["--threshhold"]
  scrape_pass = 0

  scrapedir = command["--scrapedir"]
  if not scrapedir:
    scrapedir = tempfile.gettempdir()

  # Get the scraper
  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))

  # Repeatedly iterate through the list of URLs until either every URL has
  # a successful mask or too many errors, or we've exceeded the giveup limit
  while url_list and scrape_pass < giveup:
    # Scrape each URL
    for url in url_list:
      print("Processing %r..." % url.url)
      mask_filename = drivers.windowing.URLtoFilename(url.url, outdir, ".bmp")
      mask, mask_filename = _LoadOrCreateMask(mask_filename, size)

      # Find the stored scrape path
      mask_scrape_dir = os.path.join(
        scrapedir, os.path.splitext(os.path.basename(mask_filename))[0])
      drivers.windowing.PreparePath(mask_scrape_dir)

      # Find the baseline image: the first stored scrape in sorted order.
      # Scrape names are timestamps (see below), so that is the oldest one.
      mask_scrapes = sorted(os.listdir(mask_scrape_dir))
      if not mask_scrapes:
        print("  No baseline image found, mask will not be updated")
        baseline = None
      else:
        baseline = Image.open(os.path.join(mask_scrape_dir, mask_scrapes[0]))

      mask_scrape_filename = os.path.join(
        mask_scrape_dir, time.strftime("%y%m%d-%H%M%S.bmp"))

      # Do the scrape
      result = scraper.Scrape(
        [url.url], mask_scrape_dir, size, (0, 0),
        command["--timeout"], path=command["--browserpath"],
        filename=mask_scrape_filename)

      if result:
        # Return value other than None means an error
        print("  Scrape failed with error '%r'" % result)
        url.errors += 1
        if url.errors >= errors:
          print("  ** Exceeded maximum error count for this URL, giving up")
        continue

      # Load the new scrape
      scrape = Image.open(mask_scrape_filename)

      if baseline is None:
        # This scrape becomes the baseline for later passes; there is
        # nothing to compare it against yet.
        continue

      # Calculate the difference between the new scrape and the baseline,
      # subject to the current mask
      diff = ImageChops.multiply(ImageChops.difference(scrape, baseline),
                                 mask.convert(scrape.mode))

      # If the difference is none, there's nothing to update
      # NOTE(review): comparing against (0, 0) presumes getextrema() yields
      # one (min, max) pair per band, i.e. a multi-band scrape -- confirm
      # the behavior for single-band images.
      if max(diff.getextrema()) == (0, 0):
        print("  Scrape identical to baseline, no change in mask")
        url.consecutive_successes += 1
        if url.consecutive_successes >= scrapes:
          print("  ** No change for %r scrapes, done!" % scrapes)
        continue

      # convert the difference to black and white, then change all
      # black pixels (where the scrape and the baseline were identical)
      # to white, all others (where the scrape and the baseline differed)
      # to black.
      #
      # Since the below command is a little unclear, here's how it works.
      #    1. convert("L") converts the RGB image to grayscale
      #    2. point() maps grayscale values (or the individual channels
      #       of an RGB image) to different ones. Because it operates on
      #       individual channels, the grayscale conversion from step 1
      #       is necessary.
      #    3. The "1" second parameter to point() outputs the result as
      #       a monochrome bitmap. If the original RGB image were converted
      #       directly to monochrome, PIL would dither it.
      diff = diff.convert("L").point([255]+[0]*255, "1")

      # count the number of different pixels
      # NOTE(review): assumes the first entry returned by getcolors() is the
      # count of black (differing) pixels -- TODO confirm.
      diff_pixels = diff.getcolors()[0][0]

      # is this too much?
      diff_pixel_percent = diff_pixels * 100.0 / (mask.size[0]*mask.size[1])
      if diff_pixel_percent > threshold:
        print("  Scrape differed from baseline by %.2f percent, ignoring"
              % diff_pixel_percent)
      else:
        print("  Scrape differed in %d pixels, updating mask" % diff_pixels)
        mask = ImageChops.multiply(mask, diff)
        mask.save(mask_filename)

        # reset the number of consecutive "good" scrapes
        url.consecutive_successes = 0

    # Remove URLs whose mask is deemed done
    complete_list.extend(
      [url for url in url_list if url.consecutive_successes >= scrapes])
    error_list.extend(
      [url for url in url_list if url.errors >= errors])
    url_list = [
      url for url in url_list if
      url.consecutive_successes < scrapes and
      url.errors < errors]

    scrape_pass += 1
    print("**Done with scrape pass %d\n" % scrape_pass)

    if scrape_pass >= giveup:
      print("**Exceeded giveup threshhold. Giving up.")
    else:
      print("Waiting %d seconds..." % wait)
      time.sleep(wait)

  print("")
  print("*** MASKMAKER COMPLETE ***")
  print("Summary report:")
  print("  %d masks successfully generated" % len(complete_list))
  _PrintUrls(complete_list)
  print("  %d masks failed with too many errors" % len(error_list))
  _PrintUrls(error_list)
  if scrape_pass >= giveup:
    print("  %d masks were not completed before "
          "reaching the giveup threshhold" % len(url_list))
    _PrintUrls(url_list)
273