1#!/usr/bin/env python
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""SiteCompare component to handle bulk scrapes.
7
8Invokes a list of browsers and sends them to a list of URLs,
9saving the rendered results to a specified directory, then
10performs comparison operations on the resulting bitmaps and
11saves the results
12"""
13
14
15# This line is necessary to work around a QEMU bug
16import _imaging
17
18import os            # Functions for walking the directory tree
19import types         # Runtime type-checking
20
21import command_line  # command-line parsing
22import drivers       # Functions for driving keyboard/mouse/windows, OS-specific
23import operators     # Functions that, given two bitmaps as input, produce
24                     # output depending on the performance of an operation
25import scrapers      # Functions that know how to capture a render from
26                     # particular browsers
27
28import commands.compare2  # compare one page in two versions of same browser
29import commands.maskmaker # generate a mask based on repeated scrapes
30import commands.measure   # measure length of time a page takes to load
31import commands.scrape    # scrape a URL or series of URLs to a bitmap
32
33# The timeload command is obsolete (too flaky); it may be reinstated
# later but for now it's been superseded by "measure"
35# import commands.timeload  # measure length of time a page takes to load
36
def Scrape(browsers, urls, window_size=(1024, 768),
           window_pos=(0, 0), timeout=20, save_path=None, **kwargs):
  """Invoke one or more browsers over one or more URLs, scraping renders.

  For every requested browser a scraper is looked up through
  scrapers.GetScraper, the directory <save_path>/<browser>/<scraper.version>
  is prepared with drivers.windowing.PreparePath, and the scraper is asked
  to scrape all of the URLs into that directory.

  Args:
    browsers: a single browser name, or a list whose items are either
      browser names or (browser, version) tuples
    urls: URLs to visit, handed to the scraper unchanged
    window_size: size of the browser window to display
    window_pos: location of browser window
    timeout: time (in seconds) to wait for page to load
    save_path: root of save path, automatically appended with browser and
      version; defaults to a "Scrapes" directory next to this file
    kwargs: miscellaneous keyword args, passed to the scraper as one dict
  Returns:
    None

  @TODO(jhaas): more parameters, or perhaps an indefinite dictionary
  parameter, for things like length of time to wait for timeout, speed
  of mouse clicks, etc. Possibly on a per-browser, per-URL, or
  per-browser-per-URL basis
  """
  # isinstance (rather than an exact type() match) also accepts subclasses
  # of the string types.
  if isinstance(browsers, types.StringTypes):
    browsers = [browsers]

  if save_path is None:
    # Default save path is the "Scrapes" directory beside this file.
    save_path = os.path.join(os.path.dirname(__file__), "Scrapes")

  for browser in browsers:
    # Browsers should be tuples of (browser, version); a bare name means
    # "no particular version".
    if isinstance(browser, types.StringTypes):
      browser = (browser, None)
    scraper = scrapers.GetScraper(browser)

    # The directory is named after the version the scraper reports, not the
    # version that was requested.
    full_path = os.path.join(save_path, browser[0], scraper.version)
    drivers.windowing.PreparePath(full_path)

    # kwargs is deliberately passed as a single positional dict, matching
    # the original call convention.
    scraper.Scrape(urls, full_path, window_size, window_pos, timeout, kwargs)
74
75
76def Compare(base, compare, ops, root_path=None, out_path=None):
77  """Compares a series of scrapes using a series of operators.
78
79  Args:
80    base: (browser, version) tuple of version to consider the baseline
81    compare: (browser, version) tuple of version to compare to
82    ops: list of operators plus operator arguments
83    root_path: root of the scrapes
84    out_path: place to put any output from the operators
85
86  Returns:
87    None
88
89  @TODO(jhaas): this method will likely change, to provide a robust and
90  well-defined way of chaining operators, applying operators conditionally,
91  and full-featured scripting of the operator chain. There also needs
92  to be better definition of the output; right now it's to stdout and
93  a log.txt file, with operator-dependent images saved for error output
94  """
95  if root_path is None:
96    # default save path is "scrapes" off the current root
97    root_path = os.path.join(os.path.split(__file__)[0], "Scrapes")
98
99  if out_path is None:
100    out_path = os.path.join(os.path.split(__file__)[0], "Compares")
101
102  if type(base) in types.StringTypes: base = (base, None)
103  if type(compare) in types.StringTypes: compare = (compare, None)
104  if type(ops) in types.StringTypes: ops = [ops]
105
106  base_dir = os.path.join(root_path, base[0])
107  compare_dir = os.path.join(root_path, compare[0])
108
109  if base[1] is None:
110    # base defaults to earliest capture
111    base = (base[0], max(os.listdir(base_dir)))
112
113  if compare[1] is None:
114    # compare defaults to latest capture
115    compare = (compare[0], min(os.listdir(compare_dir)))
116
117  out_path = os.path.join(out_path, base[0], base[1], compare[0], compare[1])
118  drivers.windowing.PreparePath(out_path)
119
120  # TODO(jhaas): right now we're just dumping output to a log file
121  # (and the console), which works as far as it goes but isn't nearly
122  # robust enough. Change this after deciding exactly what we want to
123  # change it to.
124  out_file = open(os.path.join(out_path, "log.txt"), "w")
125  description_string = ("Comparing %s %s to %s %s" %
126                        (base[0], base[1], compare[0], compare[1]))
127  out_file.write(description_string)
128  print description_string
129
130  base_dir = os.path.join(base_dir, base[1])
131  compare_dir = os.path.join(compare_dir, compare[1])
132
133  for filename in os.listdir(base_dir):
134    out_file.write("%s: " % filename)
135
136    if not os.path.isfile(os.path.join(compare_dir, filename)):
137      out_file.write("Does not exist in target directory\n")
138      print "File %s does not exist in target directory" % filename
139      continue
140
141    base_filename = os.path.join(base_dir, filename)
142    compare_filename = os.path.join(compare_dir, filename)
143
144    for op in ops:
145      if type(op) in types.StringTypes: op = (op, None)
146
147      module = operators.GetOperator(op[0])
148
149      ret = module.Compare(base_filename, compare_filename)
150      if ret is None:
151        print "%s: OK" % (filename,)
152        out_file.write("OK\n")
153      else:
154        print "%s: %s" % (filename, ret[0])
155        out_file.write("%s\n" % (ret[0]))
156        ret[1].save(os.path.join(out_path, filename))
157
158  out_file.close()
159
160
def main():
  """Entry point: register the enabled commands, then parse the command line.

  Returns:
    0, for use as the process exit status.
  """
  parser = command_line.CommandLine()

  # The below two commands are currently unstable so have been disabled
  # commands.compare2.CreateCommand(parser)
  # commands.maskmaker.CreateCommand(parser)
  for command_module in (commands.measure, commands.scrape):
    command_module.CreateCommand(parser)

  parser.ParseCommandLine()
  return 0
173
174
if __name__ == "__main__":
  # sys is not imported at the top of this file; import it here so that
  # sys.exit does not raise NameError when the script is run directly.
  import sys
  sys.exit(main())
177