1#!/usr/bin/env python
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Does scraping for all currently-known versions of Chrome"""
7
8import pywintypes
9import types
10
11from drivers import keyboard
12from drivers import mouse
13from drivers import windowing
14
15
# TODO: the browser executable has moved; add logic to locate it. For now
# this expects chrome.exe at the root of a k: drive (for example one created
# with the Windows "subst" command).
DEFAULT_PATH = r"k:\chrome.exe"
19
20
def InvokeBrowser(path):
  """Find a running Chrome window or launch a new browser.

  Args:
    path: full path to the browser executable, used only when no existing
      window is found

  Returns:
    A tuple of (main window, process handle, address bar, render pane).
    The process handle is None when an already-running window was reused.
  """
  # Prefer an already-open browser frame. This may not work correctly,
  # especially if that window is hidden behind other windows.

  # TODO(jhaas): make this work with Vista
  open_frames = windowing.FindChildWindows(0, "Chrome_XPFrame")
  if len(open_frames) == 0:
    # Nothing to reuse: start the browser and keep its process handle
    process, frame = windowing.InvokeAndWait(path)
  else:
    # Reuse the first frame found; we did not start it, so there is no
    # process handle to hand back
    frame, process = open_frames[0], None

  # Look up the child windows the callers drive
  url_edit = windowing.FindChildWindow(frame, "Chrome_AutocompleteEdit")
  # NOTE(review): GetChromeRenderPane is not defined in the visible part of
  # this module; presumably it is supplied from elsewhere -- confirm.
  content_pane = GetChromeRenderPane(frame)

  return (frame, process, url_edit, content_pane)
48
49
def Scrape(urls, outdir, size, pos, timeout, kwargs):
  """Invoke a browser, send it to a series of URLs, and save its output.

  Args:
    urls: list of URLs to scrape; a single URL string is also accepted
    outdir: directory to place output
    size: size of browser window to use
    pos: position of browser window
    timeout: amount of time to wait for page to load
    kwargs: dictionary of miscellaneous options. Recognized keys:
      "path": full path to the browser; DEFAULT_PATH is used when the key
        is missing or its value is empty
      "filename": output filename, or a callable that takes the URL and
        returns the filename; when missing or empty the name comes from
        windowing.URLtoFilename(url, outdir, ".bmp")

  Returns:
    None if success, else an error string ("timeout" or "crashed")
  """
  path = kwargs.get("path") or DEFAULT_PATH

  (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)

  # Resize and reposition the frame
  windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)

  # Accept a bare URL string as shorthand for a one-element list.
  # isinstance (rather than an exact type match) also covers subclasses of
  # the string types.
  if isinstance(urls, types.StringTypes): urls = [urls]

  # A missing, None or empty "filename" falls back to the generated name,
  # the same way an empty "path" falls back to DEFAULT_PATH, instead of
  # being handed to image.save() as-is.
  filename_option = kwargs.get("filename")

  timedout = False

  for url in urls:
    # Click in the address bar, type the URL, and press Enter
    mouse.ClickInWindow(address_bar)
    # NOTE(review): 0.1 is presumably a per-key delay -- confirm against
    # keyboard.TypeString
    keyboard.TypeString(url, 0.1)
    keyboard.TypeString("\n")

    # Wait for the page to finish loading; a negative result is treated as
    # a timeout and stops the run.
    # NOTE(review): the tuple is presumably the throbber rectangle within
    # the frame -- confirm against windowing.WaitForThrobber
    load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout)
    if load_time < 0:
      timedout = True
      break

    # Scrape the page
    image = windowing.ScrapeWindow(render_pane)

    # Save to disk
    if not filename_option:
      filename = windowing.URLtoFilename(url, outdir, ".bmp")
    elif callable(filename_option):
      filename = filename_option(url)
    else:
      filename = filename_option
    image.save(filename)

  # Only close the browser if this call launched it; a reused window has no
  # process handle (proc is None) and is left open.
  if proc:
    windowing.SetForegroundWindow(wnd)

    # Send Alt-F4, then wait for process to end
    keyboard.TypeString(r"{\4}", use_modifiers=True)
    if not windowing.WaitForProcessExit(proc, timeout):
      # The browser did not exit on request: end it and report a crash,
      # which takes precedence over a timeout
      windowing.EndProcess(proc)
      return "crashed"

  if timedout:
    return "timeout"

  return None
116
117
def _CloseBrowser(wnd, proc, timeout):
  """Ask the browser to close and end its process if it does not exit.

  Args:
    wnd: main browser window, brought to the foreground to receive the keys
    proc: browser process handle
    timeout: amount of time to wait for the process to exit

  Returns:
    True if the process exited within the timeout, False if it had to be
    ended with windowing.EndProcess
  """
  windowing.SetForegroundWindow(wnd)

  # Send Alt-F4, then wait for process to end
  keyboard.TypeString(r"{\4}", use_modifiers=True)
  if windowing.WaitForProcessExit(proc, timeout):
    return True

  windowing.EndProcess(proc)
  return False


def Time(urls, size, timeout, kwargs):
  """Measure how long it takes to load each of a series of URLs

  Args:
    urls: list of URLs to time; a single URL string is also accepted
    size: size of browser window to use
    timeout: amount of time to wait for page to load
    kwargs: dictionary of miscellaneous options; "path" gives the full path
      to the browser, with DEFAULT_PATH used when it is missing or empty

  Returns:
    A list of tuples (url, time). "time" can be "crashed" or "timeout"
  """
  path = kwargs.get("path") or DEFAULT_PATH
  proc = None

  # Accept a bare URL string as shorthand for a one-element list.
  # isinstance (rather than an exact type match) also covers subclasses of
  # the string types.
  if isinstance(urls, types.StringTypes): urls = [urls]

  ret = []
  for url in urls:
    try:
      # Invoke the browser if necessary: on the first URL, and again after
      # a timeout or crash has cleared proc
      if not proc:
        (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)

        # Resize and reposition the frame
        windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)

      # Click in the address bar, type the URL, and press Enter
      mouse.ClickInWindow(address_bar)
      keyboard.TypeString(url, 0.1)
      keyboard.TypeString("\n")

      # Wait for the page to finish loading; a negative result is treated
      # as a timeout
      load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout)

      if load_time < 0:
        load_time = "timeout"

        # Ask the browser to close; if it does not exit in time we've
        # probably got a crash.
        # NOTE(review): when InvokeBrowser reused an existing window, proc
        # is None here and is passed on to WaitForProcessExit/EndProcess --
        # confirm they cope with that.
        if not _CloseBrowser(wnd, proc, timeout):
          load_time = "crashed"
        proc = None
    except pywintypes.error:
      # Any Windows API failure while driving the browser is recorded as a
      # crash for this URL; the next URL starts from a fresh lookup
      proc = None
      load_time = "crashed"

    ret.append((url, load_time))

  # Close the browser if we still hold a process we launched
  if proc:
    _CloseBrowser(wnd, proc, timeout)

  return ret
182
183
def main():
  """Run a small manual test: scrape three sites into a fixed directory.

  Returns:
    0, which the caller passes to sys.exit as the process exit status
  """
  # We're being invoked rather than imported, so run some tests
  path = r"c:\sitecompare\scrapes\chrome\0.1.97.0"
  windowing.PreparePath(path)

  # Scrape takes a page-load timeout and an options dictionary in addition
  # to the window geometry, so both must be supplied. An empty dictionary
  # selects DEFAULT_PATH and generated output filenames.
  # NOTE(review): 20 is an arbitrary choice, presumably in seconds --
  # confirm the unit against windowing.WaitForThrobber.
  timeout = 20

  # Scrape three sites and save the results
  Scrape([
    "http://www.microsoft.com",
    "http://www.google.com",
    "http://www.sun.com"],
         path, (1024, 768), (0, 0), timeout, {})
  return 0
196
197
if __name__ == "__main__":
  # sys is needed only for the exit status when run as a script, and the
  # module does not import it at the top, so import it here.
  import sys
  sys.exit(main())
200