1#!/usr/bin/env python
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Does scraping for all known versions of IE."""
7
8import pywintypes
9import time
10import types
11
12from drivers import keyboard
13from drivers import mouse
14from drivers import windowing
15
# Default version
# NOTE(review): presumably the IE build this scraper targets; nothing in this
# module reads it, so confirm how importers use it.
version = "7.0.5730.1"

# Executable launched by GetBrowser() and Time() when the caller supplies no
# explicit browser path.
DEFAULT_PATH = r"c:\program files\internet explorer\iexplore.exe"
20
def GetBrowser(path):
  """Launch IE and return the handles needed to drive and capture it.

  Args:
    path: full path to the browser executable; any false value (None, "")
      selects DEFAULT_PATH

  Returns:
    A tuple of (process handle, main window, render pane)
  """
  # InvokeBrowser also yields the address bar and tab window; callers of this
  # function do not need them, so they are dropped here.
  (main_window, process, _address_bar, pane, _tab_window) = InvokeBrowser(
    path or DEFAULT_PATH)
  return (process, main_window, pane)
34
35
def InvokeBrowser(path):
  """Invoke the IE browser and locate the child windows used for scraping.

  The child windows may not exist immediately after launch, so the lookup is
  retried once per second, up to 10 attempts.

  Args:
    path: full path to browser

  Returns:
    A tuple of (main window, process handle, address bar,
                render_pane, tab_window)

  Raises:
    IndexError: if the child windows still cannot be found on the final
      attempt (propagated from windowing.FindChildWindow).
  """
  # Invoke IE
  (ieproc, iewnd) = windowing.InvokeAndWait(path)

  # Get windows we'll need
  max_tries = 10
  for attempt in range(max_tries):
    try:
      address_bar = windowing.FindChildWindow(
        iewnd, "WorkerW|Navigation Bar/ReBarWindow32/"
        "Address Band Root/ComboBoxEx32/ComboBox/Edit")
      render_pane = windowing.FindChildWindow(
        iewnd, "TabWindowClass/Shell DocObject View")
      tab_window = windowing.FindChildWindow(
        iewnd, "CommandBarClass/ReBarWindow32/TabBandClass/DirectUIHWND")
    except IndexError:
      # Out of retries: surface the real lookup failure instead of falling
      # through to the return statement with unbound locals.
      if attempt == max_tries - 1:
        raise
      time.sleep(1)
      continue
    break

  return (iewnd, ieproc, address_bar, render_pane, tab_window)
65
66
def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
  """Invoke a browser, send it to a series of URLs, and save its output.

  Scraping stops at the first URL whose load times out. The browser process
  is always terminated before returning, even if a step raises.

  Args:
    urls: list of URLs to scrape (a single URL string is also accepted)
    outdir: directory to place output
    size: size of browser window to use
    pos: position of browser window
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args. Recognized keys:
      path: full path to the browser; DEFAULT_PATH is used when absent/empty
      filename: either a callable taking the URL and returning the output
        filename, or a fixed filename (every URL is then saved to that
        same file)

  Returns:
    None if success, else an error string ("timeout")
  """
  path = kwargs.get("path") or DEFAULT_PATH

  (iewnd, ieproc, address_bar, render_pane, tab_window) = InvokeBrowser(path)

  timedout = False

  # Everything after launch runs under try/finally so a failure while
  # driving the browser cannot leave the IE process running.
  try:
    # Resize and reposition the frame
    windowing.MoveAndSizeWindow(iewnd, pos, size, render_pane)

    # Visit each URL we're given; isinstance also accepts str subclasses
    if isinstance(urls, types.StringTypes):
      urls = [urls]

    for url in urls:
      # Double-click in the address bar, type the name, and press Enter
      mouse.DoubleClickInWindow(address_bar)
      keyboard.TypeString(url)
      keyboard.TypeString("\n")

      # Wait for the page to finish loading; a negative result means timeout
      load_time = windowing.WaitForThrobber(
        tab_window, (6, 8, 22, 24), timeout)
      timedout = load_time < 0

      if timedout:
        break

      # Scrape the page
      image = windowing.ScrapeWindow(render_pane)

      # Save to disk
      if "filename" in kwargs:
        if callable(kwargs["filename"]):
          filename = kwargs["filename"](url)
        else:
          filename = kwargs["filename"]
      else:
        filename = windowing.URLtoFilename(url, outdir, ".bmp")
      image.save(filename)
  finally:
    windowing.EndProcess(ieproc)

  if timedout:
    return "timeout"
  return None
128
129
def _CloseBrowser(proc, timeout):
  """Ask the browser to close, killing it if it does not exit in time.

  Args:
    proc: process handle of the browser
    timeout: amount of time to wait for the process to exit

  Returns:
    True if the process exited after the close keystroke, False if it had
    to be terminated with windowing.EndProcess.
  """
  # Send an alt-F4 to make the browser close
  keyboard.TypeString(r"{\4}", use_modifiers=True)
  if windowing.WaitForProcessExit(proc, timeout):
    return True
  windowing.EndProcess(proc)
  return False


def Time(urls, size, timeout, **kwargs):
  """Measure how long it takes to load each of a series of URLs

  One browser instance is reused across URLs. After a timeout or a
  pywintypes.error the instance is discarded and a fresh one is launched for
  the next URL.

  Args:
    urls: list of URLs to time (a single URL string is also accepted)
    size: size of browser window to use
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args. Recognized keys:
      path: full path to the browser; DEFAULT_PATH is used when absent/empty

  Returns:
    A list of tuples (url, time). "time" can be "crashed" or "timeout"
  """
  path = kwargs.get("path") or DEFAULT_PATH
  proc = None

  # Visit each URL we're given; isinstance also accepts str subclasses
  if isinstance(urls, types.StringTypes):
    urls = [urls]

  ret = []
  for url in urls:
    try:
      # Invoke the browser if necessary
      if not proc:
        (wnd, proc, address_bar, render_pane, tab_window) = InvokeBrowser(path)

        # Resize and reposition the frame
        windowing.MoveAndSizeWindow(wnd, (0, 0), size, render_pane)

      # Double-click in the address bar, type the name, and press Enter
      mouse.DoubleClickInWindow(address_bar)
      keyboard.TypeString(url)
      keyboard.TypeString("\n")

      # Wait for the page to finish loading; a negative result means timeout
      load_time = windowing.WaitForThrobber(
        tab_window, (6, 8, 22, 24), timeout)

      if load_time < 0:
        load_time = "timeout"

        # If the browser will not even close on request, we've probably
        # got a crash
        if not _CloseBrowser(proc, timeout):
          load_time = "crashed"
        proc = None
    except pywintypes.error:
      load_time = "crashed"
      proc = None

    ret.append((url, load_time))

  # Shut down the browser instance that is still alive, if any
  if proc:
    _CloseBrowser(proc, timeout)

  return ret
193
194
def main():
  """Run a manual smoke test: scrape three sites into a fixed directory.

  Returns:
    0 if every page was scraped, 1 if Scrape reported an error.
  """
  # We're being invoked rather than imported, so run some tests
  path = r"c:\sitecompare\scrapes\ie7\7.0.5380.11"
  windowing.PreparePath(path)

  # Scrape three sites and save the results
  error = Scrape(
    ["http://www.microsoft.com",
     "http://www.google.com",
     "http://www.sun.com"],
    path, (1024, 768), (0, 0))

  # Scrape returns an error string on failure; reflect it in the exit code
  # instead of always reporting success.
  if error:
    print("Scrape failed: %s" % error)
    return 1
  return 0
207
208
if __name__ == "__main__":
  # sys is not imported at module level; import it here so running the file
  # as a script does not fail with NameError on sys.exit.
  import sys
  sys.exit(main())
211