1# Copyright (c) 2011 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Utility to use a browser to visit multiple URLs.
6
7Prerequisites:
8  1. The command_line package from tools/site_compare
9  2. Either the IE BHO or Firefox extension (or both)
10
11Installation:
12  1. Build the IE BHO, or call regsvr32 on a prebuilt binary
13  2. Add a file called "measurepageloadtimeextension@google.com" to
14     the default Firefox profile directory under extensions, containing
15     the path to the Firefox extension root
16
17Invoke with the command line arguments as documented within
18the command line.
19"""
20
21import command_line
22import scrapers
23import socket
24import time
25
26from drivers import windowing
27
# Constants
# Maximum number of bytes requested from the browser socket per recv() call
# in Iterate.
MAX_URL = 1024
# TCP port on localhost that Iterate connects to in order to talk to the
# browser.
PORT = 42492
31
def SetupIterationCommandLine(cmd):
  """Registers the flags that control URL iteration on a command.

  The flags cover browser selection, the URL source (a single URL or a
  list file with an optional line range), the load timeout and the browser
  window size, plus the dependency and exclusion rules between them.

  Args:
    cmd: an object created by cmdline.AddCommand
  """
  add_arg = cmd.AddArgument
  add_dependency = cmd.AddDependency
  add_exclusion = cmd.AddMutualExclusion

  # Which browser to drive.
  add_arg(["-b", "--browser"], "Browser to use (ie, firefox, chrome)",
          type="string", required=True)
  # NOTE(review): "-b1v" is an unusual short flag name; confirm it is
  # intentional before relying on it.
  add_arg(["-b1v", "--browserver"], "Version of browser", metaname="VERSION")
  add_arg(["-p", "--browserpath"], "Path to browser.",
          type="string", required=False)

  # Where the URLs come from: exactly one of --url / --list.
  add_arg(["-u", "--url"], "URL to visit")
  add_arg(["-l", "--list"], "File containing list of URLs to visit",
          type="readfile")
  add_exclusion(["--url", "--list"])

  # Optional line range, only meaningful together with --list.
  add_arg(["-s", "--startline"], "First line of URL list", type="int")
  add_arg(["-e", "--endline"], "Last line of URL list (exclusive)",
          type="int")
  add_arg(["-c", "--count"], "Number of lines of URL file to use",
          type="int")
  add_dependency("--startline", "--list")
  cmd.AddRequiredGroup(["--url", "--list"])
  add_dependency("--endline", "--list")
  add_dependency("--count", "--list")
  add_exclusion(["--count", "--endline"])
  add_dependency("--count", "--startline")

  # Browser behaviour.
  add_arg(["-t", "--timeout"],
          "Amount of time (seconds) to wait for browser to finish loading",
          type="int", default=300)
  add_arg(["-sz", "--size"], "Browser window size",
          default=(800, 600), type="coords")
69
70
def Iterate(command, iteration_func):
  """Iterates over a list of URLs, calling a function on each.

  The browser is started on demand and driven through a TCP socket on
  localhost:PORT. Each URL is sent as one line; the reply is read until it
  contains a newline. If the browser hangs (socket timeout), crashes
  (socket error or closed connection) or reports ",timeout", it is detached
  and a new instance is started for the next URL.

  The current socket, process and window are kept on the function object as
  Iterate.s, Iterate.proc and Iterate.wnd.

  Args:
    command: the command line containing the iteration flags
    iteration_func: called exactly once per URL with
      (url, proc, wnd, result). result is the text received from the
      browser once it contains a newline, or "<url>,hang\\n" /
      "<url>,crash\\n" when the browser stopped responding. After a hang,
      crash or timeout the browser has already been detached, so proc is
      None and wnd still refers to the window of the detached instance.

  Raises:
    ValueError: if the browser could not be invoked or its socket could
      not be reached.
  """

  # Retrieve the browser scraper to use to invoke the browser
  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))

  # pywintypes (part of pywin32) is not imported at module scope, so a bare
  # reference in an except clause would raise NameError exactly when the
  # handler is needed. Resolve it here; if it is unavailable nothing is
  # swallowed and the underlying exception propagates.
  try:
    import pywintypes
    end_process_errors = (pywintypes.error,)
  except ImportError:
    end_process_errors = ()

  def AttachToBrowser(path, timeout):
    """Invoke the browser process and connect to the socket."""
    (proc, frame, wnd) = scraper.GetBrowser(path)

    if not wnd:
      raise ValueError("Could not invoke browser.")

    # Try to connect the socket. If it fails, wait and try
    # again. Do this for ten seconds
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)

    for _ in range(10):
      try:
        s.connect(("localhost", PORT))
      except socket.error:
        time.sleep(1)
        continue
      break

    # getpeername() fails if none of the connection attempts succeeded
    try:
      s.getpeername()
    except socket.error:
      # Don't leak the socket when giving up
      s.close()
      raise ValueError("Could not connect to browser")

    if command["--size"]:
      # Resize and reposition the frame
      windowing.MoveAndSizeWindow(frame, (0, 0), command["--size"], wnd)

    s.settimeout(timeout)

    Iterate.proc = proc
    Iterate.wnd = wnd
    Iterate.s = s

  def DetachFromBrowser():
    """Close the socket and kill the process if necessary."""
    if Iterate.s:
      Iterate.s.close()
      Iterate.s = None

    if Iterate.proc:
      # Presumably WaitForProcessExit returns a true value when the process
      # has already exited; only end it explicitly otherwise.
      if not windowing.WaitForProcessExit(Iterate.proc, 0):
        try:
          windowing.EndProcess(Iterate.proc)
          windowing.WaitForProcessExit(Iterate.proc, 0)
        except end_process_errors:
          # Exception here most likely means the process died on its own
          pass
      Iterate.proc = None

  browser = command["--browserpath"] or None

  # Read the URLs from the file
  if command["--url"]:
    url_list = [command["--url"]]
  else:
    # Line numbers are 1-based; endline is exclusive. A missing --startline
    # means "from the first line", a missing end means "to the end of file".
    startline = command["--startline"] or 1
    if command["--count"]:
      endline = startline + command["--count"]
    else:
      endline = command["--endline"]

    url_list = []
    with open(command["--list"], "r") as url_file:
      for lineno, line in enumerate(url_file, 1):
        if endline is not None and lineno >= endline:
          break
        if lineno >= startline:
          url_list.append(line.strip())

  timeout = command["--timeout"]

  # Loop through the URLs and send them through the socket
  Iterate.s = None
  Iterate.proc = None
  Iterate.wnd = None

  try:
    for url in url_list:
      # Invoke the browser if necessary
      if not Iterate.proc:
        AttachToBrowser(browser, timeout)

      # Send the URL and wait for a response
      Iterate.s.send(url + "\n")

      response = ""

      while "\n" not in response:
        try:
          recv = Iterate.s.recv(MAX_URL)
          response = response + recv

          # Workaround for an oddity: when Firefox closes
          # gracefully, somehow Python doesn't detect it.
          # (Telnet does)
          if not recv:
            raise socket.error

        except socket.timeout:
          # Must be handled before socket.error, of which it is a subclass
          response = url + ",hang\n"
          DetachFromBrowser()
        except socket.error:
          # If there was a socket error, it's probably a crash
          response = url + ",crash\n"
          DetachFromBrowser()

      # If we received a timeout response, restart the browser
      if response[-9:] == ",timeout\n":
        DetachFromBrowser()

      # Invoke the iteration function once, with the complete response
      iteration_func(url, Iterate.proc, Iterate.wnd, response)
  finally:
    # We're done (or failed part-way): always release socket and process
    DetachFromBrowser()
200