# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Does scraping for versions of Chrome from 0.1.101.0 up."""

from drivers import windowing

import chromebase

# Default version
version = "0.1.101.0"


def GetChromeRenderPane(wnd):
  """Look up the render pane child of a Chrome window.

  Args:
    wnd: window handed to windowing.FindChildWindow as the search root

  Returns:
    Whatever windowing.FindChildWindow yields for "Chrome_TabContents"
  """
  render_pane = windowing.FindChildWindow(wnd, "Chrome_TabContents")
  return render_pane


def _InstallRenderPaneLookup():
  """Point chromebase at this module's GetChromeRenderPane.

  The assignment rebinds an attribute on the chromebase module itself, so it
  remains in effect after the calling function returns.
  """
  chromebase.GetChromeRenderPane = GetChromeRenderPane


def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
  """Drive the browser through a list of URLs and save what it renders.

  This is a thin wrapper: it installs this module's render-pane lookup on
  chromebase and then delegates all the work to chromebase.Scrape.

  Args:
    urls: list of URLs to scrape
    outdir: directory to place output
    size: size of browser window to use
    pos: position of browser window
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args

  Returns:
    None if succeeded, else an error code
  """
  _InstallRenderPaneLookup()

  # kwargs is handed over as one positional dict, not re-expanded with **.
  outcome = chromebase.Scrape(urls, outdir, size, pos, timeout, kwargs)
  return outcome


def Time(urls, size, timeout, **kwargs):
  """Forwards the Time command to chromebase.

  Args:
    urls: forwarded unchanged to chromebase.Time
    size: forwarded unchanged to chromebase.Time
    timeout: forwarded unchanged to chromebase.Time
    kwargs: miscellaneous keyword args, forwarded as a single dict

  Returns:
    The result of chromebase.Time
  """
  _InstallRenderPaneLookup()

  # As in Scrape, the collected keyword args travel as a plain dict argument.
  outcome = chromebase.Time(urls, size, timeout, kwargs)
  return outcome