# Copyright 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import csv
import inspect
import os

from telemetry.core import util
from telemetry.page import cloud_storage
from telemetry.page import page as page_module
from telemetry.page import page_set_archive_info


PUBLIC_BUCKET = cloud_storage.PUBLIC_BUCKET
PARTNER_BUCKET = cloud_storage.PARTNER_BUCKET
INTERNAL_BUCKET = cloud_storage.INTERNAL_BUCKET


class PageSetError(Exception):
  """Raised when a page set file cannot be loaded or is malformed."""
  pass


# PageSet is an ordered collection of pages plus the settings shared by them
# (archive file, credentials, serving directories, privacy bucket).
#
# NOTE: this is deliberately documented with comments rather than a class
# docstring: Description() returns the first line of cls.__doc__, so adding a
# docstring here would change what PageSet.Description() returns.
class PageSet(object):
  def __init__(self, file_path=None, archive_data_file='',
               credentials_path=None, user_agent_type=None,
               make_javascript_deterministic=True, startup_url='',
               serving_dirs=None, bucket=None):
    """Initializes the page set.

    Args:
      file_path: Path of the file (or directory) this page set lives in.
          Defaults to the file that defines this instance's class; a '.pyc'
          path is mapped to the matching '.py' when that file exists.
      archive_data_file: Path, relative to base_dir, that wpr_archive_info
          is lazily loaded from. Empty means no archive info.
      credentials_path: Stored as-is on the instance.
      user_agent_type: Stored as-is on the instance.
      make_javascript_deterministic: Stored as-is on the instance.
      startup_url: Stored as-is on the instance.
      serving_dirs: Optional iterable of directory paths. Relative entries
          are resolved against base_dir; all are stored as real paths.
      bucket: Falsy, or one of PUBLIC_BUCKET, PARTNER_BUCKET,
          INTERNAL_BUCKET.

    Raises:
      ValueError: If bucket is not an accepted value.
    """
    # The default value of file_path is location of the file that define this
    # page set instance's class.
    if file_path is None:
      file_path = inspect.getfile(self.__class__)
      # Turn pyc file into py files if we can
      if file_path.endswith('.pyc') and os.path.exists(file_path[:-1]):
        file_path = file_path[:-1]

    self.file_path = file_path
    # These attributes can be set dynamically by the page set.
    self.archive_data_file = archive_data_file
    self.credentials_path = credentials_path
    self.user_agent_type = user_agent_type
    self.make_javascript_deterministic = make_javascript_deterministic
    self._wpr_archive_info = None
    self.startup_url = startup_url
    self.pages = []
    self.serving_dirs = set()
    serving_dirs = [] if serving_dirs is None else serving_dirs
    # Makes sure that page_set's serving_dirs are absolute paths.
    # base_dir depends on self.file_path, which is already set above.
    for sd in serving_dirs:
      if os.path.isabs(sd):
        self.serving_dirs.add(os.path.realpath(sd))
      else:
        self.serving_dirs.add(os.path.realpath(os.path.join(self.base_dir, sd)))
    if self._IsValidPrivacyBucket(bucket):
      self._bucket = bucket
    else:
      raise ValueError("Pageset privacy bucket %s is invalid" % bucket)

  @classmethod
  def Name(cls):
    """Returns the last component of the module name defining this class."""
    return cls.__module__.split('.')[-1]

  @classmethod
  def Description(cls):
    """Returns the first line of the class docstring, or '' if there is none."""
    if cls.__doc__:
      return cls.__doc__.splitlines()[0]
    else:
      return ''

  def AddPage(self, page):
    """Appends page to this page set; page.page_set must already be self."""
    assert page.page_set is self
    self.pages.append(page)

  def AddPageWithDefaultRunNavigate(self, page_url):
    """ Add a simple page with url equals to page_url that contains only default
    RunNavigateSteps.
    """
    self.AddPage(page_module.Page(
        page_url, self, self.base_dir))

  @staticmethod
  def FromFile(file_path):
    """Loads a page set from file_path; only '.py' files are supported.

    Raises:
      PageSetError: If the file extension is not '.py', or for any reason
          FromPythonFile raises it.
    """
    _, ext_name = os.path.splitext(file_path)
    if ext_name == '.py':
      return PageSet.FromPythonFile(file_path)
    else:
      raise PageSetError("Pageset %s has unsupported file type" % file_path)

  @staticmethod
  def FromPythonFile(file_path):
    """Instantiates the single *PageSet class defined in a python file.

    The module must expose exactly one attribute whose name ends with
    'PageSet' (other than 'PageSet' itself). After instantiation, every
    Run<...> method found on the pages' classes is checked to take
    (self, action_runner) as its first two parameters.

    Raises:
      PageSetError: If the module does not contain exactly one such class, or
          a Run<...> method does not have the expected signature.
    """
    page_set_classes = []
    module = util.GetPythonPageSetModule(file_path)
    for m in dir(module):
      if m.endswith('PageSet') and m != 'PageSet':
        page_set_classes.append(getattr(module, m))
    if len(page_set_classes) != 1:
      # NOTE(review): the message says "prefix" but the check above is on the
      # name's suffix; the text is left unchanged in case callers match on it.
      raise PageSetError("Pageset file needs to contain exactly 1 pageset class"
                         " with prefix 'PageSet'")
    page_set = page_set_classes[0]()
    for page in page_set.pages:
      page_class = page.__class__

      for method_name, method in inspect.getmembers(page_class,
                                                    predicate=inspect.ismethod):
        if method_name.startswith("Run"):
          args, _, _, _ = inspect.getargspec(method)
          # Compare a slice rather than indexing args[0]/args[1] so that a
          # method with fewer than two parameters is reported as a
          # PageSetError instead of escaping as an IndexError.
          if list(args[:2]) != ["self", "action_runner"]:
            raise PageSetError(
                "Definition of Run<...> method of all\n"
                "pages in %s must be in the form of "
                "def Run<...>(self, action_runner):" % file_path)
    return page_set

  @staticmethod
  def _IsValidPrivacyBucket(bucket_name):
    """Returns True if bucket_name is falsy or one of the known buckets."""
    return (not bucket_name or
            bucket_name in (PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET))

  @property
  def base_dir(self):
    """Directory of file_path if it is a file, otherwise file_path itself."""
    if os.path.isfile(self.file_path):
      return os.path.dirname(self.file_path)
    else:
      return self.file_path

  @property
  def wpr_archive_info(self):  # pylint: disable=E0202
    """Lazily constructs wpr_archive_info if it's not set and returns it."""
    if self.archive_data_file and not self._wpr_archive_info:
      self._wpr_archive_info = (
          page_set_archive_info.PageSetArchiveInfo.FromFile(
              os.path.join(self.base_dir, self.archive_data_file)))
    return self._wpr_archive_info

  @wpr_archive_info.setter
  def wpr_archive_info(self, value):  # pylint: disable=E0202
    self._wpr_archive_info = value

  @property
  def bucket(self):
    """The privacy bucket passed to the constructor (may be None)."""
    return self._bucket

  def ContainsOnlyFileURLs(self):
    """Returns True if every page has is_file set (True for an empty set)."""
    return all(page.is_file for page in self.pages)

  def ReorderPageSet(self, results_file):
    """Returns this page set's pages ordered as in the results of a past run.

    Args:
      results_file: Path to a CSV file whose header row has a 'url' column.
          Each following row names, in that column, the url of a page in
          this page set.

    Returns:
      A new list with one page per CSV data row, in row order. self.pages is
      not modified.

    Raises:
      Exception: If the file has no header row, the header has no 'url'
          column, or a row's url does not match any page in this page set.
    """
    pages_by_url = {}
    for page in self.pages:
      pages_by_url[page.url] = page

    pages = []
    # 'rb' is the mode the Python 2 csv module expects for input files.
    with open(results_file, 'rb') as csv_file:
      csv_reader = csv.reader(csv_file)
      # next() with a default avoids leaking StopIteration on an empty file.
      csv_header = next(csv_reader, None)

      if csv_header is None or 'url' not in csv_header:
        raise Exception('Unusable results_file.')

      url_index = csv_header.index('url')

      for csv_row in csv_reader:
        url = csv_row[url_index]
        if url not in pages_by_url:
          raise Exception('Unusable results_file.')
        # Collect into the returned list. Calling self.AddPage() here would
        # append duplicates to self.pages and leave the result empty.
        pages.append(pages_by_url[url])

    return pages

  def WprFilePathForPage(self, page):
    """Returns the archive path for page, or None without archive info."""
    if not self.wpr_archive_info:
      return None
    return self.wpr_archive_info.WprFilePathForPage(page)

  def __iter__(self):
    return iter(self.pages)

  def __len__(self):
    return len(self.pages)

  def __getitem__(self, key):
    return self.pages[key]

  def __setitem__(self, key, value):
    self.pages[key] = value