1# Copyright 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import csv
6import inspect
7import os
8
9from telemetry.core import util
10from telemetry.page import cloud_storage
11from telemetry.page import page as page_module
12from telemetry.page import page_set_archive_info
13
14
# Aliases of the cloud_storage bucket constants. They are the only non-empty
# values that PageSet accepts for its `bucket` constructor argument (see
# PageSet._IsValidPrivacyBucket).
PUBLIC_BUCKET = cloud_storage.PUBLIC_BUCKET
PARTNER_BUCKET = cloud_storage.PARTNER_BUCKET
INTERNAL_BUCKET = cloud_storage.INTERNAL_BUCKET
18
19
class PageSetError(Exception):
  """Raised when a page set file cannot be loaded or is malformed."""
22
23
class PageSet(object):
  # An ordered, list-like collection of pages together with the settings they
  # share (archive data file, credentials path, user agent type, serving
  # directories and cloud storage bucket).
  #
  # NOTE: this description is deliberately a comment and not a docstring.
  # Description() reports the first line of cls.__doc__, so giving this class a
  # docstring would change what PageSet.Description() returns.

  def __init__(self, file_path=None, archive_data_file='',
               credentials_path=None, user_agent_type=None,
               make_javascript_deterministic=True, startup_url='',
               serving_dirs=None, bucket=None):
    """Creates an empty page set.

    Args:
      file_path: Path of the file (or directory) this page set lives in. When
          None, the file that defines the instance's class is used.
      archive_data_file: Path, relative to base_dir, of the archive info file
          that wpr_archive_info loads lazily. Falsy means "no archive".
      credentials_path: Stored as-is on the instance.
      user_agent_type: Stored as-is on the instance.
      make_javascript_deterministic: Stored as-is on the instance.
      startup_url: Stored as-is on the instance.
      serving_dirs: Optional iterable of directory paths. Relative entries are
          resolved against base_dir; all entries are stored as real paths.
      bucket: Falsy, or one of PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET.

    Raises:
      ValueError: if bucket is not one of the accepted values.
    """
    # The default value of file_path is the location of the file that defines
    # this page set instance's class.
    if file_path is None:
      file_path = inspect.getfile(self.__class__)
      # Turn pyc files into py files if we can.
      if file_path.endswith('.pyc') and os.path.exists(file_path[:-1]):
        file_path = file_path[:-1]

    self.file_path = file_path
    # These attributes can be set dynamically by the page set.
    self.archive_data_file = archive_data_file
    self.credentials_path = credentials_path
    self.user_agent_type = user_agent_type
    self.make_javascript_deterministic = make_javascript_deterministic
    self._wpr_archive_info = None
    self.startup_url = startup_url
    self.pages = []

    # Make sure that the page set's serving_dirs are absolute, real paths.
    self.serving_dirs = set()
    if serving_dirs:
      # base_dir hits the file system, so look it up once for all entries.
      base_dir = self.base_dir
      for serving_dir in serving_dirs:
        if not os.path.isabs(serving_dir):
          serving_dir = os.path.join(base_dir, serving_dir)
        self.serving_dirs.add(os.path.realpath(serving_dir))

    if not self._IsValidPrivacyBucket(bucket):
      raise ValueError("Pageset privacy bucket %s is invalid" % bucket)
    self._bucket = bucket

  @classmethod
  def Name(cls):
    """Returns the last dotted component of the module defining cls."""
    return cls.__module__.split('.')[-1]

  @classmethod
  def Description(cls):
    """Returns the first line of cls's docstring, or '' if it has none."""
    doc = cls.__doc__
    return doc.splitlines()[0] if doc else ''

  def AddPage(self, page):
    """Appends page, which must already point back at this page set."""
    assert page.page_set is self
    self.pages.append(page)

  def AddPageWithDefaultRunNavigate(self, page_url):
    """ Add a simple page with url equals to page_url that contains only default
    RunNavigateSteps.
    """
    self.AddPage(page_module.Page(
      page_url, self, self.base_dir))

  @staticmethod
  def FromFile(file_path):
    """Loads the page set defined in file_path.

    Only '.py' files are supported.

    Raises:
      PageSetError: if file_path has any other extension.
    """
    _, ext_name = os.path.splitext(file_path)
    if ext_name != '.py':
      raise PageSetError("Pageset %s has unsupported file type" % file_path)
    return PageSet.FromPythonFile(file_path)

  @staticmethod
  def FromPythonFile(file_path):
    """Instantiates the single page set class defined in a Python file.

    The module must expose exactly one attribute whose name ends with
    'PageSet' (other than 'PageSet' itself); it is called with no arguments.
    Every Run<...> method found on the classes of the resulting pages must
    take (self, action_runner) as its first two arguments.

    Raises:
      PageSetError: if the module does not contain exactly one such class, or
          if a Run<...> method has the wrong signature.
    """
    module = util.GetPythonPageSetModule(file_path)
    page_set_classes = [
        getattr(module, name) for name in dir(module)
        if name.endswith('PageSet') and name != 'PageSet']
    if len(page_set_classes) != 1:
      raise PageSetError("Pageset file needs to contain exactly 1 pageset class"
                         " with suffix 'PageSet'")
    page_set = page_set_classes[0]()
    for page in page_set.pages:
      page_class = page.__class__

      # NOTE(review): the ismethod predicate and getargspec are Python 2
      # idioms; confirm before relying on this check under Python 3.
      for method_name, method in inspect.getmembers(page_class,
                                                    predicate=inspect.ismethod):
        if method_name.startswith("Run"):
          args, _, _, _ = inspect.getargspec(method)
          # Compare a slice rather than indexing args[0]/args[1], so a method
          # with fewer than two arguments is reported as a PageSetError
          # instead of escaping as an IndexError.
          if args[:2] != ["self", "action_runner"]:
            raise PageSetError("""Definition of Run<...> method of all
pages in %s must be in the form of def Run<...>(self, action_runner):"""
                                     % file_path)
    return page_set

  @staticmethod
  def _IsValidPrivacyBucket(bucket_name):
    """Returns True if bucket_name is falsy or one of the known buckets."""
    return (not bucket_name or
            bucket_name in (PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET))

  @property
  def base_dir(self):
    """Directory of file_path if it is a file, otherwise file_path itself."""
    if os.path.isfile(self.file_path):
      return os.path.dirname(self.file_path)
    else:
      return self.file_path

  @property
  def wpr_archive_info(self):  # pylint: disable=E0202
    """Lazily constructs wpr_archive_info if it's not set and returns it."""
    if self.archive_data_file and not self._wpr_archive_info:
      self._wpr_archive_info = (
          page_set_archive_info.PageSetArchiveInfo.FromFile(
            os.path.join(self.base_dir, self.archive_data_file)))
    return self._wpr_archive_info

  @wpr_archive_info.setter
  def wpr_archive_info(self, value):  # pylint: disable=E0202
    self._wpr_archive_info = value

  @property
  def bucket(self):
    """The bucket passed to the constructor (read-only)."""
    return self._bucket

  def ContainsOnlyFileURLs(self):
    """Returns True if every page's is_file is truthy (or there are none)."""
    return all(page.is_file for page in self.pages)

  def ReorderPageSet(self, results_file):
    """Returns this set's pages ordered as in the results of a past run.

    Args:
      results_file: Path of a csv file whose header row has a 'url' column.
          Each following row must name the url of a page in this set.

    Returns:
      A new list holding one page per data row, in row order. self.pages is
      left untouched.

    Raises:
      Exception: if the file has no 'url' column, or a row is too short or
          names a url that is not in this page set.
    """
    pages_by_url = dict((page.url, page) for page in self.pages)

    # csv wants a binary file on Python 2 (where str is bytes) and a text file
    # opened with newline='' on Python 3.
    if str is bytes:
      mode, open_kwargs = 'rb', {}
    else:
      mode, open_kwargs = 'r', {'newline': ''}

    pages = []
    with open(results_file, mode, **open_kwargs) as csv_file:
      csv_reader = csv.reader(csv_file)
      # The next() builtin works on Python 2 and 3; the default turns an empty
      # file into an empty header instead of a stray StopIteration.
      csv_header = next(csv_reader, [])

      if 'url' not in csv_header:
        raise Exception('Unusable results_file.')

      url_index = csv_header.index('url')

      for csv_row in csv_reader:
        if len(csv_row) <= url_index or csv_row[url_index] not in pages_by_url:
          raise Exception('Unusable results_file.')
        # Collect into the returned list. Going through AddPage here would
        # append duplicates to self.pages and leave the result empty.
        pages.append(pages_by_url[csv_row[url_index]])

    return pages

  def WprFilePathForPage(self, page):
    """Returns the archive path for page, or None without archive info."""
    if not self.wpr_archive_info:
      return None
    return self.wpr_archive_info.WprFilePathForPage(page)

  def __iter__(self):
    return self.pages.__iter__()

  def __len__(self):
    return len(self.pages)

  def __getitem__(self, key):
    return self.pages[key]

  def __setitem__(self, key, value):
    self.pages[key] = value
191