1# Copyright 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import csv
6import inspect
7import os
8
9from telemetry.page import page as page_module
10from telemetry.page import page_set_archive_info
11from telemetry.util import cloud_storage
12
# Cloud storage bucket identifiers, re-exported from cloud_storage under
# module-level names. PageSet accepts exactly these values (or an empty/None
# value) as its `bucket` argument; see PageSet._IsValidPrivacyBucket.
PUBLIC_BUCKET = cloud_storage.PUBLIC_BUCKET
PARTNER_BUCKET = cloud_storage.PARTNER_BUCKET
INTERNAL_BUCKET = cloud_storage.INTERNAL_BUCKET
16
17
class PageSetError(Exception):
  """Exception type for page set problems.

  Adds nothing to Exception; it exists so that page set failures can be
  raised and caught under a dedicated name.
  """
20
21
class PageSet(object):
  # An ordered collection of pages together with the settings stored
  # alongside them (archive data file, credentials path, user agent type,
  # serving directories and privacy bucket).
  #
  # NOTE: this is deliberately a comment and not a docstring. Description()
  # returns the first line of the class docstring, so giving this class a
  # docstring would change what PageSet.Description() returns.

  def __init__(self, file_path=None, archive_data_file='',
               credentials_path=None, user_agent_type=None,
               make_javascript_deterministic=True, startup_url='',
               serving_dirs=None, bucket=None):
    """Initializes an empty page set.

    Args:
      file_path: Path of the file (or directory) this page set lives in.
          Defaults to the file that defines this instance's class.
      archive_data_file: Path, relative to base_dir, of the file from which
          wpr_archive_info is lazily loaded. Empty means no archive info.
      credentials_path: Stored as-is on the instance.
      user_agent_type: Stored as-is on the instance.
      make_javascript_deterministic: Stored as-is on the instance.
      startup_url: Stored as-is on the instance.
      serving_dirs: Optional iterable of directories. Relative entries are
          resolved against base_dir; all entries are stored as real paths.
      bucket: None/empty, or one of PUBLIC_BUCKET, PARTNER_BUCKET,
          INTERNAL_BUCKET.

    Raises:
      ValueError: if bucket is not one of the accepted values.
    """
    # The default value of file_path is the location of the file that defines
    # this page set instance's class.
    if file_path is None:
      file_path = inspect.getfile(self.__class__)
      # Prefer the .py source over its compiled .pyc when the source exists.
      if file_path.endswith('.pyc') and os.path.exists(file_path[:-1]):
        file_path = file_path[:-1]

    self.file_path = file_path
    # These attributes can be set dynamically by the page set.
    self.archive_data_file = archive_data_file
    self.credentials_path = credentials_path
    self.user_agent_type = user_agent_type
    self.make_javascript_deterministic = make_javascript_deterministic
    self._wpr_archive_info = None
    self.startup_url = startup_url
    self.pages = []
    self.serving_dirs = set()
    # Make sure the page set's serving_dirs are absolute, resolved paths.
    # base_dir is only usable from here on because it reads self.file_path.
    for sd in (serving_dirs or []):
      if os.path.isabs(sd):
        self.serving_dirs.add(os.path.realpath(sd))
      else:
        self.serving_dirs.add(os.path.realpath(os.path.join(self.base_dir, sd)))
    if not self._IsValidPrivacyBucket(bucket):
      raise ValueError("Pageset privacy bucket %s is invalid" % bucket)
    self._bucket = bucket

  @classmethod
  def Name(cls):
    """Returns the last component of the name of the class's module."""
    return cls.__module__.split('.')[-1]

  @classmethod
  def Description(cls):
    """Returns the first line of the class docstring, or '' if there is none."""
    if cls.__doc__:
      return cls.__doc__.splitlines()[0]
    return ''

  def AddPage(self, page):
    """Appends page to this set; page.page_set must already be this set."""
    assert page.page_set is self
    self.pages.append(page)

  def AddPageWithDefaultRunNavigate(self, page_url):
    """Adds a simple page whose url is page_url.

    The page is built as page_module.Page(page_url, self, self.base_dir) and
    contains only the default RunNavigateSteps.
    """
    self.AddPage(page_module.Page(
      page_url, self, self.base_dir))

  @staticmethod
  def _IsValidPrivacyBucket(bucket_name):
    """Returns True if bucket_name is empty/None or a known bucket constant."""
    if not bucket_name:
      return True
    return bucket_name in (PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET)

  @property
  def base_dir(self):
    """Directory of file_path if it is a file, otherwise file_path itself."""
    if os.path.isfile(self.file_path):
      return os.path.dirname(self.file_path)
    return self.file_path

  @property
  def wpr_archive_info(self):  # pylint: disable=E0202
    """Lazily constructs wpr_archive_info if it's not set and returns it.

    Returns None when nothing was set and archive_data_file is empty.
    """
    if self.archive_data_file and not self._wpr_archive_info:
      self._wpr_archive_info = (
          page_set_archive_info.PageSetArchiveInfo.FromFile(
            os.path.join(self.base_dir, self.archive_data_file)))
    return self._wpr_archive_info

  @wpr_archive_info.setter
  def wpr_archive_info(self, value):  # pylint: disable=E0202
    self._wpr_archive_info = value

  @property
  def bucket(self):
    """The privacy bucket passed to the constructor (may be None)."""
    return self._bucket

  def ContainsOnlyFileURLs(self):
    """Returns True if every page in the set has a truthy is_file."""
    return all(page.is_file for page in self.pages)

  def ReorderPageSet(self, results_file):
    """Returns this set's pages ordered by the results of a past run.

    Args:
      results_file: Path to a CSV file whose header row contains a 'url'
          column. Every following row names one page of this set by its url.

    Returns:
      A new list holding this set's pages in the order in which their urls
      appear in results_file. self.pages itself is not modified.

    Raises:
      Exception: if results_file is empty, has no 'url' column, contains a
          row without a url field, or names a url that is not in this set.
    """
    pages_by_url = dict((page.url, page) for page in self.pages)

    # The csv module wants a binary-mode file on Python 2 (where bytes is
    # str) and a text-mode file opened with newline='' on Python 3.
    if bytes is str:
      csv_file = open(results_file, 'rb')
    else:
      csv_file = open(results_file, 'r', newline='')

    pages = []
    with csv_file:
      csv_reader = csv.reader(csv_file)
      # next() with a default avoids leaking StopIteration on an empty file.
      csv_header = next(csv_reader, None)

      if csv_header is None or 'url' not in csv_header:
        raise Exception('Unusable results_file.')

      url_index = csv_header.index('url')

      for csv_row in csv_reader:
        # Rows too short to hold a url (e.g. blank lines) are as unusable as
        # rows naming an unknown url.
        if (len(csv_row) <= url_index or
            csv_row[url_index] not in pages_by_url):
          raise Exception('Unusable results_file.')
        # Collect into the returned list. Calling AddPage here would append
        # duplicates to self.pages and leave the result empty.
        pages.append(pages_by_url[csv_row[url_index]])

    return pages

  def WprFilePathForPage(self, page):
    """Returns the archive info's file path for page, or None without info."""
    if not self.wpr_archive_info:
      return None
    return self.wpr_archive_info.WprFilePathForPage(page)

  def __iter__(self):
    return iter(self.pages)

  def __len__(self):
    return len(self.pages)

  def __getitem__(self, key):
    return self.pages[key]

  def __setitem__(self, key, value):
    self.pages[key] = value
157