1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import json
6import logging
7import os
8import re
9import shutil
10import tempfile
11
12from telemetry.util import cloud_storage
13
14
class PageSetArchiveInfo(object):
  """Tracks which Web Page Replay (.wpr) archive file serves each page.

  The information is loaded from, and written back to, a JSON metadata file
  whose 'archives' entry maps a .wpr file name (relative to the directory of
  the metadata file) to the list of page names recorded in that archive.
  """

  # Archive names are of the format "some_thing_number.wpr".
  _WPR_FILE_NAME_RE = re.compile(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr')

  def __init__(self, file_path, data, ignore_archive=False):
    """Builds the lookup tables and optionally refreshes the archives.

    Args:
      file_path: Path of the JSON metadata file. Its directory is created if
          it does not exist yet and is used to resolve .wpr file names.
      data: Dict with an 'archives' key mapping .wpr file names to lists of
          page names. The 'archives' dict is kept by reference and is mutated
          when new recordings are added.
      ignore_archive: If True, cloud_storage is not asked to fetch the
          archives.
    """
    self._file_path = file_path
    self._base_dir = os.path.dirname(file_path)

    # Ensure directory exists. A bare file name has an empty dirname, which
    # refers to the current directory; os.makedirs('') would raise.
    if self._base_dir and not os.path.exists(self._base_dir):
      os.makedirs(self._base_dir)

    # Download all .wpr files.
    if not ignore_archive:
      for archive_name in data['archives']:
        archive_path = self._WprFileNameToPath(archive_name)
        try:
          cloud_storage.GetIfChanged(archive_path)
        except (cloud_storage.CredentialsError, cloud_storage.PermissionError):
          if os.path.exists(archive_path):
            # If the archive exists, assume the user recorded their own and
            # simply warn.
            logging.warning('Need credentials to update WPR archive: %s',
                            archive_path)
          else:
            # Keep going (construction has never failed here), but do not
            # hide the fact that this archive is not available locally.
            logging.error('Need credentials to download WPR archive: %s',
                          archive_path)

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of page names it supports.
    self._wpr_file_to_page_names = data['archives']

    # Map from the page name to a relative path (as it appears in the metadata
    # file) of the .wpr file.
    self._page_name_to_wpr_file = dict()
    for wpr_file, page_names in data['archives'].items():
      for page_name in page_names:
        self._page_name_to_wpr_file[page_name] = wpr_file

    # Path of the recording in progress, or None when there is none. While
    # set, it overrides the per-page lookup in WprFilePathForPage.
    self.temp_target_wpr_file_path = None

  @classmethod
  def FromFile(cls, file_path, ignore_archive=False):
    """Creates an instance from the JSON metadata file at |file_path|.

    If the file does not exist, an instance with no archives is returned.

    Args:
      file_path: Path of the JSON metadata file.
      ignore_archive: Forwarded to the constructor.

    Returns:
      A new instance of this class.
    """
    if not os.path.exists(file_path):
      return cls(file_path, {'archives': {}}, ignore_archive=ignore_archive)
    with open(file_path, 'r') as f:
      data = json.load(f)
    return cls(file_path, data, ignore_archive=ignore_archive)

  def WprFilePathForPage(self, page):
    """Returns the absolute path of the archive to use for |page|.

    Args:
      page: Object with |display_name| and |url| attributes.

    Returns:
      The temporary recording path if one is set; otherwise the path of the
      archive registered for the page, or None if there is none.
    """
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path
    wpr_file = self._page_name_to_wpr_file.get(page.display_name, None)
    if wpr_file is None:
      # Some old page sets always use the URL to identify a page rather than the
      # display_name, so try to look for that.
      wpr_file = self._page_name_to_wpr_file.get(page.url, None)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
    """Registers the file that the next recording is written to.

    Args:
      temp_wpr_file_path: Path to use. If None, a new temporary file is
          created with tempfile.mkstemp() and its path is used.
    """
    if temp_wpr_file_path is None:
      temp_wpr_file_handle, temp_wpr_file_path = tempfile.mkstemp()
      # Only the path is needed; close the descriptor so it does not leak.
      os.close(temp_wpr_file_handle)
    self.temp_target_wpr_file_path = temp_wpr_file_path

  def AddRecordedPages(self, pages):
    """Turns the temporary recording into the archive for |pages|.

    The temporary file is moved to the next free archive name, a '.sha1' file
    with its hash is written next to it, the metadata file is rewritten and
    archives that no longer serve any page are deleted. If |pages| is empty
    the temporary recording is deleted instead.

    Args:
      pages: Iterable of objects with a |display_name| attribute.
    """
    if not pages:
      # Nothing was recorded. Only delete when a temporary recording was
      # actually registered; os.remove(None) would raise.
      if self.temp_target_wpr_file_path is not None:
        os.remove(self.temp_target_wpr_file_path)
      return

    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for page in pages:
      self._SetWprFileForPage(page.display_name, target_wpr_file)
    # NOTE(review): temp_target_wpr_file_path is left unchanged after the
    # move, so WprFilePathForPage keeps returning it until
    # AddNewTemporaryRecording is called again -- confirm this is intended.
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)

    # Update the hash file.
    with open(target_wpr_file_path + '.sha1', 'wb') as f:
      f.write(cloud_storage.CalculateHash(target_wpr_file_path))
      f.flush()

    self._WriteToFile()
    self._DeleteAbandonedWprFiles()

  def _DeleteAbandonedWprFiles(self):
    """Forgets and deletes the archives that no longer serve any page."""
    # Update the metadata so that the abandoned wpr files don't have empty page
    # name arrays.
    for wpr_file in self._AbandonedWprFiles():
      del self._wpr_file_to_page_names[wpr_file]
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      # Don't fail if we're unable to delete some of the files; the catch is
      # deliberately broad because deletion is best effort.
      try:
        os.remove(wpr_file_path)
      except Exception:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def _AbandonedWprFiles(self):
    """Returns the list of archive names whose page name list is empty."""
    # items() rather than iteritems() so this runs on Python 2 and Python 3.
    return [wpr_file
            for wpr_file, page_names in self._wpr_file_to_page_names.items()
            if not page_names]

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a page set. Don\'t edit by '
        'hand! Use record_wpr for updating.')
    # Don't write data for abandoned archives.
    metadata['archives'] = dict(
        (wpr_file, page_names)
        for wpr_file, page_names in self._wpr_file_to_page_names.items()
        if page_names)

    with open(self._file_path, 'w') as f:
      json.dump(metadata, f, indent=4)
      f.flush()

  def _WprFileNameToPath(self, wpr_file):
    """Returns the absolute path of |wpr_file| inside the metadata directory."""
    return os.path.abspath(os.path.join(self._base_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file name, absolute path) tuple. The number in the name is one higher
      than the highest number among the known archives.

    Raises:
      Exception: If a known archive name does not have the expected format, or
          if the known archives do not all share the same base name.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_page_names:
      match = self._WPR_FILE_NAME_RE.match(wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if not base:
      # If we're creating a completely new info file, use the base name of the
      # page set file.
      base = os.path.splitext(os.path.basename(self._file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForPage(self, page_name, wpr_file):
    """For modifying the metadata when we're going to record a new archive.

    Moves |page_name| from the archive it was registered with, if any, to
    |wpr_file| in both lookup tables.
    """
    old_wpr_file = self._page_name_to_wpr_file.get(page_name, None)
    if old_wpr_file:
      self._wpr_file_to_page_names[old_wpr_file].remove(page_name)
    self._page_name_to_wpr_file[page_name] = wpr_file
    if wpr_file not in self._wpr_file_to_page_names:
      self._wpr_file_to_page_names[wpr_file] = []
    self._wpr_file_to_page_names[wpr_file].append(page_name)
164