# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import re
import shutil
import tempfile

from telemetry.util import cloud_storage


class PageSetArchiveInfo(object):
  """Bookkeeping for the Web Page Replay (.wpr) archives of a page set.

  The metadata is a JSON file whose 'archives' entry maps the relative name
  of each .wpr file to the list of page names recorded in it. This class
  keeps that mapping and its inverse (page name -> .wpr file) in memory and
  rewrites the JSON file when new recordings are added.
  """

  def __init__(self, file_path, data, ignore_archive=False):
    """Initializes the info object from already-parsed metadata.

    Args:
      file_path: Path of the metadata file. Archive names are resolved
          relative to its directory, which is created if missing.
      data: Dict with an 'archives' key mapping .wpr file names to lists of
          page names. The 'archives' dict is stored by reference and is
          modified by later recordings.
      ignore_archive: If True, skip fetching the archives through
          cloud_storage.

    Raises:
      KeyError: If data has no 'archives' entry.
    """
    self._file_path = file_path
    self._base_dir = os.path.dirname(file_path)

    # Ensure directory exists. A bare file name has an empty dirname, which
    # means the current directory; os.makedirs('') would raise for it.
    if self._base_dir and not os.path.exists(self._base_dir):
      os.makedirs(self._base_dir)

    archives = data['archives']

    # Download all .wpr files.
    if not ignore_archive:
      for wpr_file in archives:
        archive_path = self._WprFileNameToPath(wpr_file)
        try:
          cloud_storage.GetIfChanged(archive_path)
        except (cloud_storage.CredentialsError,
                cloud_storage.PermissionError):
          if os.path.exists(archive_path):
            # If the archive exists, assume the user recorded their own and
            # simply warn.
            logging.warning('Need credentials to update WPR archive: %s',
                            archive_path)
          else:
            # Stay best-effort, but do not hide the fact that the archive
            # is unavailable.
            logging.warning(
                'Need credentials to download WPR archive and no local copy '
                'exists: %s', archive_path)

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of page names it supports.
    self._wpr_file_to_page_names = archives

    # Map from the page name to a relative path (as it appears in the metadata
    # file) of the .wpr file.
    self._page_name_to_wpr_file = {}
    # Find out the wpr file names for each page.
    for wpr_file, page_names in archives.items():
      for page_name in page_names:
        self._page_name_to_wpr_file[page_name] = wpr_file

    # Path of the recording in progress, or None when not recording.
    self.temp_target_wpr_file_path = None

  @classmethod
  def FromFile(cls, file_path, ignore_archive=False):
    """Creates an instance from a metadata file.

    Args:
      file_path: Path of the JSON metadata file.
      ignore_archive: Forwarded to the constructor.

    Returns:
      An instance built from the parsed file if it exists, otherwise an
      instance with no archives.
    """
    if os.path.exists(file_path):
      with open(file_path, 'r') as f:
        data = json.load(f)
      return cls(file_path, data, ignore_archive=ignore_archive)
    return cls(file_path, {'archives': {}}, ignore_archive=ignore_archive)

  def WprFilePathForPage(self, page):
    """Returns the path of the .wpr file to use for |page|, or None.

    While a temporary recording is set, its path is returned for every page.
    Otherwise the page is looked up by display_name and then by url.
    """
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path
    wpr_file = self._page_name_to_wpr_file.get(page.display_name)
    if wpr_file is None:
      # Some old page sets always use the URL to identify a page rather than
      # the display_name, so try to look for that.
      wpr_file = self._page_name_to_wpr_file.get(page.url)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
    """Sets the temporary file that receives the next recording.

    Args:
      temp_wpr_file_path: Path to record into. If None, a new temporary file
          is created with tempfile.mkstemp().
    """
    if temp_wpr_file_path is None:
      temp_wpr_file_handle, temp_wpr_file_path = tempfile.mkstemp()
      # Only the path is needed; close the descriptor mkstemp opened.
      os.close(temp_wpr_file_handle)
    self.temp_target_wpr_file_path = temp_wpr_file_path

  def AddRecordedPages(self, pages):
    """Commits the temporary recording as the archive for |pages|.

    With no pages, the temporary recording is deleted. Otherwise it is moved
    to the next archive name, its .sha1 file is written, the metadata file is
    rewritten and archives left without pages are deleted.

    Args:
      pages: Pages contained in the temporary recording; each is registered
          under its display_name.
    """
    if not pages:
      os.remove(self.temp_target_wpr_file_path)
      return

    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for page in pages:
      self._SetWprFileForPage(page.display_name, target_wpr_file)
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)

    # Update the hash file.
    # NOTE(review): the file is opened in binary mode, which presumably
    # expects CalculateHash to return a byte string -- confirm before
    # running this on Python 3.
    with open(target_wpr_file_path + '.sha1', 'wb') as f:
      f.write(cloud_storage.CalculateHash(target_wpr_file_path))
      f.flush()

    self._WriteToFile()
    self._DeleteAbandonedWprFiles()

  def _DeleteAbandonedWprFiles(self):
    """Drops archives with no pages from the metadata and from disk."""
    # Update the metadata so that the abandoned wpr files don't have empty
    # page name arrays.
    for wpr_file in self._AbandonedWprFiles():
      del self._wpr_file_to_page_names[wpr_file]
      # Don't fail if we're unable to delete some of the files.
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      try:
        os.remove(wpr_file_path)
      except OSError:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def _AbandonedWprFiles(self):
    """Returns the list of .wpr file names that have no pages left."""
    return [wpr_file
            for wpr_file, page_names in self._wpr_file_to_page_names.items()
            if not page_names]

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = {}
    metadata['description'] = (
        'Describes the Web Page Replay archives for a page set. Don\'t edit by '
        'hand! Use record_wpr for updating.')
    metadata['archives'] = self._wpr_file_to_page_names.copy()
    # Don't write data for abandoned archives.
    for wpr_file in self._AbandonedWprFiles():
      del metadata['archives'][wpr_file]

    with open(self._file_path, 'w') as f:
      json.dump(metadata, f, indent=4)
      f.flush()

  def _WprFileNameToPath(self, wpr_file):
    """Returns the absolute path of |wpr_file| next to the metadata file."""
    return os.path.abspath(os.path.join(self._base_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file name, absolute path) tuple for the new archive.

    Raises:
      Exception: If a known archive name is not of the expected format or
          the known archives do not share one base name.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_page_names:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if not base:
      # If we're creating a completely new info file, use the base name of the
      # page set file.
      base = os.path.splitext(os.path.basename(self._file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForPage(self, page_name, wpr_file):
    """For modifying the metadata when we're going to record a new archive."""
    old_wpr_file = self._page_name_to_wpr_file.get(page_name)
    if old_wpr_file:
      # The page moves to the new archive; the old one may become abandoned.
      self._wpr_file_to_page_names[old_wpr_file].remove(page_name)
    self._page_name_to_wpr_file[page_name] = wpr_file
    if wpr_file not in self._wpr_file_to_page_names:
      self._wpr_file_to_page_names[wpr_file] = []
    self._wpr_file_to_page_names[wpr_file].append(page_name)