page_set_archive_info.py revision 116680a4aac90f2aa7413d9095a592090648e557
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import re
import shutil

from telemetry.util import cloud_storage


class PageSetArchiveInfo(object):
  """Bookkeeping for the .wpr archives that belong to one page set.

  The metadata is a JSON-style dict whose 'archives' entry maps a .wpr file
  name (relative to the directory of the metadata file) to the list of page
  names recorded in that archive. This class keeps that mapping, the reverse
  mapping (page name -> .wpr file name), and an optional temporary recording
  path that overrides both while it is set.
  """

  def __init__(self, file_path, data, ignore_archive=False):
    """Initializes the info object and optionally fetches the archives.

    Args:
      file_path: Path of the metadata file. Its directory is the base
          directory against which .wpr file names are resolved, and it is
          created if it does not exist.
      data: Dict with an 'archives' entry mapping .wpr file names to lists
          of page names. The 'archives' dict is stored by reference, so
          later modifications made through this object are visible in it.
      ignore_archive: If True, cloud_storage.GetIfChanged is not called for
          the archives.
    """
    self._file_path = file_path
    self._base_dir = os.path.dirname(file_path)

    # Ensure directory exists. A bare file name has an empty dirname, which
    # means "current directory"; os.makedirs('') would raise, so skip it.
    if self._base_dir and not os.path.exists(self._base_dir):
      os.makedirs(self._base_dir)

    archives = data['archives']

    # Download all .wpr files.
    if not ignore_archive:
      # TODO(tbarzic): Remove this once http://crbug.com/351143 is diagnosed.
      log_cloud_storage_exception = True
      for wpr_file in archives:
        archive_path = self._WprFileNameToPath(wpr_file)
        try:
          cloud_storage.GetIfChanged(archive_path)
        except (cloud_storage.CredentialsError,
                cloud_storage.PermissionError) as e:
          if os.path.exists(archive_path):
            # If the archive exists, assume the user recorded their own and
            # simply warn.
            logging.warning('Need credentials to update WPR archive: %s',
                            archive_path)
          elif log_cloud_storage_exception:
            # Log access errors only once, as they should stay the same in
            # other iterations.
            log_cloud_storage_exception = False
            logging.warning('Error getting WPR archive %s: %s ',
                            archive_path, e)
            logging.info('HOME: "%s"; USER: "%s"',
                         os.environ.get('HOME', ''),
                         os.environ.get('USER', ''))

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of page names it supports.
    self._wpr_file_to_page_names = archives

    # Map from the page name to a relative path (as it appears in the
    # metadata file) of the .wpr file.
    self._page_name_to_wpr_file = dict()
    for wpr_file, page_names in archives.items():
      for page_name in page_names:
        self._page_name_to_wpr_file[page_name] = wpr_file

    # While set, this path is returned for every page (see
    # WprFilePathForPage).
    self.temp_target_wpr_file_path = None

  @classmethod
  def FromFile(cls, file_path, ignore_archive=False):
    """Builds an instance from the JSON metadata file at |file_path|.

    If the file does not exist, a warning is logged and an instance with no
    archives is returned instead.
    """
    if os.path.exists(file_path):
      with open(file_path, 'r') as f:
        data = json.load(f)
      return cls(file_path, data, ignore_archive=ignore_archive)
    # TODO(tbarzic): Remove this once http://crbug.com/351143 is diagnosed.
    logging.warning('Page set archives not found: %s', file_path)
    return cls(file_path, {'archives': {}}, ignore_archive=ignore_archive)

  def WprFilePathForPage(self, page):
    """Returns the absolute .wpr path for |page|, or None if there is none.

    A temporary recording path, when set, takes precedence over the
    metadata. Otherwise the page is looked up by display_name and then by
    url.
    """
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path
    wpr_file = self._page_name_to_wpr_file.get(page.display_name, None)
    if wpr_file is None:
      # Some old page sets always use the URL to identify a page rather than
      # the display_name, so try to look for that.
      wpr_file = self._page_name_to_wpr_file.get(page.url, None)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewTemporaryRecording(self, temp_target_wpr_file_path):
    """Sets the temporary recording path used until pages are added."""
    self.temp_target_wpr_file_path = temp_target_wpr_file_path

  def AddRecordedPages(self, pages):
    """Moves the temporary recording into place and assigns |pages| to it.

    The temporary file is moved to the next free archive name, a '.sha1'
    file holding its hash is written next to it, the metadata file is
    rewritten, and archives that no longer hold any page are deleted.
    """
    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for page in pages:
      self._SetWprFileForPage(page.display_name, target_wpr_file)
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)

    # Update the hash file.
    with open(target_wpr_file_path + '.sha1', 'wb') as f:
      f.write(cloud_storage.CalculateHash(target_wpr_file_path))

    self._WriteToFile()
    self._DeleteAbandonedWprFiles()

  def _DeleteAbandonedWprFiles(self):
    """Drops archives with no pages from the metadata and from disk."""
    # Update the metadata so that the abandoned wpr files don't have empty
    # page name arrays.
    for wpr_file in self._AbandonedWprFiles():
      del self._wpr_file_to_page_names[wpr_file]
      # Don't fail if we're unable to delete some of the files.
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      try:
        os.remove(wpr_file_path)
      except OSError:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def _AbandonedWprFiles(self):
    """Returns the list of .wpr file names whose page list is empty."""
    # .items() (rather than the Python 2-only .iteritems()) works on both
    # Python 2 and Python 3.
    return [wpr_file
            for wpr_file, page_names in self._wpr_file_to_page_names.items()
            if not page_names]

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a page set. Don\'t edit '
        'by hand! Use record_wpr for updating.')
    # Don't write data for abandoned archives (those with no pages left).
    metadata['archives'] = dict(
        (wpr_file, page_names)
        for wpr_file, page_names in self._wpr_file_to_page_names.items()
        if page_names)

    with open(self._file_path, 'w') as f:
      json.dump(metadata, f, indent=4)

  def _WprFileNameToPath(self, wpr_file):
    """Returns the absolute path of |wpr_file| inside the base directory."""
    return os.path.abspath(os.path.join(self._base_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file name, absolute path) tuple for the new archive.

    Raises:
      Exception: If a known archive name is not of the form
          "<base>_<number>.wpr" or the known archives disagree on <base>.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_page_names:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if not base:
      # If we're creating a completely new info file, use the base name of
      # the page set file.
      base = os.path.splitext(os.path.basename(self._file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForPage(self, page_name, wpr_file):
    """For modifying the metadata when we're going to record a new archive."""
    old_wpr_file = self._page_name_to_wpr_file.get(page_name, None)
    if old_wpr_file:
      # The page moves to the new archive, so detach it from the old one.
      self._wpr_file_to_page_names[old_wpr_file].remove(page_name)
    self._page_name_to_wpr_file[page_name] = wpr_file
    if wpr_file not in self._wpr_file_to_page_names:
      self._wpr_file_to_page_names[wpr_file] = []
    self._wpr_file_to_page_names[wpr_file].append(page_name)