utils.py revision e2e995d750616022e2d5cfeda5d9eb2bcd78df2d
#!/usr/bin/python
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This is a utility to build a summary of the given directory and save it to a
json file.

Example usage:
    result_utils.py -p path

The content of the json file looks like:
{'default': {'/D': {'control': {'/S': 734},
                    'debug': {'/D': {'client.0.DEBUG': {'/S': 5698},
                                     'client.0.ERROR': {'/S': 254},
                                     'client.0.INFO': {'/S': 1020},
                                     'client.0.WARNING': {'/S': 242}},
                              '/S': 7214}
                    },
             '/S': 7948
            }
}
"""

import argparse
import copy
import glob
import json
import logging
import os
import time

import utils_lib


# Do NOT import autotest_lib modules here. This module can be executed without
# dependency on other autotest modules. This is to keep the logic of result
# trimming on the server side, instead of depending on the autotest client
# module.

DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json'
# Minimum free disk space that must remain after saving the summary file.
MIN_FREE_DISK_BYTES = 10 * 1024 * 1024

# Autotest uses some state files to track process running state. These files
# are deleted from test results, so they can be ignored.
FILES_TO_IGNORE = set([
    'control.autoserv.state'
])

def get_unique_dir_summary_file(path):
    """Get a unique file path to save the directory summary json string.

    @param path: The directory path to save the summary file to.
    """
    summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time()
    # Make sure the summary file name is unique.
    file_name = os.path.join(path, summary_file)
    if os.path.exists(file_name):
        count = 1
        name, ext = os.path.splitext(summary_file)
        while os.path.exists(file_name):
            file_name = os.path.join(path, '%s_%s%s' % (name, count, ext))
            count += 1
    return file_name


def get_dir_summary(path, top_dir, all_dirs=set()):
    """Get the directory summary for the given path.

    @param path: The directory to collect a summary for.
    @param top_dir: The top directory being summarized. This is used to check
            whether a directory is a subdir of the original directory being
            summarized.
    @param all_dirs: A set of paths that have already been collected. This is
            used to prevent infinite recursion caused by symlinks.

    @return: A dictionary of the directory summary.
    """
    dir_info = {}
    dir_info[utils_lib.ORIGINAL_SIZE_BYTES] = 0
    summary = {os.path.basename(path): dir_info}

    if os.path.isfile(path):
        dir_info[utils_lib.ORIGINAL_SIZE_BYTES] = os.stat(path).st_size
    else:
        dir_info[utils_lib.DIRS] = {}
        real_path = os.path.realpath(path)
        # The assumption here is that results are copied back to the drone by
        # copying the symlink, not the content, which is true with the rsync
        # currently used in the cros_host.get_file call.
        # Skip scanning the child folders if any of the following conditions
        # is true:
        # 1. The directory is a symlink and links to a folder under `top_dir`.
        # 2. The directory was scanned already.
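        # For example, if `sysinfo/var/log_symlink` were a symlink pointing
        # back into a folder under `top_dir` (an illustrative name, not one
        # used by this module), its contents would be counted once at the
        # real location and the link itself would add no size here.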
        if ((os.path.islink(path) and real_path.startswith(top_dir)) or
                real_path in all_dirs):
            return summary

        all_dirs.add(real_path)
        for f in sorted(os.listdir(path)):
            f_summary = get_dir_summary(os.path.join(path, f), top_dir,
                                        all_dirs)
            dir_info[utils_lib.DIRS][f] = f_summary[f]
            dir_info[utils_lib.ORIGINAL_SIZE_BYTES] += (
                f_summary[f][utils_lib.ORIGINAL_SIZE_BYTES])

    return summary


def build_summary_json(path):
    """Build a summary of files in the given path.

    @param path: The directory to build a summary for.
    @return: A dictionary of the directory summary. Callers can serialize it
            with json.dumps to get the json string.
    @raise IOError: If the given path doesn't exist.
    """
    if not os.path.exists(path):
        raise IOError('Path %s does not exist.' % path)

    if not os.path.isdir(path):
        raise ValueError('The given path %s is a file. It must be a '
                         'directory.' % path)

    # Make sure the path ends with `/` so the root key of the summary json is
    # always utils_lib.ROOT_DIR ('').
    if not path.endswith(os.sep):
        path = path + os.sep

    return get_dir_summary(path, top_dir=path)


def _update_sizes(entry):
    """Update a directory entry's sizes.

    Values of ORIGINAL_SIZE_BYTES, TRIMMED_SIZE_BYTES and COLLECTED_SIZE_BYTES
    are re-calculated based on the files under the directory. If the entry is
    a file, skip the update.

    @param entry: A dict of a directory entry in a summary.
    """
    if utils_lib.DIRS not in entry:
        return

    entry[utils_lib.ORIGINAL_SIZE_BYTES] = sum([
        entry[utils_lib.DIRS][s][utils_lib.ORIGINAL_SIZE_BYTES]
        for s in entry[utils_lib.DIRS]])
    # Before trimming is implemented, COLLECTED_SIZE_BYTES and
    # TRIMMED_SIZE_BYTES have the same value as ORIGINAL_SIZE_BYTES.
    entry[utils_lib.COLLECTED_SIZE_BYTES] = sum([
        entry[utils_lib.DIRS][s].get(
            utils_lib.COLLECTED_SIZE_BYTES,
            entry[utils_lib.DIRS][s].get(
                utils_lib.TRIMMED_SIZE_BYTES,
                entry[utils_lib.DIRS][s][utils_lib.ORIGINAL_SIZE_BYTES]))
        for s in entry[utils_lib.DIRS]])
    entry[utils_lib.TRIMMED_SIZE_BYTES] = sum([
        entry[utils_lib.DIRS][s].get(
            utils_lib.TRIMMED_SIZE_BYTES,
            entry[utils_lib.DIRS][s][utils_lib.ORIGINAL_SIZE_BYTES])
        for s in entry[utils_lib.DIRS]])


def _delete_missing_entries(summary_old, summary_new):
    """Delete files/directories that only exist in the old summary.

    When the new summary is final, i.e., built from the final result
    directory, files or directories missing from it are considered to be
    deleted and trimmed to size 0.

    @param summary_old: Old directory summary.
    @param summary_new: New directory summary.
    """
    for name in list(summary_old.keys()):
        if name not in summary_new:
            if utils_lib.DIRS in summary_old[name]:
                # Trim sub-directories.
                _delete_missing_entries(summary_old[name][utils_lib.DIRS], {})
                _update_sizes(summary_old[name])
            elif name in FILES_TO_IGNORE:
                # Remove the file from the summary as it can be ignored.
                del summary_old[name]
            else:
                # Before setting the trimmed size to 0, update the collected
                # size if it's not set yet.
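                # For example (illustrative values), a deleted 5000-byte file
                # with no collected size recorded yet ends up with
                # ORIGINAL_SIZE_BYTES: 5000, COLLECTED_SIZE_BYTES: 5000 and
                # TRIMMED_SIZE_BYTES: 0 after this branch runs.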
                if utils_lib.COLLECTED_SIZE_BYTES not in summary_old[name]:
                    trimmed_size = summary_old[name].get(
                        utils_lib.TRIMMED_SIZE_BYTES,
                        summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES])
                    summary_old[name][utils_lib.COLLECTED_SIZE_BYTES] = (
                        trimmed_size)
                summary_old[name][utils_lib.TRIMMED_SIZE_BYTES] = 0
        elif utils_lib.DIRS in summary_old[name]:
            _delete_missing_entries(summary_old[name][utils_lib.DIRS],
                                    summary_new[name][utils_lib.DIRS])
            _update_sizes(summary_old[name])
    _update_sizes(summary_old)


def _merge(summary_old, summary_new, is_final=False):
    """Merge a new directory summary into an old one.

    Update the old directory summary with the new summary. Also calculate the
    total size of results collected from the client side. The merge is done in
    place: `summary_old` becomes the merged summary.

    When merging with previously collected results, any results that do not
    exist in the new summary, or files whose sizes differ from the newly
    collected files, are considered extra results that were collected or
    overwritten by the new results. Therefore, the size of the collected
    results should include such files, and COLLECTED_SIZE_BYTES can be larger
    than TRIMMED_SIZE_BYTES.
    As an example:
    summary_old: {'file1': {TRIMMED_SIZE_BYTES: 1000,
                            ORIGINAL_SIZE_BYTES: 1000,
                            COLLECTED_SIZE_BYTES: 1000}}
    This means a result `file1` with an original size of 1KB was collected
    with a size of 1KB.
    summary_new: {'file1': {TRIMMED_SIZE_BYTES: 1000,
                            ORIGINAL_SIZE_BYTES: 2000,
                            COLLECTED_SIZE_BYTES: 1000}}
    This means a result `file1` of 2KB was trimmed down to 1KB and was
    collected with a size of 1KB.
    Note that the second result collection has an updated result `file1`
    (because of the different ORIGINAL_SIZE_BYTES), and it needs to be
    rsync-ed to the drone. Therefore, the merged summary will be:
    {'file1': {TRIMMED_SIZE_BYTES: 1000,
               ORIGINAL_SIZE_BYTES: 2000,
               COLLECTED_SIZE_BYTES: 2000}}
    Note that:
    * TRIMMED_SIZE_BYTES is still 1KB, which reflects the actual size of the
      file to be collected.
    * ORIGINAL_SIZE_BYTES is updated to 2KB, which is the size of the file in
      the new result `file1`.
    * COLLECTED_SIZE_BYTES is 2KB because rsync will copy `file1` twice as
      it's changed.

    @param summary_old: Old directory summary. Updated in place to hold the
            merged summary, including the total collected size.
    @param summary_new: New directory summary.
    @param is_final: True if summary_new is built from the final result
            folder. Default is set to False.
    """
    for name in summary_new:
        if name not in summary_old:
            # A file/dir exists in the new client dir, but not in the old
            # one, which means the file or directory is newly collected.
            summary_old[name] = copy.deepcopy(summary_new[name])
        elif utils_lib.DIRS in summary_new[name]:
            # `name` is a directory in the new summary; merge the directories
            # of the old and new summaries under `name`.

            if utils_lib.DIRS not in summary_old[name]:
                # If `name` is a file in the old summary but a directory in
                # the new summary, the file in the old summary will be
                # overwritten by the new directory by rsync. Therefore, force
                # it to be an empty directory in the old summary, so that the
                # new directory can be merged.
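                # For example (illustrative entry), an old summary entry
                # {'log': {ORIGINAL_SIZE_BYTES: 100}} is reset below to an
                # empty directory entry before the new `log` directory is
                # merged into it.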
                summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES] = 0
                summary_old[name][utils_lib.TRIMMED_SIZE_BYTES] = 0
                summary_old[name][utils_lib.COLLECTED_SIZE_BYTES] = 0
                summary_old[name][utils_lib.DIRS] = {}

            _merge(summary_old[name][utils_lib.DIRS],
                   summary_new[name][utils_lib.DIRS], is_final)
        else:
            # `name` is a file. Compare the original sizes; if they are
            # different, the file was overwritten, so increment
            # COLLECTED_SIZE_BYTES.

            if utils_lib.DIRS in summary_old[name]:
                # If `name` is a directory in the old summary, but a file in
                # the new summary, rsync will fail to copy the file as it
                # can't overwrite a directory. Therefore, skip the merge.
                continue

            new_size = summary_new[name][utils_lib.ORIGINAL_SIZE_BYTES]
            old_size = summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES]
            new_trimmed_size = summary_new[name].get(
                utils_lib.TRIMMED_SIZE_BYTES,
                summary_new[name][utils_lib.ORIGINAL_SIZE_BYTES])
            old_trimmed_size = summary_old[name].get(
                utils_lib.TRIMMED_SIZE_BYTES,
                summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES])
            if new_size != old_size:
                if is_final and new_trimmed_size == old_trimmed_size:
                    # If the file is merged from the final result folder to
                    # an older summary, it's not considered to be trimmed if
                    # the size is not changed. The reason is that the file on
                    # the server side does not have the info of its original
                    # size.
                    continue

                # Before trimming is implemented, COLLECTED_SIZE_BYTES is the
                # value of ORIGINAL_SIZE_BYTES.
                new_collected_size = summary_new[name].get(
                    utils_lib.COLLECTED_SIZE_BYTES,
                    summary_new[name].get(
                        utils_lib.TRIMMED_SIZE_BYTES,
                        summary_new[name][utils_lib.ORIGINAL_SIZE_BYTES]))
                old_collected_size = summary_old[name].get(
                    utils_lib.COLLECTED_SIZE_BYTES,
                    summary_old[name].get(
                        utils_lib.TRIMMED_SIZE_BYTES,
                        summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES]))

                summary_old[name][utils_lib.COLLECTED_SIZE_BYTES] = (
                    new_collected_size + old_collected_size)
                summary_old[name][utils_lib.TRIMMED_SIZE_BYTES] = (
                    summary_new[name].get(
                        utils_lib.TRIMMED_SIZE_BYTES,
                        summary_new[name][utils_lib.ORIGINAL_SIZE_BYTES]))
                summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES] = new_size

        # Update the sizes (COLLECTED_SIZE_BYTES, TRIMMED_SIZE_BYTES and
        # ORIGINAL_SIZE_BYTES) based on the merged directory summary.
        _update_sizes(summary_old[name])


def merge_summaries(path):
    """Merge all directory summaries in the given path.

    This function calculates the total size of result files collected from
    the test device as well as the files generated on the drone. It also
    returns the merged directory summary.

    @param path: A path to search for directory summaries.
    @return: A tuple of (client_collected_bytes, merged_summary):
            client_collected_bytes: The total size of results collected from
                the DUT. The number can be larger than the total file size of
                the given path, as files can be overwritten or removed.
            merged_summary: The merged directory summary of the given path.
    """
    # Find all directory summary files and sort them by modification time.
    summary_files = glob.glob(os.path.join(path, 'dir_summary_*.json'))
    summary_files = sorted(summary_files, key=os.path.getmtime)

    all_summaries = []
    for summary_file in summary_files:
        with open(summary_file) as f:
            all_summaries.append(json.load(f))

    # Merge all summaries.
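    # For example, with two summary files collected at different times (say
    # dir_summary_100.json then dir_summary_200.json, illustrative names),
    # the earlier one becomes the base and the later one is merged into it,
    # so collected sizes accumulate across result collections.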
    merged_summary = (copy.deepcopy(all_summaries[0]) if all_summaries
                      else {})
    for summary in all_summaries[1:]:
        _merge(merged_summary, summary)
    # After all summaries from the test device (client side) are merged, we
    # can get the total size of result files transferred from the test
    # device. If there is no directory summary collected, default
    # client_collected_bytes to 0.
    client_collected_bytes = 0
    if merged_summary:
        client_collected_bytes = (
            merged_summary[utils_lib.ROOT_DIR][utils_lib.COLLECTED_SIZE_BYTES])

    # Get the summary of the current directory.

    # Make sure the path ends with /, so the top directory in the summary
    # will be '', which is consistent with other summaries.
    if not path.endswith(os.sep):
        path += os.sep

    last_summary = get_dir_summary(path, top_dir=path)
    _merge(merged_summary, last_summary, is_final=True)
    _delete_missing_entries(merged_summary, last_summary)

    return client_collected_bytes, merged_summary


def main():
    """Main script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, dest='path',
                        help='Path to build directory summary.')
    parser.add_argument('-m', type=int, dest='max_size_KB', default=0,
                        help='Maximum result size in KB. Set to 0 to disable '
                             'result throttling.')
    options = parser.parse_args()

    summary = build_summary_json(options.path)
    summary_json = json.dumps(summary)
    summary_file = get_unique_dir_summary_file(options.path)

    # Make sure there is enough free disk space to write the file.
    stat = os.statvfs(options.path)
    free_space = stat.f_frsize * stat.f_bavail
    if free_space - len(summary_json) < MIN_FREE_DISK_BYTES:
        raise IOError('Not enough disk space after saving the summary file. '
                      'Available free disk: %s bytes. Summary file size: %s '
                      'bytes.' % (free_space, len(summary_json)))

    with open(summary_file, 'w') as f:
        f.write(summary_json)
    logging.info('Directory summary of %s is saved to file %s.', options.path,
                 summary_file)


if __name__ == '__main__':
    main()
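# A minimal programmatic usage sketch (a sketch only; the module name `utils`
# and the results path below are illustrative, not defined by this file):
#
#   import json
#   import utils
#
#   # Build and save a summary of a results directory.
#   summary = utils.build_summary_json('/tmp/results')
#   summary_file = utils.get_unique_dir_summary_file('/tmp/results')
#   with open(summary_file, 'w') as f:
#       f.write(json.dumps(summary))
#
#   # Later, on the drone, merge all collected summaries.
#   collected_bytes, merged = utils.merge_summaries('/tmp/results')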