# result_info.py revision 393fc8c903d61bad72ad0ab13d56955ac2888912
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrapper class to store size related information of test results.
"""

import copy
import json
import os

import result_info_lib
import utils_lib


class ResultInfoError(Exception):
    """Exception to raise when error occurs in ResultInfo collection."""


class ResultInfo(dict):
    """A wrapper class to store result file information.

    Details of a result include:
    original_size: Original size in bytes of the result, before throttling.
    trimmed_size: Size in bytes after the result is throttled.
    collected_size: Size in bytes of the results collected from the dut.
    files: A list of ResultInfo for the files and sub-directories of the
            result.

    The class contains the size information of a result file/directory, and
    the information can be merged if a file was collected multiple times
    during the test.
    For example, `messages` of size 100 bytes was collected before the test
    starts, ResultInfo for this file shall be:
        {'messages': {'/S': 100}}
    Later in the test, the file was collected again when its size becomes 200
    bytes, the new ResultInfo will be:
        {'messages': {'/S': 200}}
    At the end of the test, the file is considered too big, and trimmed down
    to 150 bytes, thus the final ResultInfo of the file becomes:
        {'messages': {# The original size is 200 bytes
                      '/S': 200,
                      # The total collected size is 300 (100+200) bytes
                      '/C': 300,
                      # The trimmed size is the final size on disk
                      '/T': 150}}
    From this example, the original size tells us how large the file was.
    The collected size tells us how much data was transferred from dut to
    drone to get this file. And the trimmed size shows the final size of the
    file when the test is finished and the results are throttled again on the
    server side.

    The class is a wrapper of dictionary. The properties are all keyvals in a
    dictionary. For example, an instance of ResultInfo can have following
    dictionary value:
    {'debug': {
            # Original size of the debug folder is 1000 bytes.
            '/S': 1000,
            # The debug folder was throttled and the size is reduced to 500
            # bytes.
            '/T': 500,
            # collected_size ('/C') can be ignored, its value falls back to
            # trimmed_size ('/T'). If trimmed_size is not set, its value
            # falls back to original_size ('/S')

            # Sub-files and sub-directories are included in a list of '/D''s
            # value.
            # In this example, debug folder has a file `file1`, whose
            # original size is 1000 bytes, which is trimmed down to 500
            # bytes.
            '/D': [
                    {'file1': {
                            '/S': 1000,
                            '/T': 500,
                        }
                    }
                ]
        }
    }
    """

    def __init__(self, parent_dir, name=None, parent_result_info=None,
                 original_info=None):
        """Initialize a collection of size information for a given result path.

        A ResultInfo object can be initialized in two ways:
        1. Create from a physical file, which reads the size from the file.
           In this case, `name` value should be given, and `original_info`
           should not be set.
        2. Create from previously collected information, i.e., a dictionary
           deserialized from persisted json file. In this case,
           `original_info` should be given, and `name` should not be set.

        @param parent_dir: Path to the parent directory.
        @param name: Name of the result file or directory.
        @param parent_result_info: A ResultInfo object for the parent
                directory.
        @param original_info: A dictionary of the result's size information.
                This is retrieved from the previously serialized json string.
                For example: {'file_name':
                            {'/S': 100, '/T': 50}
                         }
                which means a file's original size is 100 bytes, and trimmed
                down to 50 bytes. This argument is used when the object is
                restored from a json string.
        @raise ResultInfoError: If both `name` and `original_info` are given.
        """
        super(ResultInfo, self).__init__()

        if name is not None and original_info is not None:
            raise ResultInfoError(
                    'Only one of parameter `name` and `original_info` can be '
                    'set.')

        # _initialized is a flag indicating whether the object has left the
        # constructor. It is used to block any size update to make restoring
        # from json string faster. For example, if file_details has
        # sub-directories, all sub-directories will be added to this class
        # recursively, blocking the size updates can reduce unnecessary
        # calculations.
        self._initialized = False
        self._parent_result_info = parent_result_info
        # Size of bytes collected in an overwritten or removed directory.
        # Set before the init helpers run so the attribute always exists.
        self._previous_collected_size = 0

        if original_info is None:
            self._init_from_file(parent_dir, name)
        else:
            self._init_with_original_info(parent_dir, original_info)

        self._initialized = True

    def _init_from_file(self, parent_dir, name):
        """Initialize with the physical file.

        @param parent_dir: Path to the parent directory.
        @param name: Name of the result file or directory.
        """
        assert name is not None
        self._name = name

        # Dictionary to store details of the given path is set to a keyval of
        # the wrapper class.
        self[self.name] = {}

        # rstrip is to remove / when name is ROOT_DIR ('').
        self._path = os.path.join(parent_dir, self.name).rstrip(os.sep)
        self._is_dir = os.path.isdir(self._path)

        if self.is_dir:
            # The value of key utils_lib.DIRS is a list of ResultInfo objects.
            self.details[utils_lib.DIRS] = []
            # Set directory size to 0, it will be updated later after its
            # sub-directories are added.
            self.original_size = 0
        else:
            # The path is for a file: the original size is its physical size.
            self.original_size = self.size

    def _init_with_original_info(self, parent_dir, original_info):
        """Initialize with pre-collected information.

        @param parent_dir: Path to the parent directory.
        @param original_info: A dictionary of the result's size information.
                This is retrieved from the previously serialized json string.
                For example: {'file_name':
                            {'/S': 100, '/T': 50}
                         }
                which means a file's original size is 100 bytes, and trimmed
                down to 50 bytes. This argument is used when the object is
                restored from a json string.
        """
        assert original_info
        # The result information dictionary has only 1 key, which is the file
        # or directory name. next(iter(...)) works on both Python 2 and 3,
        # unlike indexing the result of keys().
        self._name = next(iter(original_info))
        info = original_info[self.name]

        # Dictionary to store details of the given path is set to a keyval of
        # the wrapper class.
        self[self.name] = {}

        # rstrip is to remove / when name is ROOT_DIR ('').
        self._path = os.path.join(parent_dir, self.name).rstrip(os.sep)

        self._is_dir = utils_lib.DIRS in info

        if self.is_dir:
            # The value of key utils_lib.DIRS is a list of ResultInfo objects.
            self.details[utils_lib.DIRS] = []

        # This is restoring ResultInfo from a json string.
        self.original_size = info[utils_lib.ORIGINAL_SIZE_BYTES]
        if utils_lib.TRIMMED_SIZE_BYTES in info:
            self.trimmed_size = info[utils_lib.TRIMMED_SIZE_BYTES]
        if self.is_dir:
            for sub_file in info[utils_lib.DIRS]:
                self.add_file(None, sub_file)

    @staticmethod
    def build_from_path(parent_dir,
                        name=utils_lib.ROOT_DIR,
                        parent_result_info=None, top_dir=None,
                        all_dirs=None):
        """Get the ResultInfo for the given path.

        @param parent_dir: The parent directory of the given file.
        @param name: Name of the result file or directory.
        @param parent_result_info: A ResultInfo instance for the parent
                directory.
        @param top_dir: The top directory to collect ResultInfo. This is to
                check if a directory is a subdir of the original directory to
                collect summary.
        @param all_dirs: A set of paths that have been collected. This is to
                prevent infinite recursive call caused by symlink.

        @return: A ResultInfo instance containing the directory summary.
        """
        top_dir = top_dir or parent_dir
        if all_dirs is None:
            all_dirs = set()

        dir_info = ResultInfo(parent_dir=parent_dir,
                              name=name,
                              parent_result_info=parent_result_info)
        path = os.path.join(parent_dir, name)
        if not os.path.isdir(path):
            return dir_info

        real_path = os.path.realpath(path)
        # Compare on a path-component boundary, so that e.g. `/results/10`
        # is not mistaken for a folder under `/results/1`.
        top_prefix = top_dir.rstrip(os.sep)
        is_under_top_dir = (real_path == top_prefix or
                            real_path.startswith(top_prefix + os.sep))
        # The assumption here is that results are copied back to drone by
        # copying the symlink, not the content, which is true with currently
        # used rsync in cros_host.get_file call.
        # Skip scanning the child folders if any of following condition is
        # true:
        # 1. The directory is a symlink and link to a folder under `top_dir`
        # 2. The directory was scanned already.
        if ((os.path.islink(path) and is_under_top_dir) or
            real_path in all_dirs):
            return dir_info

        all_dirs.add(real_path)
        for f in sorted(os.listdir(path)):
            dir_info.files.append(ResultInfo.build_from_path(
                    parent_dir=path,
                    name=f,
                    parent_result_info=dir_info,
                    top_dir=top_dir,
                    all_dirs=all_dirs))
        dir_info.update_sizes()

        return dir_info

    @property
    def details(self):
        """Get the details of the result.

        @return: A dictionary of size and sub-directory information.
        """
        return self[self._name]

    @property
    def is_dir(self):
        """Get if the result is a directory.
        """
        return self._is_dir

    @property
    def name(self):
        """Name of the result.
        """
        return self._name

    @property
    def path(self):
        """Full path to the result.
        """
        return self._path

    @property
    def files(self):
        """All files or sub-directories of the result.

        @return: A list of ResultInfo objects.
        @raise ResultInfoError: If the result is not a directory.
        """
        if not self.is_dir:
            raise ResultInfoError('%s is not a directory.' % self.path)
        return self.details[utils_lib.DIRS]

    @property
    def size(self):
        """Physical size in bytes for the result file.

        @raise ResultInfoError: If the result is a directory.
        """
        if self.is_dir:
            raise ResultInfoError(
                    '`size` property does not support directory. Try to use '
                    '`original_size` property instead.')
        return result_info_lib.get_file_size(self._path)

    @property
    def original_size(self):
        """The original size in bytes of the result before it's throttled.
        """
        return self.details[utils_lib.ORIGINAL_SIZE_BYTES]

    @original_size.setter
    def original_size(self, value):
        """Set the original size in bytes of the result.

        @param value: The original size in bytes of the result.
        """
        self.details[utils_lib.ORIGINAL_SIZE_BYTES] = value
        # Update the size of parent result infos if the object is already
        # initialized.
        if self._initialized and self._parent_result_info is not None:
            self._parent_result_info.update_original_size()

    @property
    def trimmed_size(self):
        """The size in bytes of the result after it's throttled.

        Falls back to original_size when no trimmed size is recorded.
        """
        return self.details.get(utils_lib.TRIMMED_SIZE_BYTES,
                                self.original_size)

    @trimmed_size.setter
    def trimmed_size(self, value):
        """Set the trimmed size in bytes of the result.

        @param value: The trimmed size in bytes of the result.
        """
        self.details[utils_lib.TRIMMED_SIZE_BYTES] = value
        # Update the size of parent result infos if the object is already
        # initialized.
        if self._initialized and self._parent_result_info is not None:
            self._parent_result_info.update_trimmed_size()

    @property
    def collected_size(self):
        """The collected size in bytes of the result.

        The file is throttled on the dut, so the number of bytes collected
        from dut is default to the trimmed_size. If a file is modified
        between multiple result collections and is collected multiple times
        during the test run, the collected_size will be the sum of the
        multiple collections. Therefore, its value will be greater than the
        trimmed_size of the last copy.
        """
        return self.details.get(utils_lib.COLLECTED_SIZE_BYTES,
                                self.trimmed_size)

    @collected_size.setter
    def collected_size(self, value):
        """Set the collected size in bytes of the result.

        @param value: The collected size in bytes of the result.
        """
        self.details[utils_lib.COLLECTED_SIZE_BYTES] = value
        # Update the size of parent result infos if the object is already
        # initialized.
        if self._initialized and self._parent_result_info is not None:
            self._parent_result_info.update_collected_size()

    @property
    def is_collected_size_recorded(self):
        """Flag to indicate if the result has collected size set.

        This flag is used to avoid unnecessary entry in result details, as the
        default value of collected size is the trimmed size. Removing the
        redundant information helps to reduce the size of the json file.
        """
        return utils_lib.COLLECTED_SIZE_BYTES in self.details

    def add_file(self, name, original_info):
        """Add a file to the result.

        @param name: Name of the file.
        @param original_info: A dictionary of the file's size and
                sub-directory information.
        """
        self.details[utils_lib.DIRS].append(
                ResultInfo(parent_dir=self._path,
                           name=name,
                           parent_result_info=self,
                           original_info=original_info))
        # After a new ResultInfo is added, update the sizes if the object is
        # already initialized.
        if self._initialized:
            self.update_sizes()

    def remove_file(self, name):
        """Remove a file with the given name from the result.

        @param name: Name of the file to be removed.
        @raise ResultInfoError: If the result is not a directory, or the file
                with the given name is not found.
        """
        self.files.remove(self.get_file(name))

    def get_file_names(self):
        """Get a set of all the files under the result.

        @return: A set of the names of the direct children of the result.
        @raise ResultInfoError: If the result is not a directory.
        """
        # Each child is a ResultInfo whose single key is its name.
        return set(f.name for f in self.files)

    def get_file(self, name):
        """Get a file with the given name under the result.

        @param name: Name of the file.
        @return: A ResultInfo object of the file.
        @raise ResultInfoError: If the result is not a directory, or the file
                with the given name is not found.
        """
        if not self.is_dir:
            raise ResultInfoError('%s is not a directory. Can\'t locate file '
                                  '%s' % (self.path, name))
        for file_info in self.files:
            if file_info.name == name:
                return file_info
        raise ResultInfoError('Can\'t locate file %s in directory %s' %
                              (name, self.path))

    def convert_to_dir(self):
        """Convert the result file to a directory.

        This happens when a result file was overwritten by a directory. The
        conversion will reset the details of this result to be a directory,
        and save the collected_size to attribute `_previous_collected_size`,
        so it can be counted when merging multiple result infos.

        @raise ResultInfoError: If the result is already a directory.
        """
        if self.is_dir:
            raise ResultInfoError('%s is already a directory.' % self.path)
        # The size that's collected before the file was replaced as a
        # directory.
        collected_size = self.collected_size
        self._is_dir = True
        self.details[utils_lib.DIRS] = []
        self.original_size = 0
        self.trimmed_size = 0
        self._previous_collected_size = collected_size
        self.collected_size = collected_size

    def update_original_size(self):
        """Update the original size of the result and trigger its parent to
        update.
        """
        if self.is_dir:
            self.original_size = sum(
                    f.original_size for f in self.files)
        elif self.original_size is None:
            # Only set original_size if it's not initialized yet.
            self.original_size = self.size

        # Update the size of parent result infos.
        if self._parent_result_info is not None:
            self._parent_result_info.update_original_size()

    def update_trimmed_size(self):
        """Update the trimmed size of the result and trigger its parent to
        update.
        """
        if self.is_dir:
            new_trimmed_size = sum(f.trimmed_size for f in self.files)
        else:
            new_trimmed_size = self.size

        # Only set trimmed_size if the value is changed or different from the
        # original size.
        if (new_trimmed_size != self.original_size or
            new_trimmed_size != self.trimmed_size):
            self.trimmed_size = new_trimmed_size

        # Update the size of parent result infos.
        if self._parent_result_info is not None:
            self._parent_result_info.update_trimmed_size()

    def update_collected_size(self):
        """Update the collected size of the result and trigger its parent to
        update.
        """
        if self.is_dir:
            new_collected_size = (
                    self._previous_collected_size +
                    sum(f.collected_size for f in self.files))
        else:
            new_collected_size = self.size

        # Only set collected_size if the value is changed or different from
        # the trimmed size or existing collected size.
        if (new_collected_size != self.trimmed_size or
            new_collected_size != self.collected_size):
            self.collected_size = new_collected_size

        # Update the size of parent result infos.
        if self._parent_result_info is not None:
            self._parent_result_info.update_collected_size()

    def update_sizes(self):
        """Update all sizes information of the result.
        """
        self.update_original_size()
        self.update_trimmed_size()
        self.update_collected_size()

    def set_parent_result_info(self, parent_result_info):
        """Set the parent result info.

        It's used when a ResultInfo object is moved to a different file
        structure.

        @param parent_result_info: A ResultInfo object for the parent
                directory.
        """
        self._parent_result_info = parent_result_info
        # As the parent reference changed, update all sizes of the parent.
        if parent_result_info:
            self._parent_result_info.update_sizes()

    def merge(self, new_info, is_final=False):
        """Merge a ResultInfo instance to the current one.

        Update the old directory's ResultInfo with the new one. Also calculate
        the total size of results collected from the client side based on the
        difference between the two ResultInfo.

        When merging with newer collected results, any results not existing in
        the new ResultInfo or files with size different from the newer files
        collected are considered as extra results collected or overwritten by
        the new results.
        Therefore, the size of the collected result should include such files,
        and the collected size can be larger than trimmed size.
        As an example:
        current: {'file1': {TRIMMED_SIZE_BYTES: 1024,
                            ORIGINAL_SIZE_BYTES: 1024,
                            COLLECTED_SIZE_BYTES: 1024}}
        This means a result `file1` of original size 1KB was collected with
        size of 1KB byte.
        new_info: {'file1': {TRIMMED_SIZE_BYTES: 1024,
                             ORIGINAL_SIZE_BYTES: 2048,
                             COLLECTED_SIZE_BYTES: 1024}}
        This means a result `file1` of 2KB was trimmed down to 1KB and was
        collected with size of 1KB byte.
        Note that the second result collection has an updated result `file1`
        (because of the different ORIGINAL_SIZE_BYTES), and it needs to be
        rsync-ed to the drone. Therefore, the merged ResultInfo will be:
        {'file1': {TRIMMED_SIZE_BYTES: 1024,
                   ORIGINAL_SIZE_BYTES: 2048,
                   COLLECTED_SIZE_BYTES: 2048}}
        Note that:
        * TRIMMED_SIZE_BYTES is still at 1KB, which reflects the actual size
          of the file be collected.
        * ORIGINAL_SIZE_BYTES is updated to 2KB, which is the size of the file
          in the new result `file1`.
        * COLLECTED_SIZE_BYTES is 2KB because rsync will copy `file1` twice as
          it's changed.

        The only exception is that the new ResultInfo's ORIGINAL_SIZE_BYTES is
        the same as the current ResultInfo's TRIMMED_SIZE_BYTES. That means
        the file was trimmed in the current ResultInfo and the new ResultInfo
        is collecting the trimmed file. Therefore, the merged summary will
        keep the data in the current ResultInfo.

        @param new_info: New ResultInfo to be merged into the current one.
        @param is_final: True if new_info is built from the final result
                folder. Default is set to False.
        """
        new_files = new_info.get_file_names()
        old_files = self.get_file_names()
        for name in new_files:
            new_file = new_info.get_file(name)
            if name not in old_files:
                # A file/dir exists in new client dir, but not in the old one,
                # which means that the file or a directory is newly collected.
                copy_file = copy.deepcopy(new_file)
                self.files.append(copy_file)
                copy_file.set_parent_result_info(self)
            elif new_file.is_dir:
                # `name` is a directory in the new ResultInfo, try to merge it
                # with the current ResultInfo.
                old_file = self.get_file(name)

                if not old_file.is_dir:
                    # If `name` is a file in the current ResultInfo but a
                    # directory in new ResultInfo, the file in the current
                    # ResultInfo will be overwritten by the new directory by
                    # rsync. Therefore, force it to be an empty directory in
                    # the current ResultInfo, so that the new directory can be
                    # merged.
                    old_file.convert_to_dir()

                old_file.merge(new_file, is_final)
            else:
                old_file = self.get_file(name)

                # If `name` is a directory in the current ResultInfo, but a
                # file in the new ResultInfo, rsync will fail to copy the file
                # as it can't overwrite an directory. Therefore, skip the
                # merge.
                if old_file.is_dir:
                    continue

                new_size = new_file.original_size
                old_size = old_file.original_size
                new_trimmed_size = new_file.trimmed_size
                old_trimmed_size = old_file.trimmed_size

                # Keep current information if the sizes are not changed.
                if (new_size == old_size and
                    new_trimmed_size == old_trimmed_size):
                    continue

                # Keep current information if the newer size is the same as
                # the current trimmed size, and the file is not trimmed in new
                # ResultInfo. That means the file was trimmed earlier and
                # stays the same when collecting the information again.
                if (new_size == old_trimmed_size and
                    new_size == new_trimmed_size):
                    continue

                # If the file is merged from the final result folder to an
                # older ResultInfo, it's not considered to be trimmed if the
                # size is not changed. The reason is that the file on the
                # server side does not have the info of its original size.
                if is_final and new_trimmed_size == old_trimmed_size:
                    continue

                # `name` is a file, and both the original_size and
                # trimmed_size are changed, that means the file is
                # overwritten, so increment the collected_size.
                # Before trimming is implemented, collected_size is the
                # value of original_size.
                new_collected_size = new_file.collected_size
                old_collected_size = old_file.collected_size

                old_file.collected_size = (
                        new_collected_size + old_collected_size)
                # Only set trimmed_size if one of the following two conditions
                # are true:
                # 1. In the new summary the file's trimmed size is different
                #    from the original size, which means the file was trimmed
                #    in the new summary.
                # 2. The original size in the new summary equals the trimmed
                #    size in the old summary, which means the file was trimmed
                #    again in the new summary.
                if (new_size == old_trimmed_size or
                    new_size != new_trimmed_size):
                    old_file.trimmed_size = new_file.trimmed_size
                old_file.original_size = new_size


# An empty directory, used to compare with a ResultInfo.
EMPTY = ResultInfo(parent_dir='',
                   original_info={'': {utils_lib.ORIGINAL_SIZE_BYTES: 0,
                                       utils_lib.DIRS: []}})


def save_summary(summary, json_file):
    """Save the given directory summary to a file.

    @param summary: A ResultInfo object for a result directory.
    @param json_file: Path to a json file to save to.
    """
    with open(json_file, 'w') as f:
        json.dump(summary, f)


def load_summary_json_file(json_file):
    """Load result info from the given json_file.

    @param json_file: Path to a json file containing a directory summary.
    @return: A ResultInfo object containing the directory summary.
    """
    with open(json_file, 'r') as f:
        summary = json.load(f)

    # Convert summary to ResultInfo objects
    result_dir = os.path.dirname(json_file)
    return ResultInfo(parent_dir=result_dir, original_info=summary)