result_info.py revision 393fc8c903d61bad72ad0ab13d56955ac2888912
1# Copyright 2017 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Wrapper class to store size related information of test results.
6"""
7
8import copy
9import json
10import os
11
12import result_info_lib
13import utils_lib
14
15
class ResultInfoError(Exception):
    """Error raised when something goes wrong in ResultInfo collection."""
    pass
18
19
class ResultInfo(dict):
    """A wrapper class to store result file information.

    Details of a result include:
    original_size: Original size in bytes of the result, before throttling.
    trimmed_size: Size in bytes after the result is throttled.
    collected_size: Size in bytes of the results collected from the dut.
    files: A list of ResultInfo for the files and sub-directories of the result.

    The class contains the size information of a result file/directory, and the
    information can be merged if a file was collected multiple times during
    the test.
    For example, `messages` of size 100 bytes was collected before the test
    starts, ResultInfo for this file shall be:
        {'messages': {'/S': 100}}
    Later in the test, the file was collected again when its size becomes 200
    bytes, the new ResultInfo will be:
        {'messages': {'/S': 200}}
    At the end of the test, the file is considered too big, and trimmed down to
    150 bytes, thus the final ResultInfo of the file becomes:
        {'messages': {# The original size is 200 bytes
                      '/S': 200,
                      # The total collected size is 300 (100+200) bytes
                      '/C': 300,
                      # The trimmed size is the final size on disk
                      '/T': 150}}
    From this example, the original size tells us how large the file was.
    The collected size tells us how much data was transferred from dut to drone
    to get this file. And the trimmed size shows the final size of the file when
    the test is finished and the results are throttled again on the server side.

    The class is a wrapper of dictionary. The properties are all keyvals in a
    dictionary. For example, an instance of ResultInfo can have following
    dictionary value:
    {'debug': {
            # Original size of the debug folder is 1000 bytes.
            '/S': 1000,
            # The debug folder was throttled and the size is reduced to 500
            # bytes.
            '/T': 500,
            # collected_size ('/C') can be ignored, its value falls back to
            # trimmed_size ('/T'). If trimmed_size is not set, its value falls
            # back to original_size ('/S')

            # Sub-files and sub-directories are included in a list of '/D''s
            # value.
            # In this example, debug folder has a file `file1`, whose original
            # size is 1000 bytes, which is trimmed down to 500 bytes.
            '/D': [
                    {'file1': {
                            '/S': 1000,
                            '/T': 500,
                        }
                    }
                ]
        }
    }
    """

    def __init__(self, parent_dir, name=None, parent_result_info=None,
                 original_info=None):
        """Initialize a collection of size information for a given result path.

        A ResultInfo object can be initialized in two ways:
        1. Create from a physical file, which reads the size from the file.
           In this case, `name` value should be given, and `original_info`
           should not be set.
        2. Create from previously collected information, i.e., a dictionary
           deserialized from persisted json file. In this case, `original_info`
           should be given, and `name` should not be set.

        @param parent_dir: Path to the parent directory.
        @param name: Name of the result file or directory.
        @param parent_result_info: A ResultInfo object for the parent directory.
        @param original_info: A dictionary of the result's size information.
                This is retrieved from the previously serialized json string.
                For example: {'file_name':
                            {'/S': 100, '/T': 50}
                         }
                which means a file's original size is 100 bytes, and trimmed
                down to 50 bytes. This argument is used when the object is
                restored from a json string.
        @raise ResultInfoError: If both `name` and `original_info` are given.
        """
        super(ResultInfo, self).__init__()

        if name is not None and original_info is not None:
            raise ResultInfoError(
                    'Only one of parameter `name` and `original_info` can be '
                    'set.')

        # _initialized is a flag indicating whether the constructor has
        # finished. While it is False, the size setters do not propagate to
        # the parent, which makes restoring from a json string faster. For
        # example, if file_details has sub-directories, all sub-directories
        # will be added to this class recursively; blocking the size updates
        # avoids unnecessary calculations.
        self._initialized = False
        self._parent_result_info = parent_result_info

        if original_info is None:
            self._init_from_file(parent_dir, name)
        else:
            self._init_with_original_info(parent_dir, original_info)

        # Size of bytes collected in an overwritten or removed directory.
        self._previous_collected_size = 0
        self._initialized = True

    def _init_from_file(self, parent_dir, name):
        """Initialize with the physical file.

        @param parent_dir: Path to the parent directory.
        @param name: Name of the result file or directory.
        """
        assert name is not None
        self._name = name

        # Dictionary to store details of the given path is set to a keyval of
        # the wrapper class.
        self[self.name] = {}

        # rstrip is to remove / when name is ROOT_DIR ('').
        self._path = os.path.join(parent_dir, self.name).rstrip(os.sep)
        self._is_dir = os.path.isdir(self._path)

        if self.is_dir:
            # The value of key utils_lib.DIRS is a list of ResultInfo objects.
            self.details[utils_lib.DIRS] = []
            # Set directory size to 0, it will be updated later after its
            # sub-directories are added.
            self.original_size = 0
        else:
            # For a plain file the original size is its physical size.
            self.original_size = self.size

    def _init_with_original_info(self, parent_dir, original_info):
        """Initialize with pre-collected information.

        @param parent_dir: Path to the parent directory.
        @param original_info: A dictionary of the result's size information.
                This is retrieved from the previously serialized json string.
                For example: {'file_name':
                            {'/S': 100, '/T': 50}
                         }
                which means a file's original size is 100 bytes, and trimmed
                down to 50 bytes. This argument is used when the object is
                restored from a json string.
        """
        assert original_info
        # The result information dictionary has only 1 key, which is the file or
        # directory name. next(iter(...)) is used instead of keys()[0] because
        # dict.keys() is not indexable on Python 3.
        self._name = next(iter(original_info))
        saved_details = original_info[self.name]

        # Dictionary to store details of the given path is set to a keyval of
        # the wrapper class.
        self[self.name] = {}

        # rstrip is to remove / when name is ROOT_DIR ('').
        self._path = os.path.join(parent_dir, self.name).rstrip(os.sep)

        # A serialized entry is a directory iff it carries a DIRS list.
        self._is_dir = utils_lib.DIRS in saved_details

        if self.is_dir:
            # The value of key utils_lib.DIRS is a list of ResultInfo objects.
            self.details[utils_lib.DIRS] = []

        # This is restoring ResultInfo from a json string.
        # NOTE(review): only the original and trimmed sizes are restored; a
        # serialized collected size is not read back here - confirm that this
        # is intentional.
        self.original_size = saved_details[utils_lib.ORIGINAL_SIZE_BYTES]
        if utils_lib.TRIMMED_SIZE_BYTES in saved_details:
            self.trimmed_size = saved_details[utils_lib.TRIMMED_SIZE_BYTES]
        if self.is_dir:
            for sub_file in saved_details[utils_lib.DIRS]:
                self.add_file(None, sub_file)

    @staticmethod
    def build_from_path(parent_dir,
                        name=utils_lib.ROOT_DIR,
                        parent_result_info=None, top_dir=None,
                        all_dirs=None):
        """Get the ResultInfo for the given path.

        @param parent_dir: The parent directory of the given file.
        @param name: Name of the result file or directory.
        @param parent_result_info: A ResultInfo instance for the parent
                directory.
        @param top_dir: The top directory to collect ResultInfo. This is to
                check if a directory is a subdir of the original directory to
                collect summary.
        @param all_dirs: A set of paths that have been collected. This is to
                prevent infinite recursive call caused by symlink.

        @return: A ResultInfo instance containing the directory summary.
        """
        top_dir = top_dir or parent_dir
        # Compare against None so that a caller-supplied (possibly empty) set
        # is reused rather than silently replaced.
        if all_dirs is None:
            all_dirs = set()

        dir_info = ResultInfo(parent_dir=parent_dir,
                              name=name,
                              parent_result_info=parent_result_info)
        path = os.path.join(parent_dir, name)
        if os.path.isdir(path):
            real_path = os.path.realpath(path)
            # Compare on path-component boundaries; a plain string prefix test
            # would treat `/results/foo2` as being inside `/results/foo`.
            top_prefix = top_dir.rstrip(os.sep) + os.sep
            links_into_top_dir = (
                    os.path.islink(path) and
                    (real_path + os.sep).startswith(top_prefix))
            # The assumption here is that results are copied back to drone by
            # copying the symlink, not the content, which is true with currently
            # used rsync in cros_host.get_file call.
            # Skip scanning the child folders if any of following condition is
            # true:
            # 1. The directory is a symlink and link to a folder under `top_dir`
            # 2. The directory was scanned already.
            if links_into_top_dir or real_path in all_dirs:
                return dir_info
            all_dirs.add(real_path)
            for f in sorted(os.listdir(path)):
                dir_info.files.append(ResultInfo.build_from_path(
                        parent_dir=path,
                        name=f,
                        parent_result_info=dir_info,
                        top_dir=top_dir,
                        all_dirs=all_dirs))
                dir_info.update_sizes()

        return dir_info

    @property
    def details(self):
        """Get the details of the result.

        @return: A dictionary of size and sub-directory information.
        """
        return self[self._name]

    @property
    def is_dir(self):
        """Get if the result is a directory.
        """
        return self._is_dir

    @property
    def name(self):
        """Name of the result.
        """
        return self._name

    @property
    def path(self):
        """Full path to the result.
        """
        return self._path

    @property
    def files(self):
        """All files or sub-directories of the result.

        @return: A list of ResultInfo objects.
        @raise ResultInfoError: If the result is not a directory.
        """
        if not self.is_dir:
            raise ResultInfoError('%s is not a directory.' % self.path)
        return self.details[utils_lib.DIRS]

    @property
    def size(self):
        """Physical size in bytes for the result file.

        @raise ResultInfoError: If the result is a directory.
        """
        if self.is_dir:
            raise ResultInfoError(
                    '`size` property does not support directory. Try to use '
                    '`original_size` property instead.')
        return result_info_lib.get_file_size(self._path)

    @property
    def original_size(self):
        """The original size in bytes of the result before it's throttled.
        """
        return self.details[utils_lib.ORIGINAL_SIZE_BYTES]

    @original_size.setter
    def original_size(self, value):
        """Set the original size in bytes of the result.

        @param value: The original size in bytes of the result.
        """
        self.details[utils_lib.ORIGINAL_SIZE_BYTES] = value
        # Update the size of parent result infos if the object is already
        # initialized.
        if self._initialized and self._parent_result_info is not None:
            self._parent_result_info.update_original_size()

    @property
    def trimmed_size(self):
        """The size in bytes of the result after it's throttled.

        Falls back to original_size when no trimmed size is recorded.
        """
        return self.details.get(utils_lib.TRIMMED_SIZE_BYTES,
                                self.original_size)

    @trimmed_size.setter
    def trimmed_size(self, value):
        """Set the trimmed size in bytes of the result.

        @param value: The trimmed size in bytes of the result.
        """
        self.details[utils_lib.TRIMMED_SIZE_BYTES] = value
        # Update the size of parent result infos if the object is already
        # initialized.
        if self._initialized and self._parent_result_info is not None:
            self._parent_result_info.update_trimmed_size()

    @property
    def collected_size(self):
        """The collected size in bytes of the result.

        The file is throttled on the dut, so the number of bytes collected from
        dut is default to the trimmed_size. If a file is modified between
        multiple result collections and is collected multiple times during the
        test run, the collected_size will be the sum of the multiple
        collections. Therefore, its value will be greater than the trimmed_size
        of the last copy.
        """
        return self.details.get(utils_lib.COLLECTED_SIZE_BYTES,
                                self.trimmed_size)

    @collected_size.setter
    def collected_size(self, value):
        """Set the collected size in bytes of the result.

        @param value: The collected size in bytes of the result.
        """
        self.details[utils_lib.COLLECTED_SIZE_BYTES] = value
        # Update the size of parent result infos if the object is already
        # initialized.
        if self._initialized and self._parent_result_info is not None:
            self._parent_result_info.update_collected_size()

    @property
    def is_collected_size_recorded(self):
        """Flag to indicate if the result has collected size set.

        This flag is used to avoid unnecessary entry in result details, as the
        default value of collected size is the trimmed size. Removing the
        redundant information helps to reduce the size of the json file.
        """
        return utils_lib.COLLECTED_SIZE_BYTES in self.details

    def add_file(self, name, original_info):
        """Add a file to the result.

        Only one of `name` and `original_info` may be set; see the
        constructor for the two initialization modes.

        @param name: Name of the file.
        @param original_info: A dictionary of the file's size and sub-directory
                information.
        """
        self.details[utils_lib.DIRS].append(
                ResultInfo(parent_dir=self._path,
                           name=name,
                           parent_result_info=self,
                           original_info=original_info))
        # After a new ResultInfo is added, update the sizes if the object is
        # already initialized.
        if self._initialized:
            self.update_sizes()

    def remove_file(self, name):
        """Remove a file with the given name from the result.

        The sizes of this result are not recalculated by this call.

        @param name: Name of the file to be removed.
        @raise ResultInfoError: If the result is not a directory, or the file
                with the given name is not found.
        """
        self.files.remove(self.get_file(name))

    def get_file_names(self):
        """Get a set of all the files under the result.

        @raise ResultInfoError: If the result is not a directory.
        """
        # Each entry is a single-key dictionary keyed by the file name.
        # next(iter(f)) works on both Python 2 and 3, unlike f.keys()[0].
        return set(next(iter(f)) for f in self.files)

    def get_file(self, name):
        """Get a file with the given name under the result.

        @param name: Name of the file.
        @return: A ResultInfo object of the file.
        @raise ResultInfoError: If the result is not a directory, or the file
                with the given name is not found.
        """
        if not self.is_dir:
            raise ResultInfoError('%s is not a directory. Can\'t locate file '
                                  '%s' % (self.path, name))
        for file_info in self.files:
            if file_info.name == name:
                return file_info
        raise ResultInfoError('Can\'t locate file %s in directory %s' %
                              (name, self.path))

    def convert_to_dir(self):
        """Convert the result file to a directory.

        This happens when a result file was overwritten by a directory. The
        conversion will reset the details of this result to be a directory,
        and save the collected_size to attribute `_previous_collected_size`,
        so it can be counted when merging multiple result infos.

        @raise ResultInfoError: If the result is already a directory.
        """
        if self.is_dir:
            raise ResultInfoError('%s is already a directory.' % self.path)
        # The size that's collected before the file was replaced as a directory.
        collected_size = self.collected_size
        self._is_dir = True
        self.details[utils_lib.DIRS] = []
        self.original_size = 0
        self.trimmed_size = 0
        self._previous_collected_size = collected_size
        self.collected_size = collected_size

    def update_original_size(self):
        """Update the original size of the result and trigger its parent to
        update.
        """
        # The detail is written directly rather than through the property
        # setter: the setter also notifies the parent, and combined with the
        # explicit propagation below that would update every ancestor twice
        # per level (exponential in the depth of the tree).
        if self.is_dir:
            self.details[utils_lib.ORIGINAL_SIZE_BYTES] = sum(
                    f.original_size for f in self.files)
        elif self.original_size is None:
            # Only set original_size if it's not initialized yet.
            self.details[utils_lib.ORIGINAL_SIZE_BYTES] = self.size

        # Update the size of parent result infos.
        if self._parent_result_info is not None:
            self._parent_result_info.update_original_size()

    def update_trimmed_size(self):
        """Update the trimmed size of the result and trigger its parent to
        update.
        """
        if self.is_dir:
            new_trimmed_size = sum(f.trimmed_size for f in self.files)
        else:
            new_trimmed_size = self.size

        # Only set trimmed_size if the value is changed or different from the
        # original size. Written directly to the details so that the parent
        # is notified exactly once, below.
        if (new_trimmed_size != self.original_size or
            new_trimmed_size != self.trimmed_size):
            self.details[utils_lib.TRIMMED_SIZE_BYTES] = new_trimmed_size

        # Update the size of parent result infos.
        if self._parent_result_info is not None:
            self._parent_result_info.update_trimmed_size()

    def update_collected_size(self):
        """Update the collected size of the result and trigger its parent to
        update.
        """
        if self.is_dir:
            new_collected_size = (
                    self._previous_collected_size +
                    sum(f.collected_size for f in self.files))
        else:
            new_collected_size = self.size

        # Only set collected_size if the value is changed or different from the
        # trimmed size or existing collected size. Written directly to the
        # details so that the parent is notified exactly once, below.
        if (new_collected_size != self.trimmed_size or
            new_collected_size != self.collected_size):
            self.details[utils_lib.COLLECTED_SIZE_BYTES] = new_collected_size

        # Update the size of parent result infos.
        if self._parent_result_info is not None:
            self._parent_result_info.update_collected_size()

    def update_sizes(self):
        """Update all sizes information of the result.
        """
        self.update_original_size()
        self.update_trimmed_size()
        self.update_collected_size()

    def set_parent_result_info(self, parent_result_info):
        """Set the parent result info.

        It's used when a ResultInfo object is moved to a different file
        structure.

        @param parent_result_info: A ResultInfo object for the parent directory.
        """
        self._parent_result_info = parent_result_info
        # As the parent reference changed, update all sizes of the parent.
        if parent_result_info:
            self._parent_result_info.update_sizes()

    def merge(self, new_info, is_final=False):
        """Merge a ResultInfo instance to the current one.

        Update the old directory's ResultInfo with the new one. Also calculate
        the total size of results collected from the client side based on the
        difference between the two ResultInfo.

        When merging with newer collected results, any results not existing in
        the new ResultInfo or files with size different from the newer files
        collected are considered as extra results collected or overwritten by
        the new results.
        Therefore, the size of the collected result should include such files,
        and the collected size can be larger than trimmed size.
        As an example:
        current: {'file1': {TRIMMED_SIZE_BYTES: 1024,
                            ORIGINAL_SIZE_BYTES: 1024,
                            COLLECTED_SIZE_BYTES: 1024}}
        This means a result `file1` of original size 1KB was collected with size
        of 1KB byte.
        new_info: {'file1': {TRIMMED_SIZE_BYTES: 1024,
                             ORIGINAL_SIZE_BYTES: 2048,
                             COLLECTED_SIZE_BYTES: 1024}}
        This means a result `file1` of 2KB was trimmed down to 1KB and was
        collected with size of 1KB byte.
        Note that the second result collection has an updated result `file1`
        (because of the different ORIGINAL_SIZE_BYTES), and it needs to be
        rsync-ed to the drone. Therefore, the merged ResultInfo will be:
        {'file1': {TRIMMED_SIZE_BYTES: 1024,
                   ORIGINAL_SIZE_BYTES: 2048,
                   COLLECTED_SIZE_BYTES: 2048}}
        Note that:
        * TRIMMED_SIZE_BYTES is still at 1KB, which reflects the actual size of
          the file be collected.
        * ORIGINAL_SIZE_BYTES is updated to 2KB, which is the size of the file
          in the new result `file1`.
        * COLLECTED_SIZE_BYTES is 2KB because rsync will copy `file1` twice as
          it's changed.

        The only exception is that the new ResultInfo's ORIGINAL_SIZE_BYTES is
        the same as the current ResultInfo's TRIMMED_SIZE_BYTES. That means the
        file was trimmed in the current ResultInfo and the new ResultInfo is
        collecting the trimmed file. Therefore, the merged summary will keep the
        data in the current ResultInfo.

        @param new_info: New ResultInfo to be merged into the current one.
        @param is_final: True if new_info is built from the final result folder.
                Default is set to False.
        @raise ResultInfoError: If either this result or `new_info` is not a
                directory.
        """
        new_files = new_info.get_file_names()
        old_files = self.get_file_names()
        for name in new_files:
            new_file = new_info.get_file(name)
            if name not in old_files:
                # A file/dir exists in new client dir, but not in the old one,
                # which means that the file or a directory is newly collected.
                copy_file = copy.deepcopy(new_file)
                self.files.append(copy_file)
                copy_file.set_parent_result_info(self)
            elif new_file.is_dir:
                # `name` is a directory in the new ResultInfo, try to merge it
                # with the current ResultInfo.
                old_file = self.get_file(name)

                if not old_file.is_dir:
                    # If `name` is a file in the current ResultInfo but a
                    # directory in new ResultInfo, the file in the current
                    # ResultInfo will be overwritten by the new directory by
                    # rsync. Therefore, force it to be an empty directory in
                    # the current ResultInfo, so that the new directory can be
                    # merged.
                    old_file.convert_to_dir()

                old_file.merge(new_file, is_final)
            else:
                old_file = self.get_file(name)

                # If `name` is a directory in the current ResultInfo, but a file
                # in the new ResultInfo, rsync will fail to copy the file as it
                # can't overwrite a directory. Therefore, skip the merge.
                if old_file.is_dir:
                    continue

                new_size = new_file.original_size
                old_size = old_file.original_size
                new_trimmed_size = new_file.trimmed_size
                old_trimmed_size = old_file.trimmed_size

                # Keep current information if the sizes are not changed.
                if (new_size == old_size and
                    new_trimmed_size == old_trimmed_size):
                    continue

                # Keep current information if the newer size is the same as the
                # current trimmed size, and the file is not trimmed in new
                # ResultInfo. That means the file was trimmed earlier and stays
                # the same when collecting the information again.
                if (new_size == old_trimmed_size and
                    new_size == new_trimmed_size):
                    continue

                # If the file is merged from the final result folder to an older
                # ResultInfo, it's not considered to be trimmed if the size is
                # not changed. The reason is that the file on the server side
                # does not have the info of its original size.
                if is_final and new_trimmed_size == old_trimmed_size:
                    continue

                # `name` is a file, and both the original_size and trimmed_size
                # are changed, that means the file is overwritten, so increment
                # the collected_size.
                # Before trimming is implemented, collected_size is the
                # value of original_size.
                new_collected_size = new_file.collected_size
                old_collected_size = old_file.collected_size

                old_file.collected_size = (
                        new_collected_size + old_collected_size)
                # Only set trimmed_size if one of the following two conditions
                # are true:
                # 1. In the new summary the file's trimmed size is different
                #    from the original size, which means the file was trimmed
                #    in the new summary.
                # 2. The original size in the new summary equals the trimmed
                #    size in the old summary, which means the file was trimmed
                #    again in the new summary.
                if (new_size == old_trimmed_size or
                    new_size != new_trimmed_size):
                    old_file.trimmed_size = new_file.trimmed_size
                old_file.original_size = new_size
641
642
# ResultInfo of a directory with no content and zero size; serves as a
# baseline to compare another ResultInfo against.
EMPTY = ResultInfo(
        parent_dir='',
        original_info={
                '': {
                        utils_lib.DIRS: [],
                        utils_lib.ORIGINAL_SIZE_BYTES: 0,
                },
        })
647
648
def save_summary(summary, json_file):
    """Serialize a directory summary as json and write it to a file.

    The target file is opened in write mode, so existing content is replaced.

    @param summary: A ResultInfo object for a result directory.
    @param json_file: Path to the json file to be written.
    """
    with open(json_file, 'w') as summary_file:
        json.dump(summary, summary_file)
657
658
def load_summary_json_file(json_file):
    """Restore a ResultInfo from a json summary file.

    The directory containing `json_file` is used as the parent directory of
    the restored ResultInfo.

    @param json_file: Path to a json file containing a directory summary.
    @return: A ResultInfo object containing the directory summary.
    """
    with open(json_file, 'r') as summary_file:
        raw_summary = json.load(summary_file)

    # Wrap the plain dictionaries from json into ResultInfo objects.
    return ResultInfo(parent_dir=os.path.dirname(json_file),
                      original_info=raw_summary)
671