utils.py revision e2e995d750616022e2d5cfeda5d9eb2bcd78df2d
#!/usr/bin/python
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

6"""
7This is a utility to build a summary of the given directory. and save to a json
8file.
9
10Example usage:
11    result_utils.py -p path
12
13The content of the json file looks like:
14{'default': {'/D': {'control': {'/S': 734},
15                      'debug': {'/D': {'client.0.DEBUG': {'/S': 5698},
16                                       'client.0.ERROR': {'/S': 254},
17                                       'client.0.INFO': {'/S': 1020},
18                                       'client.0.WARNING': {'/S': 242}},
19                                '/S': 7214}
20                      },
21              '/S': 7948
22            }
23}
24"""

import argparse
import copy
import glob
import json
import logging
import os
import time

import utils_lib


# Do NOT import autotest_lib modules here. This module can be executed without
# depending on other autotest modules. This is to keep the logic of result
# trimming on the server side, instead of depending on the autotest client
# module.

DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json'
# Minimum disk space that should remain available after saving the summary
# file.
MIN_FREE_DISK_BYTES = 10 * 1024 * 1024

# Autotest uses some state files to track process running state. These files
# are deleted from the test results, so they can be ignored.
FILES_TO_IGNORE = set([
    'control.autoserv.state'
])

def get_unique_dir_summary_file(path):
    """Get a unique file path to save the directory summary json string.

    @param path: The directory path to save the summary file to.
    @return: A unique file path under the given directory.
    """
    summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time()
    # Make sure the summary file name is unique.
    file_name = os.path.join(path, summary_file)
    if os.path.exists(file_name):
        count = 1
        name, ext = os.path.splitext(summary_file)
        while os.path.exists(file_name):
            file_name = os.path.join(path, '%s_%s%s' % (name, count, ext))
            count += 1
    return file_name

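# A minimal usage sketch (the results path below is hypothetical). The file
# name embeds the current timestamp; on a name collision a numeric suffix is
# appended:
#
#     summary_file = get_unique_dir_summary_file('/tmp/results')
#     # e.g. '/tmp/results/dir_summary_1500000000.json', or
#     # '/tmp/results/dir_summary_1500000000_1.json' if that name is taken.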

def get_dir_summary(path, top_dir, all_dirs=None):
    """Get the directory summary for the given path.

    @param path: The directory to collect summary for.
    @param top_dir: The top directory to collect summary for. This is used to
            check if a directory is a subdir of the original directory to
            collect summary for.
    @param all_dirs: A set of paths that have already been collected, used to
            prevent infinite recursion caused by symlinks. Defaults to a new,
            empty set.

    @return: A dictionary of the directory summary.
    """
    # Avoid a mutable default argument being shared across calls.
    if all_dirs is None:
        all_dirs = set()

    dir_info = {}
    dir_info[utils_lib.ORIGINAL_SIZE_BYTES] = 0
    summary = {os.path.basename(path): dir_info}

    if os.path.isfile(path):
        dir_info[utils_lib.ORIGINAL_SIZE_BYTES] = os.stat(path).st_size
    else:
        dir_info[utils_lib.DIRS] = {}
        real_path = os.path.realpath(path)
        # The assumption here is that results are copied back to the drone by
        # copying the symlink, not the content, which is true with the rsync
        # currently used in the cros_host.get_file call.
        # Skip scanning the child folders if either of the following
        # conditions is true:
        # 1. The directory is a symlink and links to a folder under `top_dir`.
        # 2. The directory was scanned already.
        if ((os.path.islink(path) and real_path.startswith(top_dir)) or
            real_path in all_dirs):
            return summary

        all_dirs.add(real_path)
        for f in sorted(os.listdir(path)):
            f_summary = get_dir_summary(os.path.join(path, f), top_dir,
                                        all_dirs)
            dir_info[utils_lib.DIRS][f] = f_summary[f]
            dir_info[utils_lib.ORIGINAL_SIZE_BYTES] += (
                    f_summary[f][utils_lib.ORIGINAL_SIZE_BYTES])

    return summary

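# For illustration, scanning a directory `/tmp/logs/` holding one 100-byte
# file `messages` would yield a summary like the one below (a sketch; the
# path and size are made up, and '/S' and '/D' stand for the size and dirs
# keys defined in utils_lib). The trailing os.sep makes the root key the
# empty string:
#
#     get_dir_summary('/tmp/logs/', top_dir='/tmp/logs/')
#     # -> {'': {'/S': 100, '/D': {'messages': {'/S': 100}}}}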

def build_summary_json(path):
    """Build a summary of the files in the given path and return it as a dict.

    @param path: The directory to build the summary for.
    @return: A dictionary of the directory summary, ready to be serialized to
            json.
    @raise IOError: If the given path doesn't exist.
    @raise ValueError: If the given path is a file rather than a directory.
    """
    if not os.path.exists(path):
        raise IOError('Path %s does not exist.' % path)

    if not os.path.isdir(path):
        raise ValueError('The given path %s is a file. It must be a '
                         'directory.' % path)

    # Make sure the path ends with `/` so the root key of the summary json is
    # always utils_lib.ROOT_DIR ('').
    if not path.endswith(os.sep):
        path = path + os.sep

    return get_dir_summary(path, top_dir=path)

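# Usage sketch (the results path is hypothetical); the returned dict can be
# serialized with the standard json module, as main() does below:
#
#     summary = build_summary_json('/usr/local/autotest/results/1-debug')
#     summary_json = json.dumps(summary)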

def _update_sizes(entry):
    """Update a directory entry's sizes.

    The values of ORIGINAL_SIZE_BYTES, TRIMMED_SIZE_BYTES and
    COLLECTED_SIZE_BYTES are re-calculated based on the files under the
    directory. If the entry is a file, the update is skipped.

    @param entry: A dict of a directory entry in a summary.
    """
    if utils_lib.DIRS not in entry:
        return

    entry[utils_lib.ORIGINAL_SIZE_BYTES] = sum([
            entry[utils_lib.DIRS][s][utils_lib.ORIGINAL_SIZE_BYTES]
            for s in entry[utils_lib.DIRS]])
    # Until trimming is implemented, COLLECTED_SIZE_BYTES and
    # TRIMMED_SIZE_BYTES fall back to the value of ORIGINAL_SIZE_BYTES.
    entry[utils_lib.COLLECTED_SIZE_BYTES] = sum([
            entry[utils_lib.DIRS][s].get(
                utils_lib.COLLECTED_SIZE_BYTES,
                entry[utils_lib.DIRS][s].get(
                    utils_lib.TRIMMED_SIZE_BYTES,
                    entry[utils_lib.DIRS][s][utils_lib.ORIGINAL_SIZE_BYTES]))
            for s in entry[utils_lib.DIRS]])
    entry[utils_lib.TRIMMED_SIZE_BYTES] = sum([
            entry[utils_lib.DIRS][s].get(
                    utils_lib.TRIMMED_SIZE_BYTES,
                    entry[utils_lib.DIRS][s][utils_lib.ORIGINAL_SIZE_BYTES])
            for s in entry[utils_lib.DIRS]])

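# A minimal sketch of the recalculation (the entry and sizes are made up):
#
#     entry = {utils_lib.DIRS: {
#             'a.log': {utils_lib.ORIGINAL_SIZE_BYTES: 10},
#             'b.log': {utils_lib.ORIGINAL_SIZE_BYTES: 20}}}
#     _update_sizes(entry)
#     # entry[utils_lib.ORIGINAL_SIZE_BYTES] == 30
#     # entry[utils_lib.TRIMMED_SIZE_BYTES] == 30 (falls back to the original
#     # sizes since no trimmed sizes are recorded)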

def _delete_missing_entries(summary_old, summary_new):
    """Delete files/directories that exist only in the old summary.

    When the new summary is final, i.e., it's built from the final result
    directory, files or directories missing from it are considered to be
    deleted and trimmed to size 0.

    @param summary_old: Old directory summary.
    @param summary_new: New directory summary.
    """
    # Iterate over a copy of the keys since entries may be deleted.
    for name in list(summary_old.keys()):
        if name not in summary_new:
            if utils_lib.DIRS in summary_old[name]:
                # Trim sub-directories.
                _delete_missing_entries(summary_old[name][utils_lib.DIRS], {})
                _update_sizes(summary_old[name])
            elif name in FILES_TO_IGNORE:
                # Remove the file from the summary as it can be ignored.
                del summary_old[name]
            else:
                # Before setting the trimmed size to 0, update the collected
                # size if it's not set yet.
                if utils_lib.COLLECTED_SIZE_BYTES not in summary_old[name]:
                    trimmed_size = summary_old[name].get(
                            utils_lib.TRIMMED_SIZE_BYTES,
                            summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES])
                    summary_old[name][utils_lib.COLLECTED_SIZE_BYTES] = (
                            trimmed_size)
                summary_old[name][utils_lib.TRIMMED_SIZE_BYTES] = 0
        elif utils_lib.DIRS in summary_old[name]:
            _delete_missing_entries(summary_old[name][utils_lib.DIRS],
                                    summary_new[name][utils_lib.DIRS])
            _update_sizes(summary_old[name])
    _update_sizes(summary_old)

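# A sketch of the trimming bookkeeping (the name and size are made up): a file
# present only in the old summary keeps its collected size but is marked as
# trimmed to 0:
#
#     old = {'x.log': {utils_lib.ORIGINAL_SIZE_BYTES: 50}}
#     _delete_missing_entries(old, {})
#     # old['x.log'][utils_lib.COLLECTED_SIZE_BYTES] == 50
#     # old['x.log'][utils_lib.TRIMMED_SIZE_BYTES] == 0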

def _merge(summary_old, summary_new, is_final=False):
    """Merge a new directory summary into an old one.

    Update the old directory summary with the new summary. Also calculate the
    total size of results collected from the client side.

    When merging with previously collected results, any results not existing
    in the new summary, or files whose sizes differ from the newly collected
    files, are considered to be extra results collected or overwritten by the
    new results. Therefore, the size of the collected result should include
    such files, and COLLECTED_SIZE_BYTES can be larger than
    TRIMMED_SIZE_BYTES.
    As an example:
    summary_old: {'file1': {TRIMMED_SIZE_BYTES: 1000,
                            ORIGINAL_SIZE_BYTES: 1000,
                            COLLECTED_SIZE_BYTES: 1000}}
    This means a result `file1` of original size 1KB was collected with a
    size of 1KB.
    summary_new: {'file1': {TRIMMED_SIZE_BYTES: 1000,
                            ORIGINAL_SIZE_BYTES: 2000,
                            COLLECTED_SIZE_BYTES: 1000}}
    This means a result `file1` of 2KB was trimmed down to 1KB and was
    collected with a size of 1KB.
    Note that the second result collection has an updated result `file1`
    (because of the different ORIGINAL_SIZE_BYTES), and it needs to be
    rsync-ed to the drone. Therefore, the merged summary will be:
    {'file1': {TRIMMED_SIZE_BYTES: 1000,
               ORIGINAL_SIZE_BYTES: 2000,
               COLLECTED_SIZE_BYTES: 2000}}
    Note that:
    * TRIMMED_SIZE_BYTES is still 1KB, which reflects the actual size of the
      file to be collected.
    * ORIGINAL_SIZE_BYTES is updated to 2KB, which is the size of the file in
      the new result `file1`.
    * COLLECTED_SIZE_BYTES is 2KB because rsync will copy `file1` twice as it
      has changed.

    @param summary_old: Old directory summary. It is updated in place to hold
            the merged summary; nothing is returned.
    @param summary_new: New directory summary.
    @param is_final: True if summary_new is built from the final result
            folder. Default is set to False.
    """
    for name in summary_new:
        if name not in summary_old:
            # A file/dir exists in the new client dir, but not in the old one,
            # which means the file or directory is newly collected.
            summary_old[name] = copy.deepcopy(summary_new[name])
        elif utils_lib.DIRS in summary_new[name]:
            # `name` is a directory in the new summary; merge the directories
            # of the old and new summaries under `name`.

            if utils_lib.DIRS not in summary_old[name]:
                # If `name` is a file in the old summary but a directory in
                # the new summary, the file in the old summary will be
                # overwritten by the new directory by rsync. Therefore, force
                # it to be an empty directory in the old summary, so that the
                # new directory can be merged.
                summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES] = 0
                summary_old[name][utils_lib.TRIMMED_SIZE_BYTES] = 0
                summary_old[name][utils_lib.COLLECTED_SIZE_BYTES] = 0
                summary_old[name][utils_lib.DIRS] = {}

            _merge(summary_old[name][utils_lib.DIRS],
                   summary_new[name][utils_lib.DIRS], is_final)
        else:
            # `name` is a file. Compare the original sizes; if they are
            # different, the file was overwritten, so increment
            # COLLECTED_SIZE_BYTES.

            if utils_lib.DIRS in summary_old[name]:
                # If `name` is a directory in the old summary, but a file in
                # the new summary, rsync will fail to copy the file as it
                # can't overwrite a directory. Therefore, skip the merge.
                continue

            new_size = summary_new[name][utils_lib.ORIGINAL_SIZE_BYTES]
            old_size = summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES]
            new_trimmed_size = summary_new[name].get(
                    utils_lib.TRIMMED_SIZE_BYTES,
                    summary_new[name][utils_lib.ORIGINAL_SIZE_BYTES])
            old_trimmed_size = summary_old[name].get(
                    utils_lib.TRIMMED_SIZE_BYTES,
                    summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES])
            if new_size != old_size:
                if is_final and new_trimmed_size == old_trimmed_size:
                    # If the file is merged from the final result folder into
                    # an older summary, it's not considered to be trimmed if
                    # its size has not changed. The reason is that the file on
                    # the server side does not have the info of its original
                    # size.
                    continue

                # Until trimming is implemented, COLLECTED_SIZE_BYTES falls
                # back to the value of ORIGINAL_SIZE_BYTES.
                new_collected_size = summary_new[name].get(
                        utils_lib.COLLECTED_SIZE_BYTES,
                        summary_new[name].get(
                            utils_lib.TRIMMED_SIZE_BYTES,
                            summary_new[name][utils_lib.ORIGINAL_SIZE_BYTES]))
                old_collected_size = summary_old[name].get(
                        utils_lib.COLLECTED_SIZE_BYTES,
                        summary_old[name].get(
                            utils_lib.TRIMMED_SIZE_BYTES,
                            summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES]))

                summary_old[name][utils_lib.COLLECTED_SIZE_BYTES] = (
                        new_collected_size + old_collected_size)
                summary_old[name][utils_lib.TRIMMED_SIZE_BYTES] = (
                        summary_new[name].get(
                            utils_lib.TRIMMED_SIZE_BYTES,
                            summary_new[name][utils_lib.ORIGINAL_SIZE_BYTES]))
                summary_old[name][utils_lib.ORIGINAL_SIZE_BYTES] = new_size

        # Update COLLECTED_SIZE_BYTES and ORIGINAL_SIZE_BYTES based on the
        # merged directory summary.
        _update_sizes(summary_old[name])

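# A runnable sketch of the docstring example above (sizes are made up):
#
#     old = {'file1': {utils_lib.ORIGINAL_SIZE_BYTES: 1000,
#                      utils_lib.TRIMMED_SIZE_BYTES: 1000,
#                      utils_lib.COLLECTED_SIZE_BYTES: 1000}}
#     new = {'file1': {utils_lib.ORIGINAL_SIZE_BYTES: 2000,
#                      utils_lib.TRIMMED_SIZE_BYTES: 1000,
#                      utils_lib.COLLECTED_SIZE_BYTES: 1000}}
#     _merge(old, new)
#     # old['file1'][utils_lib.COLLECTED_SIZE_BYTES] == 2000
#     # old['file1'][utils_lib.ORIGINAL_SIZE_BYTES] == 2000
#     # old['file1'][utils_lib.TRIMMED_SIZE_BYTES] == 1000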

def merge_summaries(path):
    """Merge all directory summaries in the given path.

    This function calculates the total size of result files being collected
    for the test device and the files generated on the drone. It also returns
    the merged directory summary.

    @param path: A path to search for directory summaries.
    @return: A tuple of (client_collected_bytes, merged_summary):
            client_collected_bytes: The total size of results collected from
                the DUT. The number can be larger than the total file size of
                the given path, as files can be overwritten or removed.
            merged_summary: The merged directory summary of the given path.
    """
    # Find all directory summary files and sort them by modification time.
    summary_files = glob.glob(os.path.join(path, 'dir_summary_*.json'))
    summary_files = sorted(summary_files, key=os.path.getmtime)

    all_summaries = []
    for summary_file in summary_files:
        with open(summary_file) as f:
            all_summaries.append(json.load(f))

    # Merge all summaries.
    merged_summary = (copy.deepcopy(all_summaries[0]) if all_summaries
                      else {})
    for summary in all_summaries[1:]:
        _merge(merged_summary, summary)
    # After all summaries from the test device (client side) are merged, we
    # can get the total size of result files transferred from the test device.
    # If no directory summary was collected, client_collected_bytes defaults
    # to 0.
    client_collected_bytes = 0
    if merged_summary:
        client_collected_bytes = (
            merged_summary[utils_lib.ROOT_DIR][utils_lib.COLLECTED_SIZE_BYTES])

    # Get the summary of the current directory.

    # Make sure the path ends with /, so the top directory in the summary will
    # be '', which is consistent with the other summaries.
    if not path.endswith(os.sep):
        path += os.sep

    last_summary = get_dir_summary(path, top_dir=path)
    _merge(merged_summary, last_summary, is_final=True)
    _delete_missing_entries(merged_summary, last_summary)

    return client_collected_bytes, merged_summary

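# Usage sketch (the results path is hypothetical):
#
#     collected, summary = merge_summaries('/usr/local/autotest/results/1')
#     logging.info('Client collected %d bytes.', collected)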

def main():
    """Main entry point of the script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, dest='path',
                        help='Path to build the directory summary for.')
    parser.add_argument('-m', type=int, dest='max_size_KB', default=0,
                        help='Maximum result size in KB. Set to 0 to disable '
                        'result throttling.')
    options = parser.parse_args()

    summary = build_summary_json(options.path)
    summary_json = json.dumps(summary)
    summary_file = get_unique_dir_summary_file(options.path)

    # Make sure there is enough free disk space to write the file.
    stat = os.statvfs(options.path)
    free_space = stat.f_frsize * stat.f_bavail
    if free_space - len(summary_json) < MIN_FREE_DISK_BYTES:
        raise IOError('Not enough disk space after saving the summary file. '
                      'Available free disk: %s bytes. Summary file size: %s '
                      'bytes.' % (free_space, len(summary_json)))

    with open(summary_file, 'w') as f:
        f.write(summary_json)
    logging.info('Directory summary of %s is saved to file %s.', options.path,
                 summary_file)


if __name__ == '__main__':
    main()