#!/usr/bin/python
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This is a utility to build a summary of the given directory and save it to a
json file.

usage: utils.py [-h] [-p PATH] [-m MAX_SIZE_KB] [-d]

optional arguments:
  -p PATH         Path to build directory summary.
  -m MAX_SIZE_KB  Maximum result size in KB. Set to 0 to disable result
                  throttling.
  -d              Delete all result summary files in the given path.
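
example (hypothetical path):
  utils.py -p /usr/local/autotest/results/1-debug -m 512
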
The content of the json file looks like:
{'default': {'/D': [{'control': {'/S': 734}},
                    {'debug': {'/D': [{'client.0.DEBUG': {'/S': 5698}},
                                      {'client.0.ERROR': {'/S': 254}},
                                      {'client.0.INFO': {'/S': 1020}},
                                      {'client.0.WARNING': {'/S': 242}}],
                              '/S': 7214}}
                    ],
             '/S': 7948
            }
}
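
In the summary above, '/D' lists a directory's entries and '/S' gives a size
in bytes.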
28"""
29
30import argparse
31import copy
32import fnmatch
33import glob
34import json
35import logging
36import os
37import random
38import sys
39import time
40import traceback
41
42import dedupe_file_throttler
43import delete_file_throttler
44import result_info
45import shrink_file_throttler
46import throttler_lib
47import utils_lib
48import zip_file_throttler
49
50
51# Do NOT import autotest_lib modules here. This module can be executed without
52# dependency on other autotest modules. This is to keep the logic of result
53# trimming on the server side, instead of depending on the autotest client
54# module.
55
56DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json'
57SUMMARY_FILE_PATTERN = 'dir_summary_*.json'
58MERGED_SUMMARY_FILENAME = 'dir_summary_final.json'
59
60# Minimum disk space should be available after saving the summary file.
61MIN_FREE_DISK_BYTES = 10 * 1024 * 1024
62
63# Autotest uses some state files to track process running state. The files are
64# deleted from test results. Therefore, these files can be ignored.
65FILES_TO_IGNORE = set([
66    'control.autoserv.state'
67])
68
69# Smallest file size to shrink to.
70MIN_FILE_SIZE_LIMIT_BYTE = 10 * 1024
71
72def get_unique_dir_summary_file(path):
73    """Get a unique file path to save the directory summary json string.
74
75    @param path: The directory path to save the summary file to.
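    @return: A unique file path for the summary, e.g. (hypothetical timestamp)
            '<path>/dir_summary_1502357657.json', falling back to
            '<path>/dir_summary_1502357657_1.json' and so on if taken.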
76    """
77    summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time()
78    # Make sure the summary file name is unique.
79    file_name = os.path.join(path, summary_file)
80    if os.path.exists(file_name):
81        count = 1
82        name, ext = os.path.splitext(summary_file)
83        while os.path.exists(file_name):
84            file_name = os.path.join(path, '%s_%s%s' % (name, count, ext))
85            count += 1
86    return file_name
87
88
89def _preprocess_result_dir_path(path):
90    """Verify the result directory path is valid and make sure it ends with `/`.
91
92    @param path: A path to the result directory.
93    @return: A verified and processed path to the result directory.
94    @raise IOError: If the path doesn't exist.
95    @raise ValueError: If the path is not a directory.
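
    Example (hypothetical path): '/tmp/results' is returned as '/tmp/results/'.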
96    """
97    if not os.path.exists(path):
98        raise IOError('Path %s does not exist.' % path)
99
100    if not os.path.isdir(path):
101        raise ValueError('The given path %s is a file. It must be a '
102                         'directory.' % path)
103
104    # Make sure the path ends with `/` so the root key of summary json is always
105    # utils_lib.ROOT_DIR ('')
106    if not path.endswith(os.sep):
107        path = path + os.sep
108
109    return path
110
111
112def _delete_missing_entries(summary_old, summary_new):
113    """Delete files/directories only exists in old summary.
114
115    When the new summary is final, i.e., it's built from the final result
116    directory, files or directories missing are considered to be deleted and
117    trimmed to size 0.
118
119    @param summary_old: Old directory summary.
120    @param summary_new: New directory summary.
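
    Example: if directory `debug` exists only in summary_old, every file under
    it has its trimmed_size set to 0, while files listed in FILES_TO_IGNORE
    are removed from the summary instead.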
121    """
122    new_files = summary_new.get_file_names()
123    old_files = summary_old.get_file_names()
124    for name in old_files:
125        old_file = summary_old.get_file(name)
126        if name not in new_files:
127            if old_file.is_dir:
128                # Trim sub-directories.
129                with old_file.disable_updating_parent_size_info():
130                    _delete_missing_entries(old_file, result_info.EMPTY)
131                old_file.update_sizes()
132            elif name in FILES_TO_IGNORE:
133                # Remove the file from the summary as it can be ignored.
134                summary_old.remove_file(name)
135            else:
136                with old_file.disable_updating_parent_size_info():
137                    # Before setting the trimmed size to 0, update the collected
138                    # size if it's not set yet.
139                    if not old_file.is_collected_size_recorded:
140                        old_file.collected_size = old_file.trimmed_size
141                    old_file.trimmed_size = 0
142        elif old_file.is_dir:
143            # If `name` is a directory in the old summary, but a file in the new
144            # summary, delete the entry in the old summary.
145            new_file = summary_new.get_file(name)
146            if not new_file.is_dir:
147                new_file = result_info.EMPTY
148            _delete_missing_entries(old_file, new_file)
149
150
151def _relocate_summary(result_dir, summary_file, summary):
152    """Update the given summary with the path relative to the result_dir.
153
154    @param result_dir: Path to the result directory.
155    @param summary_file: Path to the summary file.
156    @param summary: A directory summary inside the given result_dir or its
157            sub-directory.
158    @return: An updated summary with the path relative to the result_dir.
159    """
160    sub_path = os.path.dirname(summary_file).replace(
161            result_dir.rstrip(os.sep), '')
162    if sub_path == '':
163        return summary
164
165    folders = sub_path.split(os.sep)
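    # For example (hypothetical paths), with result_dir '/results/1-debug/'
    # and summary_file '/results/1-debug/sysinfo/dir_summary_1.json', sub_path
    # is '/sysinfo' and folders is ['', 'sysinfo'].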

    # The first folder is always '' because of the leading `/` in sub_path.
    parent = result_info.ResultInfo(
            result_dir, utils_lib.ROOT_DIR, parent_result_info=None)
    root = parent

    # Start at 1 to skip the empty first folder, which is already represented
    # by the utils_lib.ROOT_DIR entry, so root has only that single folder.
    for i in range(1, len(folders)):
        child = result_info.ResultInfo(
                parent.path, folders[i], parent_result_info=parent)
        if i == len(folders) - 1:
            # Add files in summary to child.
            for info in summary.files:
                child.files.append(info)

        parent.files.append(child)
        parent = child

    parent.update_sizes()
    return root


def merge_summaries(path):
    """Merge all directory summaries in the given path.

    This function calculates the total size of result files collected from the
    test device and of the files generated on the drone. It also returns the
    merged directory summary.

    @param path: A path to search for directory summaries.
    @return a tuple of (client_collected_bytes, merged_summary, files):
            client_collected_bytes: The total size of results collected from
                the DUT. The number can be larger than the total file size of
                the given path, as files can be overwritten or removed.
            merged_summary: The merged directory summary of the given path.
            files: All summary files in the given path, including
                sub-directories.
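
    Example (hypothetical values): a run that produced two summary files might
    return (7948, <merged ResultInfo>,
            ['/results/1-debug/dir_summary_1.json',
             '/results/1-debug/sysinfo/dir_summary_2.json']).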
203    """
204    path = _preprocess_result_dir_path(path)
205    # Find all directory summary files and sort them by the time stamp in file
206    # name.
207    summary_files = []
208    for root, _, filenames in os.walk(path):
209        for filename in fnmatch.filter(filenames, 'dir_summary_*.json'):
210            summary_files.append(os.path.join(root, filename))
211    summary_files = sorted(summary_files, key=os.path.getmtime)
212
213    all_summaries = []
214    for summary_file in summary_files:
215        try:
216            summary = result_info.load_summary_json_file(summary_file)
217            summary = _relocate_summary(path, summary_file, summary)
218            all_summaries.append(summary)
219        except (IOError, ValueError) as e:
220            utils_lib.LOG('Failed to load summary file %s Error: %s' %
221                          (summary_file, e))
222
223    # Merge all summaries.
224    merged_summary = all_summaries[0] if len(all_summaries) > 0 else None
225    for summary in all_summaries[1:]:
226        merged_summary.merge(summary)
227    # After all summaries from the test device (client side) are merged, we can
228    # get the total size of result files being transfered from the test device.
229    # If there is no directory summary collected, default client_collected_bytes
230    # to 0.
231    client_collected_bytes = 0
232    if merged_summary:
233        client_collected_bytes = merged_summary.collected_size
234
235    # Get the summary of current directory
236    last_summary = result_info.ResultInfo.build_from_path(path)
237
238    if merged_summary:
239        merged_summary.merge(last_summary, is_final=True)
240        _delete_missing_entries(merged_summary, last_summary)
241    else:
242        merged_summary = last_summary
243
244    return client_collected_bytes, merged_summary, summary_files
245
246
247def _throttle_results(summary, max_result_size_KB):
248    """Throttle the test results by limiting to the given maximum size.
249
250    @param summary: A ResultInfo object containing result summary.
251    @param max_result_size_KB: Maximum test result size in KB.
252    """
253    if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
254        utils_lib.LOG(
255                'Result size is %s, which is less than %d KB. No need to '
256                'throttle.' %
257                (utils_lib.get_size_string(summary.trimmed_size),
258                 max_result_size_KB))
259        return
260
261    args = {'summary': summary,
262            'max_result_size_KB': max_result_size_KB}
263    args_skip_autotest_log = copy.copy(args)
264    args_skip_autotest_log['skip_autotest_log'] = True
265    # Apply the throttlers in following order.
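    # The first two throttlers skip autotest log files, so those logs are only
    # shrunk or compressed when throttling everything else is not enough.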
    throttlers = [
            (shrink_file_throttler, copy.copy(args_skip_autotest_log)),
            (zip_file_throttler, copy.copy(args_skip_autotest_log)),
            (shrink_file_throttler, copy.copy(args)),
            (dedupe_file_throttler, copy.copy(args)),
            (zip_file_throttler, copy.copy(args)),
            ]

    # Add another zip_file_throttler to compress the files being shrunk.
    # The threshold is set to half of the DEFAULT_FILE_SIZE_LIMIT_BYTE of
    # shrink_file_throttler.
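    # (The 50 * 1024 below assumes that default is 100 KB.)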
    new_args = copy.copy(args)
    new_args['file_size_threshold_byte'] = 50 * 1024
    throttlers.append((zip_file_throttler, new_args))

    # If the above throttlers still can't reduce the result size to be under
    # max_result_size_KB, try to delete files with various thresholds, starting
    # at 5MB then lowering to 100KB.
    delete_file_thresholds = [5*1024*1024, 1*1024*1024, 100*1024]
    # Try to keep tgz files first.
    exclude_file_patterns = [r'.*\.tgz']
    for threshold in delete_file_thresholds:
        new_args = copy.copy(args)
        new_args.update({'file_size_threshold_byte': threshold,
                         'exclude_file_patterns': exclude_file_patterns})
        throttlers.append((delete_file_throttler, new_args))
    # Add one more delete_file_throttler that does not skip tgz files.
    new_args = copy.copy(args)
    new_args.update({'file_size_threshold_byte': delete_file_thresholds[-1]})
    throttlers.append((delete_file_throttler, new_args))

    # Run the throttlers in order until the result size is under
    # max_result_size_KB.
    old_size = summary.trimmed_size
    for throttler, args in throttlers:
        try:
            args_without_summary = copy.copy(args)
            del args_without_summary['summary']
            utils_lib.LOG('Applying throttler %s, args: %s' %
                          (throttler.__name__, args_without_summary))
            throttler.throttle(**args)
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
        except Exception:
            utils_lib.LOG('Failed to apply throttler %s. Exception: %s' %
                          (throttler.__name__, traceback.format_exc()))
        finally:
            new_size = summary.trimmed_size
            if new_size == old_size:
                utils_lib.LOG('Result size was not changed: %s.' %
                              utils_lib.get_size_string(old_size))
            else:
                utils_lib.LOG('Result size was reduced from %s to %s.' %
                              (utils_lib.get_size_string(old_size),
                               utils_lib.get_size_string(new_size)))
                # Update old_size so the next iteration logs the reduction made
                # by its own throttler rather than the cumulative change.
                old_size = new_size


def _setup_logging():
    """Set up logging to direct logs to stdout."""
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(message)s')
    handler.setFormatter(formatter)
    logger.handlers = []
    logger.addHandler(handler)


def _parse_options():
    """Options for the main script.

    @return: An option object containing the arg values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, dest='path',
                        help='Path to build directory summary.')
    parser.add_argument('-m', type=int, dest='max_size_KB', default=0,
                        help='Maximum result size in KB. Set to 0 to disable '
                        'result throttling.')
    parser.add_argument('-d', action='store_true', dest='delete_summaries',
                        default=False,
                        help='-d to delete all result summary files in the '
                        'given path.')
    return parser.parse_args()


def execute(path, max_size_KB):
    """Execute the script with the given arguments.

    @param path: Path to build directory summary.
    @param max_size_KB: Maximum result size in KB.
    """
    utils_lib.LOG('Running result_tools/utils on path: %s' % path)
    if max_size_KB > 0:
        utils_lib.LOG('Throttle result size to: %s' %
                      utils_lib.get_size_string(max_size_KB * 1024))

    result_dir = path
    if not os.path.isdir(result_dir):
        result_dir = os.path.dirname(result_dir)
    summary = result_info.ResultInfo.build_from_path(path)
    summary_json = json.dumps(summary)
    summary_file = get_unique_dir_summary_file(result_dir)

    # Make sure there is enough free disk left to write the summary file.
    stat = os.statvfs(path)
    free_space = stat.f_frsize * stat.f_bavail
    if free_space - len(summary_json) < MIN_FREE_DISK_BYTES:
        raise utils_lib.NotEnoughDiskError(
                'Not enough disk space after saving the summary file. '
                'Available free disk: %s bytes. Summary file size: %s bytes.' %
                (free_space, len(summary_json)))

    with open(summary_file, 'w') as f:
        f.write(summary_json)
    utils_lib.LOG('Directory summary of %s is saved to file %s.' %
                  (path, summary_file))

    if max_size_KB > 0 and summary.trimmed_size > 0:
        old_size = summary.trimmed_size
        throttle_probability = float(max_size_KB * 1024) / old_size
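        # Worked example (hypothetical sizes): with max_size_KB=100 and
        # old_size=1048576 (1 MB), throttle_probability is 102400 / 1048576,
        # about 0.1, so roughly 10% of runs skip throttling. A result already
        # under the limit yields a probability >= 1 and always skips.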
        if random.random() < throttle_probability:
            utils_lib.LOG(
                    'Skip throttling %s: size=%s, throttle_probability=%s' %
                    (path, old_size, throttle_probability))
        else:
            _throttle_results(summary, max_size_KB)
            if summary.trimmed_size < old_size:
                # Files were throttled; save the updated summary file.
                utils_lib.LOG('Overwrite the summary file: %s' % summary_file)
                result_info.save_summary(summary, summary_file)


def _delete_summaries(path):
    """Delete all directory summary files in the given directory.

    This cleans up the directory so no summary files are left behind to
    affect later tests.

    @param path: Path to the directory to clean up.
    """
    # Only summary files directly under `path` need to be cleaned.
    summary_files = glob.glob(os.path.join(path, SUMMARY_FILE_PATTERN))
    for summary in summary_files:
        try:
            os.remove(summary)
        except OSError as e:
            utils_lib.LOG('Failed to delete summary: %s. Error: %s' %
                          (summary, e))


def main():
    """Main script."""
    _setup_logging()
    options = _parse_options()
    if options.delete_summaries:
        _delete_summaries(options.path)
    else:
        execute(options.path, options.max_size_KB)


if __name__ == '__main__':
    main()