1#!/usr/bin/env python
2
3# Copyright 2016 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""
8This module is used to upload csv files generated by performance related tests
9to cns. More details about the implementation can be found in crbug.com/598504.
10
11The overall work flow is as follows.
1. Query tko_test_attributes table for perf_csv_folder attribute. The attribute
contains a path to the csv files that need to be uploaded to cns.
2. Filter the perf_csv_folder attributes to only those of test jobs that
finished at least an hour ago. This is to make sure the results have already
been uploaded to GS.
3. Locate the csv files in GS, and upload them to the desired cns location.
17
After every run, the script saves the minimum test attribute id to query from
next time to a local file, and repeats the workflow.
20
21"""
22
23import argparse
24import datetime
25import logging
26import os
27import shutil
28import tempfile
29import time
30
31import common
32from autotest_lib.client.bin import utils
33from autotest_lib.client.common_lib import logging_config
34from autotest_lib.client.common_lib.cros import retry
35from autotest_lib.frontend import setup_django_environment
36from autotest_lib.frontend.tko import models as tko_models
37
38
# Number of hours that a test has to have been finished before the script
# processes it. This allows gs_offloader to have enough time to upload the
# results to GS.
CUTOFF_TIME_HOURS = 1

# Wait time in seconds before the main loop queries the database again.
DEFAULT_INTERVAL_SEC = 60

# Timeout in minutes for upload attempts for a given folder.
UPLOAD_TIMEOUT_MINS = 5
48
class CsvNonexistenceException(Exception):
    """Error signalling that no csv file could be found in GS."""
    pass
51
52
class CsvFolder(object):
    """Information about one folder of csv files and the logic to upload it.

    Each instance pairs a perf_csv_folder attribute record with the test view
    of the test that produced it. The csv files are looked up in GoogleStorage
    under gs_path, downloaded to a temporary directory and then copied to cns
    under cns_path.
    """

    # A class variable whose value is the GoogleStorage path to the test
    # results. It stays None until it is assigned from outside the class.
    gs_path = None

    # A class variable whose value is the cns path to upload the csv files to.
    # It stays None until it is assigned from outside the class.
    cns_path = None

    def __init__(self, test_attribute_id, perf_csv_folder, test_view):
        """Initialize a CsvFolder object.

        @param test_attribute_id: ID of test attribute record.
        @param perf_csv_folder: Path of the folder contains csv files in test
                results. It's the value of perf_csv_folder attribute from
                tko_test_attributes table.
        @param test_view: A db object from querying tko_test_view_2 for the
                related tko_test_attributes.
        """
        self.test_attribute_id = test_attribute_id
        self.perf_csv_folder = perf_csv_folder
        self.test_view = test_view


    def __str__(self):
        """Return '<job_name>:<job_tag>:<perf_csv_folder>' for log messages."""
        return '%s:%s:%s' % (self.test_view.job_name, self.test_view.job_tag,
                             self.perf_csv_folder)


    def _get_url(self):
        """Get the url to the test results of this folder in GS.

        The url is the class-level gs_path joined with the job tag of the
        test view. For example:
        gs://chromeos-autotest-results/123-chromeos-test/host1/
        gsutil is used to download the csv files with this gs url.

        @return: The GS url as a string.
        """
        return os.path.join(self.gs_path, self.test_view.job_tag)


    def _download(self, dest_dir):
        """Download all csv files found under the GS url to dest_dir.

        @param dest_dir: A directory to store the downloaded csv files.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        gs_url = self._get_url()
        # List everything under the GS url recursively and keep only the csv
        # files. The filtering is done here rather than with an unquoted grep
        # pattern, which the shell could glob-expand before grep sees it.
        # ignore_status is kept so a failed listing is treated as "no csv".
        listing = utils.run('gsutil ls -r %s' % gs_url,
                            ignore_status=True).stdout
        files = [line.strip() for line in listing.splitlines()
                 if line.strip().endswith('.csv')]
        if not files:
            # Interpolate the url; passing it as a second argument would leave
            # the '%s' placeholder unformatted in the exception message.
            raise CsvNonexistenceException(
                    'No csv file found in %s' % gs_url)

        # Copy files from GS to dest_dir
        for f in files:
            utils.run('gsutil cp %s %s' % (f, dest_dir))


    @retry.retry(Exception, blacklist=[CsvNonexistenceException],
                 timeout_min=UPLOAD_TIMEOUT_MINS)
    def upload(self):
        """Upload the csv files of this folder to cns.

        The files are downloaded from GS into a temporary directory, which is
        always removed afterwards. The method is wrapped by retry.retry with
        CsvNonexistenceException blacklisted and a timeout of
        UPLOAD_TIMEOUT_MINS minutes.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        temp_dir = tempfile.mkdtemp(suffix='perf_csv')
        try:
            self._download(temp_dir)
            files = os.listdir(temp_dir)
            finished = self.test_view.job_finished_time
            # File in cns is stored under folder with format of:
            # <test_name>/<host_name>/YYYY/mm/dd/hh/mm
            path_in_cns = os.path.join(
                    self.cns_path,
                    self.test_view.test_name, self.test_view.hostname,
                    str(finished.year),
                    '%02d' % finished.month,
                    '%02d' % finished.day,
                    '%02d' % finished.hour,
                    '%02d' % finished.minute)
            utils.run('fileutil mkdir -p %s' % path_in_cns)
            for f in files:
                utils.run('fileutil copytodir -f %s %s' %
                          (os.path.join(temp_dir, f), path_in_cns))
        finally:
            shutil.rmtree(temp_dir)
142
143
class DBScanner(object):
    """Class contains the logic to query tko_test_attributes table for
    new perf_csv_folder attributes and create CsvFolder object for each
    new perf_csv_folder attribute.
    """

    # Minimum test_attribute id for querying tko_test_attributes table.
    min_test_attribute_id = -1

    @classmethod
    def get_perf_csv_folders(cls):
        """Query tko_test_attributes table for new entries of perf_csv_folder.

        Only attributes whose id is at least min_test_attribute_id are
        queried. An attribute is skipped when no test view exists for it, or
        when its job has no finish time or finished less than
        CUTOFF_TIME_HOURS hours ago.

        @return: A list of CsvFolder objects for each new entry of
                 perf_csv_folder attribute in tko_test_attributes table.
        """
        attributes = tko_models.TestAttribute.objects.filter(
                attribute='perf_csv_folder', id__gte=cls.min_test_attribute_id)
        folders = []

        cutoff_time = (datetime.datetime.now() -
                       datetime.timedelta(hours=CUTOFF_TIME_HOURS))
        for attribute in attributes:
            # Fetch the first matching test view once, instead of indexing the
            # queryset several times.
            test_views = list(tko_models.TestView.objects.filter(
                    test_idx=attribute.test_id)[:1])
            if not test_views:
                logging.warning('No test view found for test attribute %s '
                                '(test id %s), skipping.',
                                attribute.id, attribute.test_id)
                continue
            test_view = test_views[0]
            finished_time = test_view.job_finished_time
            # A missing finish time means the job cannot be confirmed as
            # finished before the cutoff, so treat it as not ready instead of
            # comparing None with a datetime.
            if finished_time is None or finished_time > cutoff_time:
                continue
            folders.append(CsvFolder(attribute.id, attribute.value,
                                     test_view))
        return folders
174
175
def setup_logging(log_dir):
    """Attach a DEBUG-level file handler for this script's log file.

    The log file is named perf_csv_uploader.log and lives directly under the
    absolute path of log_dir.

    @param log_dir: Path to the directory storing logs of this script.
    """
    log_config = logging_config.LoggingConfig()
    absolute_log_dir = os.path.abspath(log_dir)
    log_config.add_file_handler(
            file_path=os.path.join(absolute_log_dir, 'perf_csv_uploader.log'),
            level=logging.DEBUG)
184
185
def save_min_test_attribute_id(test_attribute_id_file):
    """Write DBScanner.min_test_attribute_id to the cache file as text.

    The file is opened in 'w' mode, so any previous content is replaced.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: Whatever the file object's write() call returns.
    """
    with open(test_attribute_id_file, 'w') as id_file:
        serialized_id = str(DBScanner.min_test_attribute_id)
        write_result = id_file.write(serialized_id)
    return write_result
194
195
def get_min_test_attribute_id(test_attribute_id_file):
    """Get the minimum test attribute id from a cached file.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: The integer stored in the file, or -1 if the file cannot be read
             or does not contain a valid integer.
    """
    try:
        with open(test_attribute_id_file, 'r') as f:
            content = f.read()
    except IOError:
        # min_test_attribute_id has not been set, default to -1.
        return -1

    try:
        return int(content)
    except ValueError:
        # An empty or corrupted cache file (e.g. left behind by an interrupted
        # write) must not crash the script; start over from the default.
        logging.warning('Invalid min_test_attribute_id %r found in %s, '
                        'default to -1.', content, test_attribute_id_file)
        return -1
208
209
def get_options():
    """Get the command line options.

    All three options are mandatory: the rest of the script joins these
    values into paths, which fails when one of them is None. Parsing exits
    with a usage error if any option is missing.

    As a side effect, gs_path and cns_path are stored on the CsvFolder class.

    @return: Command line options of the script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gs_path', type=str, dest='gs_path', required=True,
                        help='GoogleStorage path that stores test results.')
    parser.add_argument('--cns_path', type=str, dest='cns_path',
                        required=True,
                        help='cns path to where csv files are uploaded to.')
    parser.add_argument('--log_dir', type=str, dest='log_dir', required=True,
                        help='Directory used to store logs.')

    options = parser.parse_args()
    CsvFolder.gs_path = options.gs_path
    CsvFolder.cns_path = options.cns_path

    return options
228
229
def main():
    """Main process to repeat the workflow of searching/uploading csv files.

    The loop never returns. Each iteration queries for new folders, uploads
    them, and then advances DBScanner.min_test_attribute_id: to the smallest
    id that failed to upload, or past the largest id when every folder was
    handled. The new value is saved to a file under the log directory
    whenever it changes.
    """
    options = get_options()
    setup_logging(options.log_dir)
    test_attribute_id_file = os.path.join(options.log_dir,
                                          'perf_csv_uploader_test_attr_id')
    DBScanner.min_test_attribute_id = get_min_test_attribute_id(
            test_attribute_id_file)

    while True:
        folders = DBScanner.get_perf_csv_folders()
        if not folders:
            logging.info('No new folders found. Wait...')
            time.sleep(DEFAULT_INTERVAL_SEC)
            continue

        failed_folders = []
        for folder in folders:
            try:
                logging.info('Uploading folder: %s', folder)
                folder.upload()
            except CsvNonexistenceException:
                # Missing CSV files are not treated as a failure, but leave a
                # trace in the log so skipped folders can be accounted for.
                logging.info('No csv file found in GS for folder %s, '
                             'skipping.', folder)
            except Exception as e:
                failed_folders.append(folder)
                # logging.exception keeps the traceback with the message.
                logging.exception('Failed to upload folder %s, error: %s',
                                  folder, e)
        if failed_folders:
            # Set the min_test_attribute_id to be the smallest one that failed
            # to upload.
            min_test_attribute_id = min(folder.test_attribute_id
                                        for folder in failed_folders)
        else:
            min_test_attribute_id = max(folder.test_attribute_id
                                        for folder in folders) + 1
        # Only touch the cache file when the value actually moved.
        if DBScanner.min_test_attribute_id != min_test_attribute_id:
            DBScanner.min_test_attribute_id = min_test_attribute_id
            save_min_test_attribute_id(test_attribute_id_file)
270
271
# Start the search/upload loop only when this file is executed as a script.
if __name__ == '__main__':
    main()
274