# Copyright 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# pylint: disable=module-missing-docstring
# pylint: disable=docstring-section-name

import csv
import glob
import httplib
import json
import logging
import os
import re
import shutil
import time
import urllib
import urllib2

from autotest_lib.client.bin import site_utils
from autotest_lib.client.bin import test
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.cros import constants

# TODO(scunningham): Return to 72000 (20 hrs) after server-side stabilizes.
TEST_DURATION = 10800  # Duration of test (3 hrs) in seconds.
SAMPLE_INTERVAL = 60  # Length of measurement samples in seconds.
METRIC_INTERVAL = 3600  # Interval between metric calculations, in seconds.
STABILIZATION_DURATION = 60  # Time for test stabilization in seconds.
TMP_DIRECTORY = '/tmp/'
EXIT_FLAG_FILE = TMP_DIRECTORY + 'longevity_terminate'
PERF_FILE_NAME_PREFIX = 'perf'
OLD_FILE_AGE = 14400  # Age of old files to delete, in minutes (= 10 days).
# The manifest.json file for a Chrome Extension contains the app name, id,
# version, and other app info. It is accessible by the OS only while the app
# is running, and thus its cryptohome directory is mounted. Only one Kiosk
# app can be running at a time.
MANIFEST_PATTERN = '/home/.shadow/*/mount/user/Extensions/%s/*/manifest.json'
VERSION_PATTERN = r'^(\d+)\.(\d+)\.(\d+)\.(\d+)$'
DASHBOARD_UPLOAD_URL = 'https://chromeperf.appspot.com/add_point'


class PerfUploadingError(Exception):
    """Exception raised in perf_uploader."""
    pass


class longevity_Tracker(test.test):
    """Monitor device and App stability over long periods of time."""

    version = 1

    def initialize(self):
        self.temp_dir = os.path.split(self.tmpdir)[0]

    def _get_cpu_usage(self):
        """Compute percent CPU in active use over the sample interval.

        Note: This method introduces a sleep period into the test, equal to
        90% of the sample interval.

        @returns float of percent active use of CPU.
        """
        # Time between measurements is ~90% of the sample interval.
        measurement_time_delta = SAMPLE_INTERVAL * 0.90
        cpu_usage_start = site_utils.get_cpu_usage()
        time.sleep(measurement_time_delta)
        cpu_usage_end = site_utils.get_cpu_usage()
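        # compute_active_cpu_time() derives the fraction of non-idle CPU time
        # between the two usage snapshots; scale it to a percentage.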
        return site_utils.compute_active_cpu_time(cpu_usage_start,
                                                  cpu_usage_end) * 100

    def _get_mem_usage(self):
        """Compute percent memory in active use.

        @returns float of percent memory in use.
        """
        total_memory = site_utils.get_mem_total()
        free_memory = site_utils.get_mem_free()
        return ((total_memory - free_memory) / total_memory) * 100

    def _get_max_temperature(self):
        """Get temperature of hottest sensor in Celsius.

        @returns float of temperature of hottest sensor.
        """
        temperature = utils.get_current_temperature_max()
        if not temperature:
            temperature = 0
        return temperature

    def _get_hwid(self):
        """Get hwid of test device, e.g., 'WOLF C4A-B2B-A47'.

        @returns string of hwid (Hardware ID) of device under test.
        """
        with os.popen('crossystem hwid 2>/dev/null', 'r') as hwid_proc:
            hwid = hwid_proc.read()
        if not hwid:
            hwid = 'undefined'
        return hwid

    def elapsed_time(self, mark_time):
        """Get time elapsed since |mark_time|.

        @param mark_time: point in time from which elapsed time is measured.
        @returns time elapsed since the marked time.
        """
        return time.time() - mark_time

    def modulo_time(self, timer, interval):
        """Get time elapsed on |timer| for the |interval| modulus.

        Value returned is used to adjust the timer so that it is synchronized
        with the current interval.

        @param timer: time on timer, in seconds.
        @param interval: period of time in seconds.
        @returns time elapsed from the start of the current interval.
        """
        return timer % int(interval)

    def syncup_time(self, timer, interval):
        """Get time remaining on |timer| for the |interval| modulus.

        Value returned is used to induce sleep just long enough to put the
        process back in sync with the timer.

        @param timer: time on timer, in seconds.
        @param interval: period of time in seconds.
        @returns time remaining until the end of the current interval.
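
        Example (illustrative): with an interval of 60 seconds, a timer
        value of 130 seconds gives modulo_time() == 10 and
        syncup_time() == 50, so sleeping 50 seconds realigns the loop with
        the next interval boundary.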
        """
        return interval - (timer % int(interval))

    def _record_perf_measurements(self, perf_values, perf_writer):
        """Record attribute performance measurements, and write to file.

        @param perf_values: dict of attribute performance values.
        @param perf_writer: file to write performance measurements.
        """
        # Get performance measurements.
        cpu_usage = '%.3f' % self._get_cpu_usage()
        mem_usage = '%.3f' % self._get_mem_usage()
        max_temp = '%.3f' % self._get_max_temperature()

        # Append measurements to attribute lists in perf values dictionary.
        perf_values['cpu'].append(cpu_usage)
        perf_values['mem'].append(mem_usage)
        perf_values['temp'].append(max_temp)

        # Write performance measurements to perf timestamped file.
        time_stamp = time.strftime('%Y/%m/%d %H:%M:%S')
        perf_writer.writerow([time_stamp, cpu_usage, mem_usage, max_temp])
        logging.info('Time: %s, CPU: %s, Mem: %s, Temp: %s',
                     time_stamp, cpu_usage, mem_usage, max_temp)

    def _record_90th_metrics(self, perf_values, perf_metrics):
        """Record 90th percentile metric of attribute performance values.

        @param perf_values: dict of attribute performance values.
        @param perf_metrics: dict of attribute 90%-ile performance metrics.
        """
        # Calculate 90th percentile for each attribute.
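        # Index (len * 9) // 10 of the numerically sorted samples approximates
        # the 90th percentile; e.g., with ~60 one-minute samples per metric
        # interval, this picks index 54 of the sorted list.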
        cpu_values = perf_values['cpu']
        mem_values = perf_values['mem']
        temp_values = perf_values['temp']
        # Sort numerically; the sample values are formatted strings.
        cpu_metric = sorted(cpu_values, key=float)[(len(cpu_values) * 9) // 10]
        mem_metric = sorted(mem_values, key=float)[(len(mem_values) * 9) // 10]
        temp_metric = sorted(temp_values,
                             key=float)[(len(temp_values) * 9) // 10]
        logging.info('== Performance values: %s', perf_values)
        logging.info('== 90th percentile: cpu: %s, mem: %s, temp: %s',
                     cpu_metric, mem_metric, temp_metric)

        # Append 90th percentile to each attribute performance metric.
        perf_metrics['cpu'].append(cpu_metric)
        perf_metrics['mem'].append(mem_metric)
        perf_metrics['temp'].append(temp_metric)

    def _get_median_metrics(self, metrics):
        """Returns median of each attribute performance metric.

        If no metric values were recorded, return 0 for each metric.

        @param metrics: dict of attribute performance metric lists.
        @returns dict of attribute performance metric medians.
        """
        if len(metrics['cpu']):
            # Sort numerically; the metric values are formatted strings.
            cpu_metric = sorted(metrics['cpu'],
                                key=float)[len(metrics['cpu']) // 2]
            mem_metric = sorted(metrics['mem'],
                                key=float)[len(metrics['mem']) // 2]
            temp_metric = sorted(metrics['temp'],
                                 key=float)[len(metrics['temp']) // 2]
        else:
            cpu_metric = 0
            mem_metric = 0
            temp_metric = 0
        logging.info('== Median: cpu: %s, mem: %s, temp: %s',
                     cpu_metric, mem_metric, temp_metric)
        return {'cpu': cpu_metric, 'mem': mem_metric, 'temp': temp_metric}

    def _append_to_aggregated_file(self, ts_file, ag_file):
        """Append contents of perf timestamp file to perf aggregated file.

        @param ts_file: file handle for performance timestamped file.
        @param ag_file: file handle for performance aggregated file.
        """
        next(ts_file)  # Skip first line (the header) of timestamped file.
        for line in ts_file:
            ag_file.write(line)

    def _copy_aggregated_to_resultsdir(self, aggregated_fpath):
        """Copy perf aggregated file to results dir for AutoTest results.

        Note: The AutoTest results default directory is located at /usr/local/
        autotest/results/default/longevity_Tracker/results

        @param aggregated_fpath: file path to aggregated performance values.
        """
        results_fpath = os.path.join(self.resultsdir, 'perf.csv')
        shutil.copy(aggregated_fpath, results_fpath)
        logging.info('Copied %s to %s', aggregated_fpath, results_fpath)

    def _write_perf_keyvals(self, perf_results):
        """Write perf results to keyval file for AutoTest results.

        @param perf_results: dict of attribute performance metrics.
        """
        perf_keyval = {}
        perf_keyval['cpu_usage'] = perf_results['cpu']
        perf_keyval['memory_usage'] = perf_results['mem']
        perf_keyval['temperature'] = perf_results['temp']
        self.write_perf_keyval(perf_keyval)

    def _write_perf_results(self, perf_results):
        """Write perf results to results-chart.json file for Perf Dashboard.

        @param perf_results: dict of attribute performance metrics.
        """
        cpu_metric = perf_results['cpu']
        mem_metric = perf_results['mem']
        ec_metric = perf_results['temp']
        self.output_perf_value(description='cpu_usage', value=cpu_metric,
                               units='%', higher_is_better=False)
        self.output_perf_value(description='mem_usage', value=mem_metric,
                               units='%', higher_is_better=False)
        self.output_perf_value(description='max_temp', value=ec_metric,
                               units='Celsius', higher_is_better=False)

    def _read_perf_results(self):
        """Read perf results from results-chart.json file for Perf Dashboard.

        @returns dict of perf results, formatted as JSON chart data.
        """
        results_file = os.path.join(self.resultsdir, 'results-chart.json')
        with open(results_file, 'r') as fp:
            contents = fp.read()
            chart_data = json.loads(contents)
        return chart_data

    def _get_point_id(self, cros_version, epoch_minutes):
        """Compute point ID from Chrome OS version number and epoch minutes.

        @param cros_version: String of Chrome OS version number.
        @param epoch_minutes: String of minutes since 1970.

        @return unique integer ID computed from given version and epoch.
        """
        # Number of digits from each part of the Chrome OS version string.
        cros_version_col_widths = [0, 4, 3, 2]
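        # Illustrative example (hypothetical values): cros_version
        # '0.6812.75.0' and epoch_minutes '24166666' yield cros_digits
        # '681207500' and a point ID of 24166666681207500.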

        def get_digits(version_num, column_widths):
            if re.match(VERSION_PATTERN, version_num):
                computed_string = ''
                version_parts = version_num.split('.')
                for i, version_part in enumerate(version_parts):
                    if column_widths[i]:
                        computed_string += version_part.zfill(column_widths[i])
                return computed_string
            else:
                return None

        cros_digits = get_digits(cros_version, cros_version_col_widths)
        epoch_digits = epoch_minutes[-8:]
        if not cros_digits:
            return None
        return int(epoch_digits + cros_digits)

    def _get_kiosk_app_info(self, app_id):
        """Get kiosk app name and version from manifest.json file.

        Get the Kiosk App name and version strings from the manifest file of
        the specified |app_id| Extension in the currently running session. If
        |app_id| is empty or None, then return 'none' for the kiosk app info.

        Raise an error if no manifest is found (i.e., |app_id| is not
        running), or if multiple manifest files are found (i.e., |app_id| is
        running, but the |app_id| dir contains multiple versions or manifest
        files).

        @param app_id: string kiosk application identification.
        @returns dict of Kiosk name and version number strings.
        @raises: error.TestError if exactly one manifest file is not found.
        """
        kiosk_app_info = {'name': 'none', 'version': 'none'}
        if not app_id:
            return kiosk_app_info

        # Get path to manifest file of the running Kiosk app_id.
        app_manifest_pattern = (MANIFEST_PATTERN % app_id)
        logging.info('app_manifest_pattern: %s', app_manifest_pattern)
        file_paths = glob.glob(app_manifest_pattern)
        # Raise error if current session has no Kiosk Apps running.
        if len(file_paths) == 0:
            raise error.TestError('Kiosk App ID=%s is not running.' % app_id)
        # Raise error if running Kiosk App has multiple manifest files.
        if len(file_paths) > 1:
            raise error.TestError('Kiosk App ID=%s has multiple manifest '
                                  'files.' % app_id)
        with open(file_paths[0], 'r') as manifest_file:
            kiosk_manifest = manifest_file.read()
        manifest_json = json.loads(kiosk_manifest)
        # If manifest is missing name or version key, set to 'undefined'.
        kiosk_app_info['name'] = manifest_json.get('name', 'undefined')
        kiosk_app_info['version'] = manifest_json.get('version', 'undefined')
        return kiosk_app_info

    def _format_data_for_upload(self, chart_data):
        """Collect chart data into an uploadable data JSON object.

        @param chart_data: performance results formatted as chart data.
        @returns dict with a 'data' key mapping to the JSON-encoded dash entry.
        """
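        # The chromeperf add_point endpoint expects a single form field named
        # 'data' whose value is a JSON-encoded dict; build that dict here.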
        perf_values = {
            'format_version': '1.0',
            'benchmark_name': self.test_suite_name,
            'charts': chart_data,
        }

        dash_entry = {
            'master': 'ChromeOS_Enterprise',
            'bot': 'cros-%s' % self.board_name,
            'point_id': self.point_id,
            'versions': {
                'cros_version': self.chromeos_version,
                'chrome_version': self.chrome_version,
            },
            'supplemental': {
                'default_rev': 'r_cros_version',
                'hardware_identifier': 'a_' + self.hw_id,
                'kiosk_app_name': 'a_' + self.kiosk_app_name,
                'kiosk_app_version': 'r_' + self.kiosk_app_version
            },
            'chart_data': perf_values
        }
        return {'data': json.dumps(dash_entry)}

    def _send_to_dashboard(self, data_obj):
        """Send formatted perf data to the perf dashboard.

        @param data_obj: data object as returned by _format_data_for_upload().

        @raises PerfUploadingError if an exception was raised when uploading.
        """
        logging.debug('data_obj: %s', data_obj)
        encoded = urllib.urlencode(data_obj)
        req = urllib2.Request(DASHBOARD_UPLOAD_URL, encoded)
        try:
            urllib2.urlopen(req)
        except urllib2.HTTPError as e:
            raise PerfUploadingError('HTTPError: %d %s for JSON %s\n' %
                                     (e.code, e.msg, data_obj['data']))
        except urllib2.URLError as e:
            raise PerfUploadingError('URLError: %s for JSON %s\n' %
                                     (str(e.reason), data_obj['data']))
        except httplib.HTTPException:
            raise PerfUploadingError('HTTPException for JSON %s\n' %
                                     data_obj['data'])

    def _get_chrome_version(self):
        """Get the Chrome version number and milestone as strings.

        Invoke "chrome --version" to get the version number and milestone.

        @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
            current Chrome version number as a string (in the form "W.X.Y.Z")
            and "milestone" is the first component of the version number
            (the "W" from "W.X.Y.Z").  If the version number cannot be parsed
            in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
            of "chrome --version" and the milestone will be the empty string.
        """
        chrome_version = utils.system_output(constants.CHROME_VERSION_COMMAND,
                                             ignore_status=True)
        chrome_version = utils.parse_chrome_version(chrome_version)
        return chrome_version

    def _open_perf_file(self, file_path):
        """Open a perf file. Write header line if new. Return file object.

        If the file on |file_path| already exists, then open file for
        appending only. Otherwise open for writing only.

        @param file_path: file path for perf file.
        @returns file object for the perf file.
        """
        # If file exists, open it for appending. Do not write header.
        if os.path.isfile(file_path):
            perf_file = open(file_path, 'a+')
        # Otherwise, create it for writing. Write header on first line.
        else:
            perf_file = open(file_path, 'w')  # Create new file.
            perf_file.write('Time,CPU,Memory,Temperature (C)\r\n')
        return perf_file

    def _run_test_cycle(self):
        """Track performance of Chrome OS over a long period of time.

        This method collects performance measurements, and calculates metrics
        to upload to the performance dashboard. It creates two files to
        collect and store performance values and results: perf_<timestamp>.csv
        and perf_aggregated.csv.

        At the start, it creates a unique perf timestamped file in the test's
        temp_dir. As the cycle runs, it saves a time-stamped performance
        value after each sample interval. Periodically, it calculates
        the 90th percentile performance metrics from these values.

        The perf_<timestamp> files on the device survive multiple runs of
        longevity_Tracker launched by the server-side test, and also survive
        multiple runs of the server-side test itself. cleanup() deletes files
        older than 10 days, to prevent filling up the SSD.

        At the end, it opens the perf aggregated file in the test's temp_dir,
        and appends the contents of the perf timestamped file. It then
        copies the perf aggregated file to the results directory as perf.csv.
        This perf.csv file will be consumed by the AutoTest backend when the
        server-side test ends.

        Note that the perf_aggregated.csv file will grow larger with each run
        of longevity_Tracker on the device by the server-side test. However,
        the server-side test deletes the file at the end.

        This method also calculates 90th percentile and median metrics, and
        returns the median metrics. Median metrics will be pushed to the perf
        dashboard with a unique point_id.

        @returns dict of median performance metrics.
        """
        # Allow the system to stabilize before starting to take measurements.
        test_start_time = time.time()
        time.sleep(STABILIZATION_DURATION)

        perf_values = {'cpu': [], 'mem': [], 'temp': []}
        perf_metrics = {'cpu': [], 'mem': [], 'temp': []}

        # Create perf_<timestamp> file and writer.
        timestamp_fname = (PERF_FILE_NAME_PREFIX +
                           time.strftime('_%Y-%m-%d_%H-%M') + '.csv')
        timestamp_fpath = os.path.join(self.temp_dir, timestamp_fname)
        timestamp_file = self._open_perf_file(timestamp_fpath)
        timestamp_writer = csv.writer(timestamp_file)

        # Align time of loop start with the sample interval.
        test_elapsed_time = self.elapsed_time(test_start_time)
        time.sleep(self.syncup_time(test_elapsed_time, SAMPLE_INTERVAL))
        test_elapsed_time = self.elapsed_time(test_start_time)

        metric_start_time = time.time()
        metric_prev_time = metric_start_time

        metric_elapsed_prev_time = self.elapsed_time(metric_prev_time)
        offset = self.modulo_time(metric_elapsed_prev_time, METRIC_INTERVAL)
        metric_timer = metric_elapsed_prev_time + offset
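        # Sample measurements every SAMPLE_INTERVAL until TEST_DURATION
        # elapses or the exit flag file appears. metric_timer tracks time
        # since the last 90th-percentile calculation, plus an offset that
        # keeps metric boundaries aligned with metric_start_time.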
        while self.elapsed_time(test_start_time) <= TEST_DURATION:
            if os.path.isfile(EXIT_FLAG_FILE):
                logging.info('Exit flag file detected. Exiting test.')
                break
            self._record_perf_measurements(perf_values, timestamp_writer)

            # Periodically calculate and record 90th percentile metrics.
            metric_elapsed_prev_time = self.elapsed_time(metric_prev_time)
            metric_timer = metric_elapsed_prev_time + offset
            if metric_timer >= METRIC_INTERVAL:
                self._record_90th_metrics(perf_values, perf_metrics)
                perf_values = {'cpu': [], 'mem': [], 'temp': []}

                # Set previous time to current time.
                metric_prev_time = time.time()
                metric_elapsed_prev_time = self.elapsed_time(metric_prev_time)

                # Calculate offset based on the original start time.
                metric_elapsed_time = self.elapsed_time(metric_start_time)
                offset = self.modulo_time(metric_elapsed_time, METRIC_INTERVAL)

                # Set the timer to time elapsed plus offset to next interval.
                metric_timer = metric_elapsed_prev_time + offset

            # Sync the loop time to the sample interval.
            test_elapsed_time = self.elapsed_time(test_start_time)
            time.sleep(self.syncup_time(test_elapsed_time, SAMPLE_INTERVAL))

        # Close perf timestamp file.
        timestamp_file.close()

        # Open perf timestamp file to read, and aggregated file to append.
        timestamp_file = open(timestamp_fpath, 'r')
        aggregated_fname = (PERF_FILE_NAME_PREFIX + '_aggregated.csv')
        aggregated_fpath = os.path.join(self.temp_dir, aggregated_fname)
        aggregated_file = self._open_perf_file(aggregated_fpath)

        # Append contents of perf timestamp file to perf aggregated file.
        self._append_to_aggregated_file(timestamp_file, aggregated_file)
        timestamp_file.close()
        aggregated_file.close()

        # Copy perf aggregated file to test results directory.
        self._copy_aggregated_to_resultsdir(aggregated_fpath)

        # Return median of each attribute performance metric.
        return self._get_median_metrics(perf_metrics)

    def run_once(self, kiosk_app_attributes=None):
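        """Run the longevity tracking test.

        @param kiosk_app_attributes: colon-delimited string of the Kiosk app
            name, id, and extension page, i.e. 'app_name:app_id:ext_page',
            or None.
        """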
        app_name = app_id = ext_page = None
        if kiosk_app_attributes:
            app_name, app_id, ext_page = (
                kiosk_app_attributes.rstrip().split(':'))
        self.subtest_name = app_name
        self.board_name = utils.get_board()
        self.hw_id = self._get_hwid()
        self.chrome_version = self._get_chrome_version()[0]
        self.chromeos_version = '0.' + utils.get_chromeos_release_version()
        self.epoch_minutes = str(int(time.time() / 60))  # Minutes since 1970.
        self.point_id = self._get_point_id(self.chromeos_version,
                                           self.epoch_minutes)

        kiosk_info = self._get_kiosk_app_info(app_id)
        self.kiosk_app_name = kiosk_info['name']
        self.kiosk_app_version = kiosk_info['version']
        self.test_suite_name = self.tagged_testname
        if self.subtest_name:
            self.test_suite_name += '.' + self.subtest_name

        # Delete exit flag file at start of test run.
        if os.path.isfile(EXIT_FLAG_FILE):
            os.remove(EXIT_FLAG_FILE)

        # Run a single test cycle.
        self.perf_results = {'cpu': '0', 'mem': '0', 'temp': '0'}
        self.perf_results = self._run_test_cycle()

        # Write results for AutoTest to pick up at end of test.
        self._write_perf_keyvals(self.perf_results)
        self._write_perf_results(self.perf_results)

        # Post perf results directly to performance dashboard. You may view
        # uploaded data at https://chromeperf.appspot.com/new_points,
        # with test path pattern=ChromeOS_Enterprise/cros-*/longevity*/*
        chart_data = self._read_perf_results()
        data_obj = self._format_data_for_upload(chart_data)
        self._send_to_dashboard(data_obj)

    def cleanup(self):
        """Delete aged perf data files and the exit flag file."""
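        # 'find -mmin +N' matches files last modified more than N minutes ago,
        # so this deletes perf files older than OLD_FILE_AGE (10 days).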
        cmd = ('find %s -name "%s*" -type f -mmin +%s -delete' %
               (self.temp_dir, PERF_FILE_NAME_PREFIX, OLD_FILE_AGE))
        os.system(cmd)
        if os.path.isfile(EXIT_FLAG_FILE):
            os.remove(EXIT_FLAG_FILE)