1# Copyright (c) 2009 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import glob
6import logging
7import os
8import re
9import shutil
10import time
11import utils
12
13from autotest_lib.client.bin import test
14from autotest_lib.client.common_lib import error
15
16class platform_BootPerf(test.test):
17    """Test to gather recorded boot time statistics.
18
19    The primary function of this test is to gather a rather large
20    assortment of performance keyvals that capture timing and disk
21    usage statistics associated with the most recent boot or reboot.
22
23    The test calculates some or all of the following keyvals:
24      * seconds_kernel_to_startup
25      * seconds_kernel_to_startup_done
26      * seconds_kernel_to_x_started
27      * seconds_kernel_to_chrome_exec
28      * seconds_kernel_to_chrome_main
29      * seconds_kernel_to_signin_start
30      * seconds_kernel_to_signin_wait
31      * seconds_kernel_to_signin_users
32      * seconds_kernel_to_signin_seen
33      * seconds_kernel_to_login
34      * seconds_kernel_to_network
35      * rdbytes_kernel_to_startup
36      * rdbytes_kernel_to_startup_done
37      * rdbytes_kernel_to_x_started
38      * rdbytes_kernel_to_chrome_exec
39      * rdbytes_kernel_to_chrome_main
40      * rdbytes_kernel_to_login
41      * seconds_power_on_to_lf_start
42      * seconds_power_on_to_lf_end
43      * seconds_power_on_to_lk_start
44      * seconds_power_on_to_lk_end
45      * seconds_power_on_to_kernel
46      * seconds_shutdown_time
47      * seconds_reboot_time
48      * seconds_reboot_error
49      * mhz_primary_cpu
50
51    """
52
53    version = 2
54
55    # Names of keyvals, their associated bootstat events, 'needs_x' flag and
56    # 'Required' flag.
57    # Events that needs X are not checked in the freon world.
58    # Test fails if a required event is not found.
59    # Each event samples statistics measured since kernel startup
60    # at a specific moment on the boot critical path:
61    #   pre-startup - The start of the `chromeos_startup` script;
62    #     roughly, the time when /sbin/init emits the `startup`
63    #     Upstart event.
64    #   post-startup - Completion of the `chromeos_startup` script.
65    #   x-started - Completion of X server initialization.
66    #   chrome-exec - The moment when session_manager exec's the
67    #     first Chrome process.
68    #   chrome-main - The moment when the first Chrome process
69    #     begins executing in main().
70    #   kernel_to_signin_start - The moment when LoadPage(loginSceenURL)
71    #     is called, i.e. initialization starts.
72    #   kernel_to_signin_wait - The moment when UI thread has finished signin
73    #     screen initialization and now waits until JS sends "ready" event.
74    #   kernel_to_signin_users - The moment when UI thread receives "ready" from
75    #     JS code. So V8 is initialized and running, etc...
76    #   kernel_to_signin_seen - The moment when user can actually see signin UI.
77    #   boot-complete - Completion of boot after Chrome presents the
78    #     login screen.
79    _EVENT_KEYVALS = [
80        # N.B.  Keyval attribute names go into a database that
81        # truncates after 30 characters.  The key names below are
82        # prefixed with 8 characters, either 'seconds_' or
83        # 'rdbytes_', so we have 22 characters wiggle room.
84        #
85        # ----+----1----+----2--
86        ('kernel_to_startup',       'pre-startup',               False, True),
87        ('kernel_to_startup_done',  'post-startup',              False, True),
88        ('kernel_to_x_started',     'x-started',                 True,  True),
89        ('kernel_to_chrome_exec',   'chrome-exec',               False, True),
90        ('kernel_to_chrome_main',   'chrome-main',               False, True),
91        # These two events do not happen if device is in OOBE.
92        ('kernel_to_signin_start',  'login-start-signin-screen', False, False),
93        ('kernel_to_signin_wait',
94            'login-wait-for-signin-state-initialize',            False, False),
95        # This event doesn't happen if device has no users.
96        ('kernel_to_signin_users',  'login-send-user-list',      False, False),
97        ('kernel_to_signin_seen',   'login-prompt-visible',      False, True),
98        ('kernel_to_login',         'boot-complete',             False, True)
99    ]
100
101    _CPU_FREQ_FILE = ('/sys/devices/system/cpu/cpu0'
102                      '/cpufreq/cpuinfo_max_freq')
103
104    _UPTIME_PREFIX = 'uptime-'
105    _DISK_PREFIX = 'disk-'
106
107    _FIRMWARE_TIME_FILE = '/tmp/firmware-boot-time'
108
109    _BOOTSTAT_ARCHIVE_GLOB = '/var/log/metrics/shutdown.[0-9]*'
110    _UPTIME_FILE_GLOB = os.path.join('/tmp', _UPTIME_PREFIX + '*')
111    _DISK_FILE_GLOB = os.path.join('/tmp', _DISK_PREFIX + '*')
112
113    _RAMOOPS_FILE = "/dev/pstore/console-ramoops"
114
115
116    def _copy_timestamp_files(self):
117        """Copy raw data files to the test results."""
118        statlist = (glob.glob(self._UPTIME_FILE_GLOB) +
119                            glob.glob(self._DISK_FILE_GLOB))
120        for fname in statlist:
121            shutil.copy(fname, self.resultsdir)
122        try:
123            shutil.copy(self._FIRMWARE_TIME_FILE, self.resultsdir)
124        except Exception:
125            pass
126
127    def _copy_console_ramoops(self):
128        """Copy console_ramoops from previous reboot."""
129        # If reboot was misbehaving, looking at ramoops may provide clues.
130        try:
131            shutil.copy(self._RAMOOPS_FILE, self.resultsdir)
132        except Exception:
133            pass
134
135    def _parse_bootstat(self, filename, fieldnum):
136        """Read values from a bootstat event file.
137
138        Each line of a bootstat event file represents one occurrence
139        of the event.  Each line is a copy of the content of
140        /proc/uptime ("uptime-" files) or /sys/block/<dev>/stat
141        ("disk-" files), captured at the time of the occurrence.
142        For either kind of file, each line is a blank separated list
143        of fields.
144
145        The given event file can contain either uptime or disk data.
146        This function reads all lines (occurrences) in the event
147        file, and returns the value of the given field
148
149        @param filename         Filename of the bootstat event.
150        @param fieldnum         Which field of the file.
151        @return                 List of values of `fieldnum` for
152                                all occurrences in the file.
153        @raises error.TestFail  Raised if the event file is missing,
154                                unreadable, or malformed.
155
156        """
157        try:
158            with open(filename) as statfile:
159                values = map(lambda l: float(l.split()[fieldnum]),
160                             statfile.readlines())
161            return values
162        except IOError:
163            raise error.TestFail('Failed to read bootstat file "%s"' %
164                                 filename)
165
166
167    def _parse_uptime(self, eventname, bootstat_dir='/tmp', index=0):
168        """Return time since boot for a bootstat event.
169
170        @param eventname        Name of the bootstat event.
171        @param boostat_dir      Directory containing the bootstat
172                                files.
173        @param index            Index of which occurrence of the event
174                                to select.
175        @return                 Time since boot for the selected
176                                event.
177
178        """
179        event_file = os.path.join(bootstat_dir,
180                                  self._UPTIME_PREFIX) + eventname
181        return self._parse_bootstat(event_file, 0)[index]
182
183
184    def _parse_diskstat(self, eventname, bootstat_dir='/tmp', index=0):
185        """Return sectors read since boot for a bootstat event.
186
187        @param eventname        Name of the bootstat event.
188        @param boostat_dir      Directory containing the bootstat files.
189        @param index            Index of which occurrence of the event
190                                to select.
191        @return                 Number of sectors read since boot for
192                                the selected event.
193
194        """
195        event_file = os.path.join(bootstat_dir,
196                                  self._DISK_PREFIX) + eventname
197        return self._parse_bootstat(event_file, 2)[index]
198
199
200    def _gather_vboot_times(self, results):
201        """Read and report firmware internal timestamps.
202
203        The firmware for all Chrome platforms except Mario records
204        the ticks since power on at selected times during startup.
205        These timestamps can be extracted from the `crossystem`
206        command.
207
208        If the timestamps are available, convert the tick times to
209        seconds and record the following keyvals in `results`:
210          * seconds_power_on_to_lf_start
211          * seconds_power_on_to_lf_end
212          * seconds_power_on_to_lk_start
213          * seconds_power_on_to_lk_end
214
215        The frequency of the recorded tick timestamps is determined
216        by reading `_CPU_FREQ_FILE` and is recorded in the keyval
217        mhz_primary_cpu.
218
219        @param results  Keyvals dictionary.
220
221        """
222        try:
223            khz = int(utils.read_one_line(self._CPU_FREQ_FILE))
224        except IOError:
225            logging.info('Test is unable to read "%s", not calculating the '
226                         'vboot times.', self._CPU_FREQ_FILE)
227            return
228        hertz = khz * 1000.0
229        results['mhz_primary_cpu'] = khz / 1000.0
230        try:
231            out = utils.system_output('crossystem')
232        except error.CmdError:
233            logging.info('Unable to run crossystem, not calculating the vboot '
234                         'times.')
235            return
236        # Parse the crossystem output, we are looking for vdat_timers
237        items = out.splitlines()
238        for item in items:
239            times_re = re.compile(r'LF=(\d+),(\d+) LK=(\d+),(\d+)')
240            match = re.findall(times_re, item)
241            if match:
242                times = map(lambda s: round(float(s) / hertz, 2), match[0])
243                results['seconds_power_on_to_lf_start'] = times[0]
244                results['seconds_power_on_to_lf_end'] = times[1]
245                results['seconds_power_on_to_lk_start'] = times[2]
246                results['seconds_power_on_to_lk_end'] = times[3]
247
248
249    def _gather_firmware_boot_time(self, results):
250        """Read and report firmware startup time.
251
252        The boot process writes the firmware startup time to the
253        file named in `_FIRMWARE_TIME_FILE`.  Read the time from that
254        file, and record it in `results` as the keyval
255        seconds_power_on_to_kernel.
256
257        @param results  Keyvals dictionary.
258
259        """
260        try:
261            # If the firmware boot time is not available, the file
262            # will not exist.
263            data = utils.read_one_line(self._FIRMWARE_TIME_FILE)
264        except IOError:
265            return
266        firmware_time = float(data)
267        boot_time = results['seconds_kernel_to_login']
268        results['seconds_power_on_to_kernel'] = firmware_time
269        results['seconds_power_on_to_login'] = (
270                round(firmware_time + boot_time, 2))
271
272
273    def _gather_time_keyvals(self, results):
274        """Read and report boot time keyvals.
275
276        Read "seconds since kernel startup" from the bootstat files
277        for the events named in `_EVENT_KEYVALS`, and store the
278        values as perf keyvals.  The following keyvals are recorded:
279          * seconds_kernel_to_startup
280          * seconds_kernel_to_startup_done
281          * seconds_kernel_to_x_started
282          * seconds_kernel_to_chrome_exec
283          * seconds_kernel_to_chrome_main
284          * seconds_kernel_to_login
285          * seconds_kernel_to_network
286        All of these keyvals are considered mandatory, except
287        for seconds_kernel_to_network.
288
289        @param results          Keyvals dictionary.
290        @raises error.TestFail  Raised if any mandatory keyval can't
291                                be determined.
292
293        """
294        for keyval_name, event_name, needs_x, required in self._EVENT_KEYVALS:
295            if needs_x and utils.is_freon():
296                continue
297            key = 'seconds_' + keyval_name
298            try:
299                results[key] = self._parse_uptime(event_name)
300            except error.TestFail:
301                if required:
302                    raise;
303
304        # Not all 'uptime-network-*-ready' files necessarily exist;
305        # probably there's only one.  We go through a list of
306        # possibilities and pick the first one we find.  We're not
307        # looking for 3G here, so we're not guaranteed to find any
308        # file.
309        network_ready_events = [
310            'network-wifi-ready',
311            'network-ethernet-ready'
312        ]
313
314        for event_name in network_ready_events:
315            try:
316                network_time = self._parse_uptime(event_name)
317                results['seconds_kernel_to_network'] = network_time
318                break
319            except error.TestFail:
320                pass
321
322
323    def _gather_disk_keyvals(self, results):
324        """Read and report disk read keyvals.
325
326        Read "sectors read since kernel startup" from the bootstat
327        files for the events named in `_EVENT_KEYVALS`, convert the
328        values to "bytes read since boot", and store the values as
329        perf keyvals.  The following keyvals are recorded:
330          * rdbytes_kernel_to_startup
331          * rdbytes_kernel_to_startup_done
332          * rdbytes_kernel_to_x_started
333          * rdbytes_kernel_to_chrome_exec
334          * rdbytes_kernel_to_chrome_main
335          * rdbytes_kernel_to_login
336
337        Disk statistics are reported in units of 512 byte sectors;
338        we convert the keyvals to bytes so that downstream consumers
339        don't have to ask "How big is a sector?".
340
341        @param results  Keyvals dictionary.
342
343        """
344        # We expect an error when reading disk statistics for the
345        # "chrome-main" event because Chrome (not bootstat) generates
346        # that event, and it doesn't include the disk statistics.
347        # We get around that by ignoring all errors.
348        for keyval_name, event_name, needs_x, required in self._EVENT_KEYVALS:
349            if needs_x and utils.is_freon():
350                continue
351            try:
352                key = 'rdbytes_' + keyval_name
353                results[key] = 512 * self._parse_diskstat(event_name)
354            except Exception:
355                pass
356
357
358    def _calculate_timeval(self, event, t0, t1, t_uptime):
359        """Estimate the absolute time of a time since boot.
360
361        Input values `event` and `t_uptime` are times measured as
362        seconds since boot (for the same boot event, as from
363        /proc/uptime).  The input values `t0` and `t1` are two
364        values measured as seconds since the epoch.  The three "t"
365        values were sampled in the order `t0`, `t_uptime`, `t1`.
366
367        Estimate the time of `event` measured as seconds since the
368        epoch.  Also estimate the worst-case error based on the time
369        elapsed between `t0` and `t1`.
370
371        All values are floats.  The precision of `event` and
372        `t_uptime` is expected to be kernel jiffies (i.e. one
373        centisecond).  The output result is rounded to the nearest
374        jiffy.
375
376        @param event    A time to be converted from "seconds since
377                        boot" into "seconds since the epoch".
378        @param t0       A reference time earlier than time `t1`, and
379                        measured as "seconds since the epoch".
380        @param t1       A reference time later than time `t0`, and
381                        measured as "seconds since the epoch".
382        @param t_uptime A reference time measured as "seconds since
383                        boot", in between time `t0` and `t1`.
384
385        @return         An estimate of the time of `event` measured
386                        as seconds since the epoch, rounded to the
387                        nearest jiffy.
388
389        """
390        # Floating point geeks may argue that these calculations
391        # don't guarantee the promised precision:  I don't care;
392        # it's good enough.
393        boot_timeval = round((t0 + t1) / 2, 2) - t_uptime
394        error = (t1 - t0) / 2
395        return boot_timeval + event, error
396
397
398    def _gather_reboot_keyvals(self, results):
399        """Read and report shutdown and reboot times.
400
401        The shutdown process saves all bootstat files in /var/log,
402        plus it saves a timestamp file that can be used to convert
403        "time since boot" into times in UTC.  Read the saved files
404        from the most recent shutdown, and use them to calculate
405        the time spent from the start of that shutdown until the
406        completion of the most recent boot.  Record these keyvals:
407          * seconds_shutdown_time
408          * seconds_reboot_time
409          * seconds_reboot_error
410
411        @param results  Keyvals dictionary.
412
413        """
414        bootstat_archives = glob.glob(self._BOOTSTAT_ARCHIVE_GLOB)
415        if not bootstat_archives:
416            return
417        bootstat_dir = max(bootstat_archives)
418        boot_id = open("/proc/sys/kernel/random/boot_id", "r").read()
419        didrun_path = os.path.join(bootstat_dir, "bootperf_ran")
420        if not os.path.exists(didrun_path):
421            with open(didrun_path, "w") as didrun:
422                didrun.write(boot_id)
423        elif open(didrun_path, "r").read() != boot_id:
424            logging.warning("Ignoring reboot based on stale shutdown %s",
425                         os.path.basename(bootstat_dir))
426            return
427        timestamp_path = os.path.join(bootstat_dir, 'timestamp')
428        try:
429            with open(timestamp_path) as timestamp:
430                archive_t0 = float(timestamp.readline())
431                archive_t1 = float(timestamp.readline())
432        except IOError:
433            raise error.TestFail('Failed to read "%s"' % timestamp_path)
434        archive_uptime = self._parse_uptime('archive',
435                                            bootstat_dir=bootstat_dir)
436        shutdown_uptime = self._parse_uptime('ui-post-stop',
437                                             bootstat_dir=bootstat_dir,
438                                             index=-1)
439        shutdown_timeval, shutdown_error = self._calculate_timeval(
440                shutdown_uptime, archive_t0, archive_t1, archive_uptime)
441        boot_t0 = time.time()
442        with open('/proc/uptime') as uptime_file:
443            uptime = float(uptime_file.readline().split()[0])
444        boot_t1 = time.time()
445        boot_timeval, boot_error = self._calculate_timeval(
446                results['seconds_kernel_to_login'],
447                boot_t0, boot_t1, uptime)
448        reboot_time = round(boot_timeval - shutdown_timeval, 2)
449        poweron_time = results['seconds_power_on_to_login']
450        shutdown_time = round(reboot_time - poweron_time, 2)
451        results['seconds_reboot_time'] = reboot_time
452        results['seconds_reboot_error'] = shutdown_error + boot_error
453        results['seconds_shutdown_time'] = shutdown_time
454
455
456    def run_once(self):
457        """Gather boot time statistics.
458
459        Every shutdown and boot creates `bootstat` files with
460        summary statistics for time elapsed and disk usage.  Gather
461        the values reported for shutdown, boot time and network
462        startup time, and record them as perf keyvals.
463
464        Additionally, gather information about firmware startup time
465        from various sources, and record the times as perf keyvals.
466
467        Finally, copy the raw data files to the results directory
468        for reference.
469
470        """
471        # `results` is the keyvals dictionary
472        results = {}
473
474        self._gather_time_keyvals(results)
475        self._gather_disk_keyvals(results)
476        self._gather_firmware_boot_time(results)
477        self._gather_vboot_times(results)
478        self._gather_reboot_keyvals(results)
479
480        self._copy_timestamp_files()
481        self._copy_console_ramoops()
482
483        self.write_perf_keyval(results)
484