1# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import os
6
7from autotest_lib.client.bin import test
8from autotest_lib.client.bin import utils
9from autotest_lib.client.common_lib import error
10
11import numpy
12
13import perf_lbr_verification
14import perf_verification
15import stats_utils
16
17
# Intel microarchitectures on which the 'record -b' flavour of the test is
# allowed to run; warmup() reports TestNAError for any other uarch.
# 'Broadwell' is deliberately left out for now: waiting on kernel support.
INTEL_LBR_UARCHS = (
    'Haswell',
    'IvyBridge',
    'SandyBridge',
)
23
24
class hardware_PerfCounterVerification(test.test):
    """Verify perf counters count what we think they count.

    For cycles and instructions, we expect a strong correlation between
    the number of iterations of a "noploop" program and the number of
    cycles and instructions. For TLB misses, we expect a strong correlation
    between the number of misses and the number of iterations of a matching
    benchmark. Each loop iteration should retire a constant number of
    additional instructions, and should take a nearly constant number of
    additional cycles or misses.
    """

    version = 1
    preserve_srcdir = True

    # (event name, minimum acceptable r^2 of "event ~ loops"), checked in
    # this order by postprocess_iteration().
    _MIN_R_SQUARED = (
        ('cycles', 0.996),
        ('instructions', 0.999),
        ('iTLB-misses', 0.999),
        ('dTLB-misses', 0.999),
    )
    # Minimum acceptable r^2 of "branch_count ~ loops" in 'record -b' mode.
    _MIN_BRANCH_COUNT_R_SQUARED = 0.9999999

    def initialize(self, perf_cmd='stat', events=('cycles', 'instructions')):
        """Record the test configuration.

        @param perf_cmd: Either 'stat' or 'record -b'; selects how run_once()
                gathers its measurements.
        @param events: Sequence of perf event names. They are joined with
                commas and handed to the gathering helper in run_once().
        """
        self.job.require_gcc()
        self.perf_cmd = perf_cmd
        self.events = events

    def setup(self):
        """Rebuild the benchmark programs from a clean tree in srcdir."""
        os.chdir(self.srcdir)
        utils.make('clean')
        utils.make()

    def warmup(self):
        """Bail out early on platforms the test does not apply to.

        @raises error.TestNAError: if perf_cmd is 'record -b' and the CPU
                microarchitecture is not listed in INTEL_LBR_UARCHS, or if
                the board is one of the unsupported boards.
        """
        if self.perf_cmd == 'record -b':
            uarch = utils.get_intel_cpu_uarch()
            if uarch not in INTEL_LBR_UARCHS:
                raise error.TestNAError('Unsupported microarchitecture.')
        unsupported_boards = ['gizmo']
        board = utils.get_board()
        if board in unsupported_boards:
            raise error.TestNAError('Unsupported board')

    def run_once(self, program, multiplier, **kwargs):
        """Run the benchmark under perf and keep the measurements.

        The gathered measurements are stored in self.facts for
        postprocess_iteration().

        @param program: Benchmark file name, relative to srcdir.
        @param multiplier: Forwarded to perf_verification.GatherPerfStats()
                in 'stat' mode. It is not used in 'record -b' mode, where a
                fixed value of 10000 is passed instead.
        @param kwargs: Accepted and ignored.

        @raises error.TestError: if perf_cmd is not a recognized mode.
        """
        program = os.path.join(self.srcdir, program)
        events = ','.join(self.events)
        if self.perf_cmd == 'stat':
            self.facts = perf_verification.GatherPerfStats(
                    program, events, multiplier)
        elif self.perf_cmd == 'record -b':
            branch = perf_lbr_verification.ReadBranchAddressesFile(
                    os.path.join(self.srcdir, 'noploop_branch.txt'))
            self.facts = perf_lbr_verification.GatherPerfBranchSamples(
                    program, branch, events, 10000)
        else:
            raise error.TestError('Unrecognized perf_cmd')

    def postprocess_iteration(self):
        """Regress every counter against the loop count and judge the fit.

        For each measured column other than 'loops', a linear regression of
        that column on 'loops' is computed and its slope, intercept and r^2
        are reported as perf values. For TLB-miss events the maximum number
        of misses per million cycles is reported as well. The test fails if
        the r^2 of a checked counter falls below its threshold.

        @raises error.TestError: if perf_cmd is not a recognized mode.
        @raises error.TestFail: if a counter correlates poorly with loops.
        """
        # The builtin int is used for the field types: numpy.int was merely
        # an alias for it and no longer exists in recent NumPy releases.
        if self.perf_cmd == 'stat':
            dt = numpy.dtype([('loops', int)] +
                             [(e, int) for e in self.events])
        elif self.perf_cmd == 'record -b':
            dt = numpy.dtype([('loops', int),
                              ('branch_count', int)])
        else:
            # Mirror run_once() instead of failing later with a NameError.
            raise error.TestError('Unrecognized perf_cmd')
        arr = stats_utils.FactsToNumpyArray(self.facts, dt)
        results = {}
        is_tlb_benchmark = ('iTLB-misses' in dt.names or
                            'dTLB-misses' in dt.names)
        for y_var in dt.names:
            if y_var == 'loops':
                continue
            # In a TLB benchmark, cycles only serve as the denominator of
            # the miss rate below; they are not regressed against loops.
            if y_var == 'cycles' and is_tlb_benchmark:
                continue
            (slope, intercept), r2 = stats_utils.LinearRegression(
                    arr['loops'], arr[y_var])
            prefix = y_var + '_'
            results[prefix + 'slope'] = slope
            results[prefix + 'intercept'] = intercept
            results[prefix + 'r_squared'] = r2
            if y_var in ('dTLB-misses', 'iTLB-misses'):
                # Assumes every fact also carries a 'cycles' count, i.e.
                # that 'cycles' was among the requested events.
                misses_per_million_cycles = [
                        x[y_var] * 1.0e6 / x['cycles'] for x in self.facts]
                # The key spelling ("milion") is kept as is: it is the
                # name under which the value has always been reported.
                rvar = prefix + 'misses_per_milion_cycles'
                results[rvar] = numpy.max(misses_per_million_cycles)

        # Output the standard Autotest way:
        self.write_perf_keyval(results)
        # ... And the CrOS-specific way:
        for k, v in results.items():
            self.output_perf_value(k, v)

        for event, min_r_squared in self._MIN_R_SQUARED:
            key = event + '_r_squared'
            # Only counters that were actually regressed can be judged.
            # This skips cycles in TLB benchmarks, and it skips every
            # requested event in 'record -b' mode, where only branch_count
            # is measured.
            if event not in self.events or key not in results:
                continue
            if results[key] < min_r_squared:
                raise error.TestFail(
                        'Poor correlation for %s ~ loops' % event)
        if (self.perf_cmd == 'record -b' and
                results['branch_count_r_squared'] <
                self._MIN_BRANCH_COUNT_R_SQUARED):
            raise error.TestFail('Poor correlation for branch_count ~ loops')
122