1#    Copyright 2015-2016 ARM Limited
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15
16"""
17The SchedMatrix provides an ability to compare two executions
18of benchmarks with multiple processes.
19
For example, consider a benchmark that spawns 4 identical threads,
where any two of the threads should exhibit one behaviour and the
remaining two should exhibit another behaviour that is identical
between them but different from that of the first pair.
23
24SchedMatrix creates a Matrix of Scheduler Waveform Correlations
25
26A = Reference Execution
27B = Execution to be Evaluated
28
29.. code::
30
31               +---+  +---+
32               |   |  |   |
33    A1, B3 +---+   +--+   +--------------+
34                          +---+  +---+
35                          |   |  |   |
36    A2, B4 +--------------+   +--+   +---+
37               +---+  +---+
38               |   |  |   |
39    A3, B1 +---+   +--+   +--------------+
40                          +---+  +---+
41                          |   |  |   |
42    A4, B2 +--------------+   +--+   +---+
43
44
45**Correlation Matrix**
46
47    === ==== ==== ==== ====
48         B1   B2   B3   B4
49    === ==== ==== ==== ====
50    A1   1    0    1    0
51    A2   0    1    0    1
52    A3   1    0    1    0
53    A4   0    1    0    1
54    === ==== ==== ==== ====
55
56
Thus a success criterion can be defined as each reference thread (A1 to A4)
having two similar threads in the evaluated execution
59::
60
61    assertSiblings(A1, 2, operator.eq)
62    assertSiblings(A2, 2, operator.eq)
63    assertSiblings(A3, 2, operator.eq)
64    assertSiblings(A4, 2, operator.eq)
65"""
66
67
68import sys
69import trappy
70import numpy as np
71from trappy.stats.Aggregator import MultiTriggerAggregator
72from trappy.stats.Correlator import Correlator
73from bart.sched import functions as sched_funcs
74from bart.common import Utils
75
# Default correlation threshold used by SchedMatrix.getSiblings and
# SchedMatrix.assertSiblings: a correlation strictly greater than this
# value classifies a process as a sibling.
POSITIVE_TOLERANCE = 0.80
77
78# pylint: disable=invalid-name
79# pylint: disable=too-many-arguments
80
81
class SchedMatrix(object):

    """
    Build a matrix of scheduler waveform correlations between the
    processes of a reference trace (rows) and the processes of a
    trace to be verified (columns).

    :param reference_trace: The trace file path/ftrace object
        to be used as a reference
    :type reference_trace: str, :mod:`trappy.ftrace.FTrace`

    :param trace: The trace file path/ftrace object
        to be verified
    :type trace: str, :mod:`trappy.ftrace.FTrace`

    :param topology: A topology that describes the arrangement of
        CPU's on a system. This is useful for multi-cluster systems
        where data needs to be aggregated at different topological
        levels
    :type topology: :mod:`trappy.stats.Topology.Topology`

    :param execnames: The execnames of the task to be analysed

        A single execname or a list of execnames can be passed.
        There can be multiple processes associated with a single
        execname parameter. The execnames are searched using a prefix
        match.
    :type execnames: list, str

    :param aggfunc: The aggregation function handed to every
        :mod:`trappy.stats.Aggregator.MultiTriggerAggregator` that is
        created for a PID. Defaults to ``bart.sched.functions.csum``.
    :type aggfunc: function

    :raises RuntimeError: If the two traces do not yield the same
        number of PIDs for the given execnames

    Consider the following processes which need to be analysed:

    * **Reference Trace**

            ===== ==============
             PID    execname
            ===== ==============
             11     task_1
             22     task_2
             33     task_3
            ===== ==============

    * **Trace to be verified**

            ===== ==============
             PID    execname
            ===== ==============
             77     task_1
             88     task_2
             99     task_3
            ===== ==============


    A :mod:`bart.sched.SchedMatrix.SchedMatrix` instance can be created
    in the following ways:

        - Using execname prefix match
          ::

            SchedMatrix(r_trace, trace, topology,
                        execnames="task_")

        - Individual Task names
          ::

            SchedMatrix(r_trace, trace, topology,
                        execnames=["task_1", "task_2", "task_3"])

    """

    def __init__(
            self,
            reference_trace,
            trace,
            topology,
            execnames,
            aggfunc=sched_funcs.csum):

        run = Utils.init_ftrace(trace)
        reference_run = Utils.init_ftrace(reference_trace)

        self._execnames = Utils.listify(execnames)
        self._reference_pids = self._populate_pids(reference_run)
        self._pids = self._populate_pids(run)

        # Validate before building the matrix: the matrix generation walks
        # both PID lists in lockstep, so a size mismatch would otherwise
        # surface as an opaque IndexError (or as wasted correlation work)
        # instead of this explicit error.
        if len(self._pids) != len(self._reference_pids):
            raise RuntimeError(
                "The runs do not have the same number of PIDs for {0}".format(
                    str(execnames)))

        self._dimension = len(self._pids)
        self._topology = topology
        self._matrix = self._generate_matrix(run, reference_run, aggfunc)

    def _populate_pids(self, run):
        """Populate the qualifying PIDs from the run

        :param run: The ftrace object to look the execnames up in

        :return: The PIDs reported by
            ``sched_funcs.get_pids_for_process`` for the configured
            execnames. When several execnames are configured the
            per-execname results are merged and de-duplicated.
        """

        if len(self._execnames) == 1:
            return sched_funcs.get_pids_for_process(run, self._execnames[0])

        pids = []

        for proc in self._execnames:
            pids += sched_funcs.get_pids_for_process(run, proc)

        # Prefix matches of different execnames may overlap; keep each
        # PID only once.
        return list(set(pids))

    def _make_aggregator(self, run, pid, aggfunc):
        """Create the SchedSwitch trigger aggregator for one PID of a run"""

        return MultiTriggerAggregator(
            sched_funcs.sched_triggers(
                run,
                pid,
                trappy.sched.SchedSwitch
                ),
            self._topology,
            aggfunc)

    def _generate_matrix(self, run, reference_run, aggfunc):
        """Generate the Correlation Matrix

        Element ``[i][j]`` holds the total correlation between the
        i-th reference PID and the j-th PID of the trace to be verified.

        :param run: The ftrace object of the trace to be verified
        :param reference_run: The ftrace object of the reference trace
        :param aggfunc: The aggregation function for the aggregators

        :return: A square :mod:`numpy` array of side ``self._dimension``
        """

        reference_aggs = []
        aggs = []

        # Both PID lists have the same length (checked in __init__), so
        # they can be walked in lockstep.
        for reference_pid, pid in zip(self._reference_pids, self._pids):
            reference_aggs.append(
                self._make_aggregator(reference_run, reference_pid, aggfunc))
            aggs.append(self._make_aggregator(run, pid, aggfunc))

        # pylint fails to recognize numpy members.
        # pylint: disable=no-member
        matrix = np.zeros((self._dimension, self._dimension))
        # pylint: enable=no-member

        # Carry the indices along with enumerate() rather than looking
        # them up again with list.index() for every pair.
        for i, ref_result in enumerate(reference_aggs):
            for j, test_result in enumerate(aggs):
                corr = Correlator(
                    ref_result,
                    test_result,
                    corrfunc=sched_funcs.binary_correlate,
                    filter_gaps=True)
                _, total = corr.correlate(level="cluster")

                matrix[i][j] = total

        return matrix

    def print_matrix(self):
        """Print the correlation matrix to the standard output,
        one row per line, using a ``%5.5f`` format for each value
        """

        # pylint fails to recognize numpy members.
        # pylint: disable=no-member
        np.set_printoptions(precision=5)
        np.set_printoptions(suppress=False)
        np.savetxt(sys.stdout, self._matrix, "%5.5f")
        # pylint: enable=no-member

    def getSiblings(self, pid, tolerance=POSITIVE_TOLERANCE):
        """Return the number of processes in the trace to be
        verified that have a correlation greater than tolerance
        with the given process of the reference trace

        :param pid: The PID of the process in the reference
            trace
        :type pid: int

        :param tolerance: A correlation value > tolerance
            will classify the resultant process as a sibling
        :type tolerance: float

        :return: The number of siblings found for the PID
        :rtype: int

        :raises ValueError: If the PID is not one of the reference
            PIDs (raised by the index lookup)

        .. seealso:: :mod:`bart.sched.SchedMatrix.SchedMatrix.assertSiblings`
        """

        # Rows of the matrix are indexed by the position of the PID in
        # the reference PID list.
        ref_pid_idx = self._reference_pids.index(pid)
        pid_result = self._matrix[ref_pid_idx]
        # pylint fails to recognize numpy members.
        # pylint: disable=no-member
        return int(np.count_nonzero(pid_result > tolerance))
        # pylint: enable=no-member

    def assertSiblings(self, pid, expected_value, operator,
                       tolerance=POSITIVE_TOLERANCE):
        """Check the number of siblings of a reference process
        against the expected value using the given operator

        :param pid: The PID of the process in the reference
            trace
        :type pid: int

        :param expected_value: The value the number of siblings
            is compared against
        :type expected_value: int

        :param operator: A binary operator function that returns
            a boolean. For example:
            ::

                import operator
                op = operator.eq
                assertSiblings(pid, expected_value, op)

            Will do the following check:
            ::

                getSiblings(pid) == expected_value

        :param tolerance: A correlation value > tolerance
            will classify the resultant process as a sibling
        :type tolerance: float

        :return: The result of
            ``operator(getSiblings(pid, tolerance), expected_value)``;
            no exception is raised when the check does not hold

        .. seealso:: :mod:`bart.sched.SchedMatrix.SchedMatrix.getSiblings`
        """
        num_siblings = self.getSiblings(pid, tolerance)
        return operator(num_siblings, expected_value)
293