Constraint.py revision 435457c8af9d69383ba45e0bd7da022d967a8dea
1#    Copyright 2015-2015 ARM Limited
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15
16"""This module provides the Constraint class for handling
filters and pivots in a modular fashion. This enables easy
constraint application.
19
20What is a Constraint?
1. It is a collection of data based on two rules:
22    a. A Pivot
23    b. A Set of Filters
24
25For Example:
26    for a dataframe
27
28    Time    CPU       Latency
29    1       x           <val>
30    2       y           <val>
31    3       z           <val>
32    4       a           <val>
33
34The resultant data will be for each unique pivot value with the filters applied
35
36result["x"] = pd.Series.filtered()
37result["y"] = pd.Series.filtered()
38result["z"] = pd.Series.filtered()
39result["a"] = pd.Series.filtered()
40
41"""
42# pylint: disable=R0913
43from trappy.plotter.Utils import decolonize, listify, normalize_list
44from trappy.plotter import AttrConf
45
46
class Constraint(object):

    """Filter one column of a data frame and split it by pivot value.

    All the work is done by the constructor. The outcome is stored in
    ``self.result``, a dictionary that maps a pivot value to the series
    of ``column`` values selected for it.

    :param trappy_run: source of the data. When ``template`` is truthy
        the data frame is read from
        ``getattr(trappy_run, decolonize(template.name)).data_frame``;
        otherwise ``trappy_run`` itself is used as the data frame.
    :param pivot: name of the column to split on, or ``AttrConf.PIVOT``
        to keep all the selected values under ``AttrConf.PIVOT_VAL``.
    :param column: name of the column whose values are selected.
    :param template: object exposing a ``name`` attribute, or a falsy
        value when ``trappy_run`` already is the data frame.
    :param run_index: number used to build the default display name.
    :param filters: dictionary mapping a column name to the container
        of values accepted for that column.
    :raises ValueError: when applying the constraint still fails after
        the duplicate index values have been adjusted.
    """

    def __init__(
            self, trappy_run, pivot, column, template, run_index, filters):
        self._trappy_run = trappy_run
        self._filters = filters
        self._pivot = pivot
        self._column = column
        self._template = template
        self.run_index = run_index
        self._dup_resolved = False
        self._data = self.populate_data_frame()

        try:
            self.result = self._apply()
        except ValueError:
            # A ValueError from _apply() is treated as a symptom of
            # duplicate index values: adjust the index once and retry.
            if not self._dup_resolved:
                self._handle_duplicate_index()
                try:
                    self.result = self._apply()
                except Exception:
                    # Exception (not a bare except) so KeyboardInterrupt
                    # and SystemExit are not rewritten into a ValueError.
                    raise ValueError("Unable to handle duplicates")

    def _apply(self):
        """Build the pivot value -> series dictionary.

        :returns: an empty dictionary when the data has no ``column``
            entry. When the pivot is ``AttrConf.PIVOT``, a single entry
            keyed by ``AttrConf.PIVOT_VAL`` holding the values that
            pass every filter whose key is a column of the data.
            Otherwise one entry per value returned by
            :meth:`pivot_vals` for which the selection is not empty.
        """
        data = self._data
        result = {}

        try:
            values = data[self._column]
        except KeyError:
            return result

        if self._pivot == AttrConf.PIVOT:
            criterion = values.map(lambda x: True)
            for key in self._filters:
                # Filters naming a column the data does not have are
                # ignored.
                if key in data.columns:
                    criterion = criterion & data[key].map(
                        lambda x: x in self._filters[key])
                    values = values[criterion]
            result[AttrConf.PIVOT_VAL] = values
            return result

        pivot_vals = self.pivot_vals(data)

        for pivot_val in pivot_vals:
            criterion = values.map(lambda x: True)

            for key in self._filters:
                # The filter on the pivot column is skipped here: it has
                # already been applied by pivot_vals().
                if key != self._pivot and key in data.columns:
                    criterion = criterion & data[key].map(
                        lambda x: x in self._filters[key])
                    # `values` is narrowed here and carried over to the
                    # next pivot value; the filters themselves do not
                    # depend on pivot_val.
                    values = values[criterion]

            val_series = values[data[self._pivot] == pivot_val]
            if len(val_series) != 0:
                result[pivot_val] = val_series

        return result

    def _handle_duplicate_index(self):
        """Spread duplicate index values apart and reindex the data.

        The first occurrence of every duplicated value is left alone;
        each later occurrence gets an offset added, starting from the
        gap to the next index entry divided by the number of copies,
        capped at ``AttrConf.DUPLICATE_VALUE_MAX_DELTA``.

        The positions are found with ``searchsorted``, so this
        presumably expects a sorted index -- TODO confirm.
        """
        data = self._data
        self._dup_resolved = True
        index = data.index
        # NOTE(review): `index.values` may share memory with the index,
        # in which case the additions below also change the index of
        # the frame in place -- confirm for the pandas version in use.
        new_index = index.values
        max_delta = AttrConf.DUPLICATE_VALUE_MAX_DELTA

        # NOTE(review): Index.get_duplicates() was deprecated in pandas
        # 0.23 and later removed; the documented replacement is
        # index[index.duplicated()].unique().
        dups = index.get_duplicates()
        for dup in dups:
            # Leave one of the values intact
            dup_index_left = index.searchsorted(dup, side="left")
            dup_index_right = index.searchsorted(dup, side="right") - 1
            num_dups = dup_index_right - dup_index_left + 1

            try:
                delta = (index[dup_index_right + 1] - dup) / num_dups
            except IndexError:
                # The duplicates sit at the very end of the index, so
                # there is no following entry to measure the gap to.
                delta = max_delta

            if delta > max_delta:
                delta = max_delta

            # Add a delta to the others
            dup_index_left += 1
            while dup_index_left <= dup_index_right:
                new_index[dup_index_left] += delta
                # The offset doubles for every further copy (delta,
                # 2 * delta, 4 * delta, ...).
                # NOTE(review): with many copies the offset may reach
                # the next index value -- confirm whether a linear step
                # was intended.
                delta += delta
                dup_index_left += 1
        self._data = self._data.reindex(new_index)

    def _uses_trappy_run(self):
        """Return True when a template was given, False otherwise."""
        return bool(self._template)

    def populate_data_frame(self):
        """Return the data frame the constraint works on.

        Without a template this is the ``trappy_run`` object itself.
        With a template it is the ``data_frame`` attribute of the
        member of ``trappy_run`` named ``decolonize(template.name)``.
        """
        if not self._uses_trappy_run():
            return self._trappy_run

        data_container = getattr(
            self._trappy_run,
            decolonize(self._template.name))
        return data_container.data_frame

    def pivot_vals(self, data):
        """Return the pivot values to build results for.

        :param data: data frame to read the pivot column from.
        :returns: ``AttrConf.PIVOT_VAL`` itself (not a list) when the
            pivot is ``AttrConf.PIVOT``; an empty list when ``data``
            has no pivot column; otherwise a list, in no particular
            order, of the unique values of the pivot column, restricted
            to ``filters[pivot]`` when such a filter exists.
        """
        if self._pivot == AttrConf.PIVOT:
            return AttrConf.PIVOT_VAL

        if self._pivot not in data.columns:
            return []

        pivot_vals = set(data[self._pivot])
        if self._pivot in self._filters:
            pivot_vals = pivot_vals & set(self._filters[self._pivot])

        return list(pivot_vals)

    def __str__(self):
        """Return ``name:column`` or ``name:template_name:column``."""
        name = self.get_data_name()

        if not self._uses_trappy_run():
            return name + ":" + self._column

        return name + ":" + self._template.name + ":" + self._column

    def get_data_name(self):
        """Get name for the data Member

        Without a template the name is ``DataFrame <run_index>``. With
        a template it is the ``name`` of the run, or ``Run <run_index>``
        when that name is the empty string.
        """
        if not self._uses_trappy_run():
            return "DataFrame {}".format(self.run_index)

        if self._trappy_run.name != "":
            return self._trappy_run.name

        return "Run {}".format(self.run_index)
194
class ConstraintManager(object):

    """Turn runs, columns and templates into a list of constraints.

    The three inputs are converted to lists, brought to a common length
    where possible (see :meth:`_expand`) and then combined into
    ``Constraint`` objects, all sharing the same pivot and filters.
    The manager can be iterated over and has a length: both refer to
    the constraints that were created.

    :param runs: a run or a list of runs.
    :param columns: a column name or a list of column names.
    :param templates: a template or a list of templates.
    :param pivot: pivot handed to every constraint.
    :param filters: filters handed to every constraint.
    :param zip_constraints: when True (the default) the inputs are
        combined position by position; when False one constraint is
        created for every run/column combination.
    :raises RuntimeError: when the inputs cannot be combined, see
        :meth:`_expand`.
    """

    def __init__(self, runs, columns, templates, pivot, filters,
                 zip_constraints=True):

        # Position 0 holds the runs, 1 the columns, 2 the templates.
        self._ip_vec = [listify(runs), listify(columns), listify(templates)]

        # A real list: it is indexed and iterated more than once.
        self._lens = [len(vec) for vec in self._ip_vec]
        self._max_len = max(self._lens)
        self._pivot = pivot
        self._filters = filters
        self._constraints = []

        self._run_expanded = False
        self._expand()
        if zip_constraints:
            self._populate_zip_constraints()
        else:
            self._populate_constraints()

    def _expand(self):
        """Bring runs, columns and templates to a common length.

        Nothing is changed when the longest input has at most one
        element or when the three inputs already have the same length::

            Len[runs] == Len[columns] == Len[templates]

        Otherwise every input shorter than the longest one is replaced
        by ``normalize_list(max_len, input)`` (presumably the input
        stretched to ``max_len`` elements -- see trappy.plotter.Utils),
        for example::

            Len[runs] = 1, Len[columns] = 1, Len[templates] != 1
            Len[runs] = 1, Len[columns] != 1, Len[templates] != 1

        ``_run_expanded`` is set when the runs are one of the replaced
        inputs. ``_lens`` keeps the lengths the inputs had originally.

        :raises RuntimeError: "Essential Arg Missing" when the longest
            input has one element and the shortest does not;
            "Cannot Expand a list of Constraints" when the lengths
            differ and the shortest input has more than one element.
        """
        min_len = min(self._lens)

        if self._max_len == 1 and min_len != 1:
            raise RuntimeError("Essential Arg Missing")

        if self._max_len <= 1:
            return

        # Are they all equal?
        if len(set(self._lens)) == 1:
            return

        if min_len > 1:
            raise RuntimeError("Cannot Expand a list of Constraints")

        for pos, length in enumerate(self._lens):
            if length == self._max_len:
                continue
            if pos == 0:
                self._run_expanded = True
            self._ip_vec[pos] = normalize_list(self._max_len,
                                               self._ip_vec[pos])

    def _populate_constraints(self):
        """Populate the constraints creating one for each column in each run

        In a multirun, multicolumn scenario, create constraints for
        all the columns in each of the runs. Unlike
        _populate_zip_constraints(), which pairs the first run with the
        first column, the second run with the second column,... this
        function creates a constraint for every combination of runs and
        columns possible. The template is the one found at the position
        of the run.
        """

        for run_idx, run in enumerate(self._ip_vec[0]):
            template = self._ip_vec[2][run_idx]
            for col in self._ip_vec[1]:
                self._constraints.append(
                    Constraint(run, self._pivot, col, template,
                               run_idx, self._filters))

    def get_column_index(self, constraint):
        """Return the position of the constraint's column in the list of
        columns held by the manager (first match).

        :raises ValueError: when the column is not in the list.
        """
        return self._ip_vec[1].index(constraint._column)

    def _populate_zip_constraints(self):
        """Populate the expanded constraints

        In a multirun, multicolumn scenario, create constraints for
        the first run and the first column, second run and second
        column,... that is, as if you run zip(runs, columns)

        Every constraint gets run index 0 when the list of runs was
        expanded by _expand(); otherwise it gets its own position.
        """

        for idx in range(self._max_len):
            run_idx = 0 if self._run_expanded else idx

            self._constraints.append(
                Constraint(
                    self._ip_vec[0][idx],
                    self._pivot,
                    self._ip_vec[1][idx],
                    self._ip_vec[2][idx],
                    run_idx,
                    self._filters))

    def generate_pivots(self, permute=False):
        """Return a union of the pivot values

        The pivot values of all the constraints are merged and sorted
        by their value as a decimal integer; if that is not possible,
        by their value as a hexadecimal string; if that is not possible
        either, by their natural order.

        :param permute: when True, pair every run index with every
            pivot value instead of returning the pivot values alone.
        :returns: ``(sorted pivot values, their number)``, or, when
            ``permute`` is True, ``(generator of (run index, pivot)
            tuples, number of tuples)``. The run indexes range over the
            number of runs the manager was created with.
        """
        pivot_vals = set()
        for constraint in self._constraints:
            pivot_vals.update(constraint.result.keys())

        p_list = list(pivot_vals)

        # Both exception types must be caught at each step: int() raises
        # ValueError for an unparsable string and TypeError for a value
        # of an unsupported type (or a non-string with an explicit base).
        try:
            sorted_plist = sorted(p_list, key=int)
        except (ValueError, TypeError):
            try:
                sorted_plist = sorted(p_list, key=lambda x: int(x, 16))
            except (ValueError, TypeError):
                sorted_plist = sorted(p_list)

        if not permute:
            return sorted_plist, len(sorted_plist)

        num_runs = self._lens[0]
        pivot_gen = ((run_idx, pivot)
                     for run_idx in range(num_runs)
                     for pivot in sorted_plist)
        return pivot_gen, len(sorted_plist) * num_runs

    def constraint_labels(self):
        """Get the Str representation of the constraints

        :returns: a list with ``str(constraint)`` for every constraint.
        """
        return [str(constraint) for constraint in self._constraints]

    def __len__(self):
        """Return the number of constraints."""
        return len(self._constraints)

    def __iter__(self):
        """Iterate over the constraints in creation order."""
        return iter(self._constraints)
347