1#    Copyright 2015-2017 ARM Limited
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15
16"""Grammar module allows the user to easily define relations
17between data events and perform basic logical and arithmetic
18operations on the data. The parser also handles super-indexing
19and variable forwarding.
20"""
21from pyparsing import Literal, delimitedList, Optional, oneOf, nums,\
22    alphas, alphanums, Forward, Word, opAssoc, operatorPrecedence, Combine, Group
23import importlib
24import pandas as pd
25import types
26import numpy as np
27from trappy.stats.Topology import Topology
28from trappy.stats import StatConf
29from trappy.utils import handle_duplicate_index, listify
30
31
def parse_num(tokens):
    """Parse action converting the matched numeric text to a float

    :param tokens: The grammar tokens
    :type tokens: list
    """
    num_str = tokens[0]
    return float(num_str)
39
# Suppressed Literals: matched during parsing but dropped from the token
# stream so parse actions never see them
LPAREN = Literal("(").suppress()
RPAREN = Literal(")").suppress()
COLON = Literal(":").suppress()
EXP_START = Literal("[").suppress()
EXP_END = Literal("]").suppress()

# Grammar Tokens

# DataFrame Accessor
# Signed integer literal, e.g. "42", "-7"; converted to float by parse_num
INTEGER = Combine(Optional(oneOf("+ -")) + Word(nums))\
    .setParseAction(parse_num)
# Signed real literal with optional exponent, e.g. "1.5", "-2.e3";
# note the integer part and the "." are mandatory, the fraction is optional
REAL = Combine(Optional(oneOf("+ -")) + Word(nums) + "." +
               Optional(Word(nums)) +
               Optional(oneOf("e E") + Optional(oneOf("+ -")) + Word(nums)))\
    .setParseAction(parse_num)

# Generic Identifier: letter/underscore first, then alphanumerics/underscores
IDENTIFIER = Word(alphas + '_', alphanums + '_')
# Python Like Function Name: dotted path such as "numpy.mean", kept as one token
FUNC_NAME = delimitedList(IDENTIFIER, delim=".", combine=True)
# Exponentiation operators
EXPONENTIATION_OPS = "**"
# Unary Operators
UNARY_OPS = oneOf("+ -")
# Multiplication/Division Operators
MULT_OPS = oneOf("* / // %")
# Addition/Subtraction Operators
SUM_OPS = oneOf("+ -")
# Relational Operators
REL_OPS = oneOf("> < >= <= == !=")
# Logical Operators ("&&"/"||" are short-circuit, "&"/"|" are element-wise)
LOGICAL_OPS = oneOf("&& || & |")

# Operator to function mapping used by eval_binary_op; each entry takes the
# running result and the next operand
OPERATOR_MAP = {
    "+": lambda a, b: a + b,
    "-": lambda a, b: a - b,
    "*": lambda a, b: a * b,
    "/": lambda a, b: a / b,
    "//": lambda a, b: a // b,
    "%": lambda a, b: a % b,
    "**": lambda a, b: a ** b,
    ">": lambda a, b: a > b,
    "<": lambda a, b: a < b,
    ">=": lambda a, b: a >= b,
    "<=": lambda a, b: a <= b,
    "||": lambda a, b: a or b,
    "&&": lambda a, b: a and b,
    "|": lambda a, b: a | b,
    "==": lambda a, b: a == b,
    "!=": lambda a, b: a != b,
    "&": lambda a, b: a & b
}
94
95
def eval_unary_op(tokens):
    """Evaluate a unary (sign) operation

    :param tokens: The grammar tokens
    :type tokens: list
    """

    sign, operand = tokens[0][0], tokens[0][1]
    return -1 * operand if sign == "-" else operand
108
109
def iterate_binary_ops(tokens):
    """An iterator yielding (operator, operand) pairs from
    the flat token stream of a binary operation

    :param tokens: The grammar tokens
    :type tokens: list
    """

    itr = iter(tokens)
    while True:
        try:
            # Use the next() builtin: the iterator .next() method is
            # Python 2 only and does not exist in Python 3
            yield (next(itr), next(itr))
        except StopIteration:
            # An odd trailing token (operator with no operand) is
            # silently dropped, preserving the original behaviour
            break
123
124
def eval_binary_op(tokens):
    """Evaluate a left-associative chain of binary operators

    :param tokens: The grammar tokens
    :type tokens: list
    """

    params = tokens[0]
    result = params[0]
    rest = params[1:]

    # Fold each (operator, operand) pair into the running result;
    # a dangling operator without an operand is ignored
    for pos in range(0, len(rest) - 1, 2):
        result = OPERATOR_MAP[rest[pos]](result, rest[pos + 1])

    return result
139
140
def str_to_attr(cls_str):
    """Bring the attr specified into current scope
       and return a handler

    :param cls_str: A string representing the class
    :type cls_str: str

    :return: A class object
    """
    parts = cls_str.rsplit(".", 1)
    if len(parts) == 1:
        # Bare name: resolve it in this module's global namespace
        return globals()[parts[0]]
    # Dotted path: import the module part and fetch the attribute
    module_name, attr = parts
    return getattr(importlib.import_module(module_name), attr)
158
159
def get_parse_expression(parse_func, parse_var_id):
    """return a parse expression with for the
    input parseActions

    :param parse_func: parse action invoked for every function call match
    :param parse_var_id: parse action invoked for every variable/accessor match

    :return: the top-level pyparsing arithmetic expression
    """

    # An operand is either an "<event>:<column>" accessor (grouped so the
    # parse action receives both parts), a number, or a bare identifier
    var_id = Group(
        FUNC_NAME + COLON + IDENTIFIER) | REAL | INTEGER | IDENTIFIER
    var_id.setParseAction(parse_var_id)

    # Forward declaration for an Arithmetic Expression
    # (needed because function-call arguments can themselves be expressions)
    arith_expr = Forward()
    func_call = Group(
        FUNC_NAME +
        LPAREN +
        Optional(
            Group(
                delimitedList(arith_expr))) +
        RPAREN)
    # An Arithmetic expression can have a var_id or
    # a function call as an operand
    # The tuples below are ordered from highest to lowest precedence
    # pylint: disable=expression-not-assigned
    arith_expr << operatorPrecedence(func_call | var_id,
                                     [
                                         (EXPONENTIATION_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (UNARY_OPS, 1,
                                          opAssoc.RIGHT, eval_unary_op),
                                         (MULT_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (SUM_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (REL_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (LOGICAL_OPS, 2,
                                          opAssoc.LEFT, eval_binary_op)
                                     ])

    # pylint: enable=expression-not-assigned
    # Argument expression for a function call
    # An argument to a function can be an
    # IDENTIFIER, Arithmetic expression, REAL number, INTEGER or a
    # Function call itself
    func_call.setParseAction(parse_func)
    return arith_expr
204
205
class Parser(object):

    r"""A parser class for solving simple
    data accesses and super-indexing data

    :param data: Trace Object
    :type data: instance of :mod:`trappy.ftrace.BareTrace` or a child
        class (like :mod:`trappy.ftrace.FTrace`)

    :param pvars: A dictionary of variables that need to be
        accessed from within the grammar
    :type pvars: dict

    :param method: The method to be used for reindexing data
        This can be one of the standard :mod:`pandas.DataFrame`
        methods (eg. pad, bfill, nearest). The default is pad
        or use the last valid observation.
    :type method: str

    :param limit: The number of indices a value will be propagated
        when reindexing. The default is None
    :type limit: int

    :param fill: Whether to fill the NaNs in the data.
        The default value is True.
    :type fill: bool

    :param window: A window of time in which to apply the data
        accesses.  By default the data accesses happen across the
        whole trace.  With the window parameter you can limit it to a
        window of time inside the trace.  The first element of the
        tuple is the starting time and the second the ending time (set
        to None for end of trace).

    :type window: tuple

    :param filters: Restrict the parsing to the rows that match the
        specified criteria. For Example:
        ::

            filters =
                    {
                        "pid": 3338,
                        "cpu": [0, 2, 4],
                    }

        will only consider rows whose pid column is 3338 and cpu is
        either 0, 2 or 4.
    :type filters: dict

    - **Operators**

        +----------------+----------------------+---------------+
        | Operation      |      operator        | Associativity |
        +================+======================+===============+
        | Exponentiation | \*\*                 |    Left       |
        +----------------+----------------------+---------------+
        |Unary           | \-                   |    Right      |
        +----------------+----------------------+---------------+
        | Multiply/Divide| \*, /, //, %         |    Left       |
        +----------------+----------------------+---------------+
        | Add/Subtract   | +, \-,               |    Left       |
        +----------------+----------------------+---------------+
        | Comparison     | >, <, >=, <=, ==, != |    Left       |
        +----------------+----------------------+---------------+
        | Logical        | &&, ||, \|, &        |    Left       |
        +----------------+----------------------+---------------+

    - **Data Accessors**

        Since the goal of the grammar is to provide an
        easy language to access and compare data
        from a :mod:`trappy.trace.FTrace` object. The parser provides
        a simple notation to access this data.

        *Statically Defined Events*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("trappy.thermal.Thermal:temp * 2")

        *Aliasing*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            pvars = {"THERMAL": trappy.thermal.Thermal}
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace, pvars=pvars)
            parser.solve("THERMAL:temp * 2")

        *Using Event Name*
        ::

            import trappy
            from trappy.stats.grammar import Parser
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("thermal:temp * 2")

        The event :mod:`trappy.thermal.Thermal` is aliased
        as **thermal** in the grammar

        *Dynamic Events*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            # Register Dynamic Event
            cls = trappy.register_dynamic_ftrace("my_unique_word", "event_name")

            pvars = {"CUSTOM": cls}
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace, pvars=pvars)
            parser.solve("CUSTOM:col * 2")

        .. seealso:: :mod:`trappy.dynamic.register_dynamic_ftrace`

    """

    def __init__(self, data, pvars=None, window=(0, None), filters=None, **kwargs):
        if pvars is None:
            pvars = {}

        self.data = data
        self._pvars = pvars
        # Matches "<event>:<column>" accessors; used by solve() to
        # pre-populate the aggregated DataFrame before evaluation
        self._accessor = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._pre_process)
        # Same accessor grammar, but wired to the inspection parse action
        self._inspect = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._parse_for_info)
        self._parse_expr = get_parse_expression(
            self._parse_func, self._parse_var_id)
        # Super-indexed aggregate of all accessed event data
        self._agg_df = pd.DataFrame()
        self._pivot_set = set()
        self._limit = kwargs.get("limit", StatConf.REINDEX_LIMIT_DEFAULT)
        self._method = kwargs.get("method", StatConf.REINDEX_METHOD_DEFAULT)
        self._fill = kwargs.get("fill", StatConf.NAN_FILL_DEFAULT)
        self._window = window
        self._filters = filters

    def solve(self, expr):
        """Parses and solves the input expression

        :param expr: The input expression
        :type expr: str

        :return: The return type may vary depending on
            the expression. For example:

            **Vector**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("trappy.thermal.Thermal:temp * 2")

            **Scalar**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("numpy.mean(trappy.thermal.Thermal:temp)")

            **Vector Mask**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("trappy.thermal.Thermal:temp > 65000")
        """

        # Pre-process accessors for indexing so that all referenced event
        # data is merged into self._agg_df before the expression is solved
        self._accessor.searchString(expr)
        return self._parse_expr.parseString(expr)[0]

    def _pivot(self, cls, column):
        """Pivot Data for concatenation"""

        data_frame = self._get_data_frame(cls)
        if data_frame.empty:
            raise ValueError("No events found for {}".format(cls.name))

        data_frame = handle_duplicate_index(data_frame)
        # Union of indices lets all events share one super-index
        new_index = self._agg_df.index.union(data_frame.index)

        if hasattr(cls, "pivot") and cls.pivot:
            pivot = cls.pivot
            pivot_vals = list(np.unique(data_frame[pivot].values))
            data = {}

            # One sub-frame per pivot value, reindexed onto the super-index
            for val in pivot_vals:
                data[val] = data_frame[data_frame[pivot] == val][[column]]
                if len(self._agg_df):
                    data[val] = data[val].reindex(
                        index=new_index,
                        method=self._method,
                        limit=self._limit)

            return pd.concat(data, axis=1).swaplevel(0, 1, axis=1)

        if len(self._agg_df):
            data_frame = data_frame.reindex(
                index=new_index,
                method=self._method,
                limit=self._limit)

        # No pivot column: file the data under the default pivot value
        return pd.concat({StatConf.GRAMMAR_DEFAULT_PIVOT: data_frame[
                         [column]]}, axis=1).swaplevel(0, 1, axis=1)

    def _pre_process(self, tokens):
        """Pre-process accessors for super-indexing"""

        params = tokens[0]
        # Already aggregated in a previous access: reuse it
        if params[1] in self._agg_df.columns:
            return self._agg_df[params[1]]

        event = params[0]
        column = params[1]

        # Resolve the event name: alias dict, trace event name, or a
        # fully-qualified "module.Class" path
        if event in self._pvars:
            cls = self._pvars[event]
        elif event in self.data.class_definitions:
            cls = self.data.class_definitions[event]
        else:
            try:
                cls = str_to_attr(event)
            except KeyError:
                raise ValueError(
                    "Can't find parser class for event {}".format(event))

        data_frame = self._pivot(cls, column)
        self._agg_df = pd.concat(
            [self._agg_df, data_frame], axis=1)

        if self._fill:
            # Forward-fill NaNs introduced by the union reindex.
            # .ffill() replaces fillna(method="pad"), whose "method"
            # argument was removed in pandas 3.0
            self._agg_df = self._agg_df.ffill()

        return self._agg_df[params[1]]

    def _parse_for_info(self, tokens):
        """Parse Action for inspecting data accessors"""

        params = tokens[0]
        cls = params[0]
        column = params[1]
        info = {}
        info["pivot"] = None
        info["pivot_values"] = None

        # Same three-way event resolution as _pre_process
        if cls in self._pvars:
            cls = self._pvars[cls]
        elif cls in self.data.class_definitions:
            cls = self.data.class_definitions[cls]
        else:
            cls = str_to_attr(cls)

        data_frame = self._get_data_frame(cls)

        info["class"] = cls
        info["length"] = len(data_frame)
        if hasattr(cls, "pivot") and cls.pivot:
            info["pivot"] = cls.pivot
            info["pivot_values"] = list(np.unique(data_frame[cls.pivot]))
        info["column"] = column
        info["column_present"] = column in data_frame.columns
        return info

    def _parse_var_id(self, tokens):
        """A function to parse a variable identifier
        """

        params = tokens[0]
        # Try, in order: numeric literal, grammar variable (pvars),
        # then a previously aggregated "<event>:<column>" accessor
        try:
            return float(params)
        except (ValueError, TypeError):
            try:
                return self._pvars[params]
            except KeyError:
                return self._agg_df[params[1]]

    def _parse_func(self, tokens):
        """A function to parse a function string"""

        params = tokens[0]
        func_name = params[0]
        # A pvars entry that is a plain function shadows global lookup
        if func_name in self._pvars and isinstance(
                self._pvars[func_name],
                types.FunctionType):
            func = self._pvars[func_name]
        else:
            func = str_to_attr(params[0])
        return func(*params[1])

    def _get_data_frame(self, cls):
        """Get the data frame from the BareTrace object, applying the window
        and the filters"""

        data_frame = getattr(self.data, cls.name).data_frame

        if data_frame.empty:
            return data_frame
        elif self._window[1] is None:
            # Open-ended window: everything from the start time onwards
            data_frame = data_frame.loc[self._window[0]:]
        else:
            data_frame = data_frame.loc[self._window[0]:self._window[1]]

        if self._filters:
            criterion = pd.Series([True] * len(data_frame),
                                  index=data_frame.index)

            # .items() (dict.iteritems() is Python 2 only)
            for filter_col, wanted_vals in self._filters.items():
                try:
                    dfr_col = data_frame[filter_col]
                except KeyError:
                    # Silently ignore filters on columns this event lacks
                    continue

                criterion &= dfr_col.isin(listify(wanted_vals))

            data_frame = data_frame[criterion]

        return data_frame

    def ref(self, mask):
        """Reference super indexed data with a boolean mask

        :param mask: A boolean :mod:`pandas.Series` that
            can be used to reference the aggregated data in
            the parser
        :type mask: :mod:`pandas.Series`

        :return: aggregated_data[mask]
        """

        return self._agg_df[mask]

    def inspect(self, accessor):
        """A function to inspect the accessor for information

        :param accessor: A data accessor of the format
            <event>:<column>
        :type accessor: str

        :return: A dictionary of information
        """
        return self._inspect.parseString(accessor)[0]