1#    Copyright 2015-2017 ARM Limited
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15
16"""Base class to parse trace.dat dumps"""
17
18import re
19import pandas as pd
20import warnings
21
22from resource import getrusage, RUSAGE_SELF
23
24def _get_free_memory_kb():
25    try:
26        with open("/proc/meminfo") as f:
27            memfree_line = [l for l in f.readlines() if "MemFree" in l][0]
28            _, num_kb, _ = memfree_line.split()
29            return int(num_kb)
30    except:
31        # Probably either not running on Linux (no /proc/meminfo), or format has
32        # changed (we didn't find num_kb).
33        return None
34
def trace_parser_explode_array(string, array_lengths):
    """Explode an array in the trace into individual elements for easy parsing

    Basically, turn :code:`load={1 1 2 2}` into :code:`load0=1 load1=1 load2=2
    load3=2`.

    :param string: Input string from the trace
    :type string: str

    :param array_lengths: A dictionary of array names and their
        expected length.  If we get array that's shorter than the expected
        length, additional keys have to be introduced with value 0 to
        compensate.
    :type array_lengths: dict

    For example:
    ::

        trace_parser_explode_array(string="load={1 2}",
                                   array_lengths={"load": 4})
        "load0=1 load1=2 load2=0 load3=0"
    """

    array_pat = re.compile(r"[^ ]+={[^}]+}")

    # Repeatedly replace the first "name={...}" occurrence until none remain
    while True:
        found = array_pat.search(string)
        if found is None:
            return string

        fragment = found.group()
        name = re.match(r"([^=]+)=", fragment).groups()[0]
        elements = re.search(r"{(.+)}", fragment).groups()[0].split(' ')

        # One "nameN=val" token per element, then zero-pad up to the
        # expected length for this array name
        tokens = ["{}{}={}".format(name, pos, elem)
                  for (pos, elem) in enumerate(elements)]
        tokens += ["{}{}=0".format(name, pos)
                   for pos in range(len(elements), array_lengths[name])]

        string = (string[:found.start()] + " ".join(tokens) +
                  string[found.end():])
84
class Base(object):
    """Base class to parse trace.dat dumps.

    Don't use directly, create a subclass that has a unique_word class
    variable.  unique_word is a string that can uniquely identify
    lines in the trace that correspond to this event.  This is usually
    the trace_name (optionally followed by a semicolon,
    e.g. "sched_switch:") but it can be anything else for trace points
    generated using trace_printk().

    :param parse_raw: If :code:`True`, raw trace data (-r option) to
        trace-cmd will be used

    :param fallback: If :code:`True`, the parsing class will be used
        only if no other candidate class's unique_word matched. subclasses
        should override this (for ex. TracingMarkWrite uses it)

    This class acts as a base class for all TRAPpy events

    """
    def __init__(self, parse_raw=False, fallback=False):
        self.fallback = fallback
        self.tracer = None
        # Built lazily by create_dataframe() from the per-field arrays below
        self.data_frame = pd.DataFrame()
        self.line_array = []
        self.data_array = []
        self.time_array = []
        self.comm_array = []
        self.pid_array = []
        self.tgid_array = []
        self.cpu_array = []
        self.parse_raw = parse_raw
        self.cached = False

    def finalize_object(self):
        """Hook for subclasses to post-process their DataFrame; no-op here."""
        pass

    def __get_trace_array_lengths(self):
        """Calculate the lengths of all arrays in the trace

        Returns a dict with the name of each array found in the trace
        as keys and their corresponding length as value

        """
        from collections import defaultdict

        pat_array = re.compile(r"([A-Za-z0-9_]+)={([^}]+)}")

        ret = defaultdict(int)

        for line in self.data_array:
            while True:
                match = re.search(pat_array, line)
                if not match:
                    break

                (array_name, array_elements) = match.groups()

                array_len = len(array_elements.split(' '))

                # Record the longest occurrence of each array name
                if array_len > ret[array_name]:
                    ret[array_name] = array_len

                line = line[match.end():]

            # Stop scanning if the trace doesn't have arrays
            if len(ret) == 0:
                break

        return ret

    def append_data(self, time, comm, pid, tgid, cpu, line, data):
        """Append data parsed from a line to the corresponding arrays

        The :mod:`DataFrame` will be created from this when the whole trace
        has been parsed.

        :param time: The time for the line that was printed in the trace
        :type time: float

        :param comm: The command name or the execname from which the trace
            line originated
        :type comm: str

        :param pid: The PID of the process from which the trace
            line originated
        :type pid: int

        :param tgid: The TGID of the process from which the trace
            line originated
        :type tgid: int

        :param cpu: The CPU on which the trace line was emitted
        :type cpu: int

        :param line: The line number of the event within the trace
        :type line: int

        :param data: The data for matching line in the trace
        :type data: str
        """

        self.time_array.append(time)
        self.comm_array.append(comm)
        self.pid_array.append(pid)
        self.tgid_array.append(tgid)
        self.cpu_array.append(cpu)
        self.line_array.append(line)
        self.data_array.append(data)

    def string_cast(self, string, type):
        """ Attempt to convert string to another type

        Here we attempt to cast string to a type. Currently only
        integer conversion is supported with future expansion
        left open to other types.

        :param string: The value to convert.
        :type string: str

        :param type: The type to convert to.
        :type type: type

        :return: The converted value, the original string if it cannot be
            converted, or None for unsupported target types.
        """
        # Currently this function only supports int conversion
        # (returns None for any other target type, matching historic
        # behaviour of this API)
        if type != int:
            return
        # Handle false-positives for negative numbers
        if not string.lstrip("-").isdigit():
            return string
        return int(string)

    def generate_data_dict(self, data_str):
        """Parse a "key=value key=value ..." trace payload into a dict.

        Values that look like integers are converted with
        :meth:`string_cast`.  A field without an "=" is treated as the
        continuation of the previous string value (e.g. a comm containing
        spaces) and is concatenated onto it.

        :param data_str: The event payload from the trace line
        :type data_str: str
        """
        data_dict = {}
        prev_key = None
        for field in data_str.split():
            if "=" not in field:
                # A bare field before any key=value pair cannot be
                # attached to anything; skip it rather than crash.
                if prev_key is None:
                    continue
                # Concatenation is supported only for "string" values
                if type(data_dict[prev_key]) is not str:
                    continue
                data_dict[prev_key] += ' ' + field
                continue
            (key, value) = field.split('=', 1)
            value = self.string_cast(value, int)
            data_dict[key] = value
            prev_key = key
        return data_dict

    def generate_parsed_data(self):
        """Yield one dict per appended trace line, ready for DataFrame use.

        Each dict contains the bookkeeping fields (__comm, __pid, __tgid,
        __cpu, __line) plus the parsed key=value payload.  Periodically
        checks process memory against the system's free memory and warns
        once if we appear close to exhausting it.
        """

        # Get a rough idea of how much memory we have to play with
        CHECK_MEM_COUNT = 10000
        kb_free = _get_free_memory_kb()
        starting_maxrss = getrusage(RUSAGE_SELF).ru_maxrss
        check_memory_usage = True
        check_memory_count = 1

        for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
                                              self.tgid_array, self.cpu_array,
                                              self.line_array, self.data_array):
            data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line}
            data_dict.update(self.generate_data_dict(data_str))

            # When running out of memory, Pandas has been observed to segfault
            # rather than throwing a proper Python error.
            # Look at how much memory our process is using and warn if we seem
            # to be getting close to the system's limit, check it only once
            # in the beginning and then every CHECK_MEM_COUNT events
            check_memory_count -= 1
            if check_memory_usage and check_memory_count == 0:
                kb_used = (getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss)
                if kb_free and kb_used > kb_free * 0.9:
                    warnings.warn("TRAPpy: Appear to be low on memory. "
                                  "If errors arise, try providing more RAM")
                    check_memory_usage = False
                check_memory_count = CHECK_MEM_COUNT

            yield data_dict

    def create_dataframe(self):
        """Create the final :mod:`pandas.DataFrame`"""
        if not self.time_array:
            return

        trace_arr_lengths = self.__get_trace_array_lengths()

        if trace_arr_lengths:
            for (idx, val) in enumerate(self.data_array):
                expl_val = trace_parser_explode_array(val, trace_arr_lengths)
                self.data_array[idx] = expl_val

        time_idx = pd.Index(self.time_array, name="Time")
        self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx)

        # Release the raw per-field arrays now that the DataFrame owns
        # the data.  (tgid_array was previously missed here, leaking it
        # and leaving the arrays with inconsistent lengths.)
        self.time_array = []
        self.line_array = []
        self.comm_array = []
        self.pid_array = []
        self.tgid_array = []
        self.cpu_array = []
        self.data_array = []

    def write_csv(self, fname):
        """Write the csv info into a CSV file

        :param fname: The name of the CSV file
        :type fname: str
        """
        self.data_frame.to_csv(fname)

    def read_csv(self, fname):
        """Read the csv data into a DataFrame

        :param fname: The name of the CSV file
        :type fname: str
        """
        self.data_frame = pd.read_csv(fname, index_col = 0)

    def normalize_time(self, basetime):
        """Substract basetime from the Time of the data frame

        :param basetime: The offset which needs to be subtracted from
            the time index
        :type basetime: float
        """
        if basetime and not self.data_frame.empty:
            self.data_frame.reset_index(inplace=True)
            self.data_frame["Time"] = self.data_frame["Time"] - basetime
            self.data_frame.set_index("Time", inplace=True)
302