# ftrace.py revision b1b1c9ec6deb6cb879c683f7ddaebb98d2f016ac
1#    Copyright 2015-2017 ARM Limited
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15
16
17# pylint can't see any of the dynamically allocated classes of FTrace
18# pylint: disable=no-member
19
20import itertools
21import os
22import re
23import pandas as pd
24
25from trappy.bare_trace import BareTrace
26from trappy.utils import listify
27
class FTraceParseError(Exception):
    """Raised when a line of the trace file cannot be parsed."""
    pass
30
def _plot_freq_hists(allfreqs, what, axis, title):
    """Helper function for plot_freq_hists

    allfreqs is the output of a Cpu*Power().get_all_freqs() (for
    example, CpuInPower.get_all_freqs()).  what is a string: "in" or
    "out"

    """
    import trappy.plot_utils

    # One histogram per actor (column of allfreqs), one axis per actor
    for this_ax, actor in zip(axis, allfreqs):
        freqs = allfreqs[actor]
        hist_title = trappy.plot_utils.normalize_title(
            "freq {} {}".format(what, actor), title)

        trappy.plot_utils.plot_hist(freqs, this_ax, hist_title, "KHz", 20,
                                    "Frequency", (0, freqs.max()), "default")
48
class GenericFTrace(BareTrace):
    """Generic class to parse output of FTrace.  This class is meant to be
subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""

    # Parser classes for each scope, keyed by event name.  Populated by
    # register_parser() (and by trappy.register_ftrace_parser()).
    thermal_classes = {}

    sched_classes = {}

    dynamic_classes = {}

    def __init__(self, name="", normalize_time=True, scope="all",
                 events=None, window=(0, None), abs_window=(0, None)):
        """Parse the trace and create one trace-class attribute per event.

        :param events: extra event names to parse (see __add_events()).
            Defaults to no extra events.  A mutable default argument
            ([]) was deliberately avoided here.
        :param window: relative (to the first event) time window to parse.
        :param abs_window: absolute time window to parse.
        """
        super(GenericFTrace, self).__init__(name)

        if events is None:
            events = []

        # Subclasses (e.g. FTrace) set needs_raw_parsing before calling us
        if not hasattr(self, "needs_raw_parsing"):
            self.needs_raw_parsing = False

        self.class_definitions.update(self.dynamic_classes.items())
        self.__add_events(listify(events))

        if scope == "thermal":
            self.class_definitions.update(self.thermal_classes.items())
        elif scope == "sched":
            self.class_definitions.update(self.sched_classes.items())
        elif scope != "custom":
            # "all" (or any unrecognized scope): parse thermal and sched
            self.class_definitions.update(self.thermal_classes.items())
            self.class_definitions.update(self.sched_classes.items())

        # Instantiate one parser object per registered event and expose it
        # as an attribute of this trace (e.g. self.sched_switch)
        for attr, class_def in self.class_definitions.iteritems():
            trace_class = class_def()
            setattr(self, attr, trace_class)
            self.trace_classes.append(trace_class)

        self.__parse_trace_file(self.trace_path, window, abs_window)
        if self.needs_raw_parsing and (self.trace_path_raw is not None):
            self.__parse_trace_file(self.trace_path_raw, window, abs_window,
                                    raw=True)
        self.finalize_objects()

        if normalize_time:
            self.normalize_time()

    @classmethod
    def register_parser(cls, cobject, scope):
        """Register the class as an Event. This function
        can be used to register a class which is associated
        with an FTrace unique word.

        .. seealso::

            :mod:`trappy.dynamic.register_dynamic_ftrace` :mod:`trappy.dynamic.register_ftrace_parser`

        """

        # Default the event name to the unique word (minus any trailing ":")
        if not hasattr(cobject, "name"):
            cobject.name = cobject.unique_word.split(":")[0]

        # Add the class to the classes dictionary
        if scope == "all":
            cls.dynamic_classes[cobject.name] = cobject
        else:
            getattr(cls, scope + "_classes")[cobject.name] = cobject

    @classmethod
    def unregister_parser(cls, cobject):
        """Unregister a parser

        This is the opposite of FTrace.register_parser(), it removes a class
        from the list of classes that will be parsed on the trace

        """

        # TODO: scopes should not be hardcoded (nor here nor in the FTrace object)
        all_scopes = [cls.thermal_classes, cls.sched_classes,
                      cls.dynamic_classes]
        known_events = ((n, c, sc) for sc in all_scopes for n, c in sc.items())

        for name, obj, scope_classes in known_events:
            if cobject == obj:
                del scope_classes[name]

    def __add_events(self, events):
        """Add events to the class_definitions

        If the events are known to trappy just add that class to the
        class definitions list.  Otherwise, register a class to parse
        that event

        """

        from trappy.dynamic import DynamicTypeFactory, default_init
        from trappy.base import Base

        # TODO: scopes should not be hardcoded (nor here nor in the FTrace object)
        all_scopes = [self.thermal_classes, self.sched_classes,
                      self.dynamic_classes]
        known_events = {k: v for sc in all_scopes for k, v in sc.iteritems()}

        for event_name in events:
            for cls in known_events.itervalues():
                if (event_name == cls.unique_word) or \
                   (event_name + ":" == cls.unique_word):
                    self.class_definitions[event_name] = cls
                    break
            else:
                # Unknown event: synthesize a generic parser class for it
                kwords = {
                    "__init__": default_init,
                    "unique_word": event_name + ":",
                    "name": event_name,
                }
                trace_class = DynamicTypeFactory(event_name, (Base,), kwords)
                self.class_definitions[event_name] = trace_class

    def __populate_data(self, fin, cls_for_unique_word, window, abs_window):
        """Append to trace data from a txt trace"""

        # Default argument binds the key list once, at definition time,
        # so it is not rebuilt on every line of the trace
        def contains_unique_word(line, unique_words=cls_for_unique_word.keys()):
            for unique_word in unique_words:
                if unique_word in line:
                    return True
            return False

        # Matches the fixed fields of a trace line:
        #   <comm>-<pid> [(optional tgid)] [<cpu>] [optional 4-char flags] <timestamp>:
        special_fields_regexp = r"^\s*(?P<comm>.*)-(?P<pid>\d+)(?:\s+\(.*\))"\
                                r"?\s+\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
                                r"(?P<timestamp>[0-9]+\.[0-9]+):"
        special_fields_regexp = re.compile(special_fields_regexp)
        # First "key=" token marks the start of the event payload
        start_match = re.compile(r"[A-Za-z0-9_]+=")

        actual_trace = itertools.dropwhile(self.trace_hasnt_started(), fin)
        actual_trace = itertools.takewhile(self.trace_hasnt_finished(),
                                           actual_trace)

        for line in itertools.ifilter(contains_unique_word, actual_trace):
            for unique_word, cls in cls_for_unique_word.iteritems():
                if unique_word in line:
                    trace_class = cls
                    break
            else:
                raise FTraceParseError("No unique word in '{}'".format(line))

            # Drop the trailing newline
            line = line[:-1]

            special_fields_match = special_fields_regexp.match(line)
            if not special_fields_match:
                raise FTraceParseError("Couldn't match special fields in '{}'".format(line))
            comm = special_fields_match.group('comm')
            pid = int(special_fields_match.group('pid'))
            cpu = int(special_fields_match.group('cpu'))
            timestamp = float(special_fields_match.group('timestamp'))

            # First parsed event establishes the trace's base time
            if not self.basetime:
                self.basetime = timestamp

            if (timestamp < window[0] + self.basetime) or \
               (timestamp < abs_window[0]):
                continue

            # Events are time-ordered, so past the end of the window we
            # can stop parsing altogether
            if (window[1] and timestamp > window[1] + self.basetime) or \
               (abs_window[1] and timestamp > abs_window[1]):
                return

            try:
                data_start_idx = start_match.search(line).start()
            except AttributeError:
                # Line has no "key=value" payload; nothing to store
                continue

            data_str = line[data_start_idx:]

            # Remove empty arrays from the trace
            data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)

            trace_class.append_data(timestamp, comm, pid, cpu, data_str)

    def trace_hasnt_started(self):
        """Return a function that accepts a line and returns true if this line
is not part of the trace.

        Subclasses of GenericFTrace may override this to skip the
        beginning of a file that is not part of the trace.  The first
        time the returned function returns False it will be considered
        the beginning of the trace and this function will never be
        called again (because once it returns False, the trace has
        started).

        """
        return lambda x: False

    def trace_hasnt_finished(self):
        """Return a function that accepts a line and returns true if this line
is part of the trace.

        This function is called with each line of the file *after*
        trace_hasnt_started() returns True so the first line it sees
        is part of the trace.  The returned function should return
        True as long as the line it receives is part of the trace.  As
        soon as this function returns False, the rest of the file will
        be dropped.  Subclasses of GenericFTrace may override this to
        stop processing after the end of the trace is found to skip
        parsing the end of the file if it contains anything other than
        trace.

        """
        return lambda x: True

    def __parse_trace_file(self, trace_file, window, abs_window, raw=False):
        """parse the trace and create a pandas DataFrame"""

        # Memoize the unique words to speed up parsing the trace file
        cls_for_unique_word = {}
        for trace_name in self.class_definitions.iterkeys():
            trace_class = getattr(self, trace_name)

            # When raw parsing is in use, each event is parsed from
            # either the raw or the formatted trace, not both
            if self.needs_raw_parsing and (trace_class.parse_raw != raw):
                continue

            unique_word = trace_class.unique_word
            cls_for_unique_word[unique_word] = trace_class

        if len(cls_for_unique_word) == 0:
            return

        try:
            with open(trace_file) as fin:
                self.__populate_data(
                    fin, cls_for_unique_word, window, abs_window)
        except FTraceParseError as e:
            raise ValueError('Failed to parse ftrace file {}:\n{}'.format(
                trace_file, str(e)))

    # TODO: Move thermal specific functionality

    def get_all_freqs_data(self, map_label):
        """get an array of tuple of names and DataFrames suitable for the
        allfreqs plot"""

        cpu_in_freqs = self.cpu_in_power.get_all_freqs(map_label)
        cpu_out_freqs = self.cpu_out_power.get_all_freqs(map_label)

        ret = []
        for label in map_label.values():
            in_label = label + "_freq_in"
            out_label = label + "_freq_out"

            cpu_inout_freq_dict = {in_label: cpu_in_freqs[label],
                                   out_label: cpu_out_freqs[label]}
            dfr = pd.DataFrame(cpu_inout_freq_dict).fillna(method="pad")
            ret.append((label, dfr))

        # GPU (devfreq) frequencies are optional: the trace may not
        # contain devfreq events at all
        try:
            gpu_freq_in_data = self.devfreq_in_power.get_all_freqs()
            gpu_freq_out_data = self.devfreq_out_power.get_all_freqs()
        except KeyError:
            gpu_freq_in_data = gpu_freq_out_data = None

        if gpu_freq_in_data is not None:
            inout_freq_dict = {"gpu_freq_in": gpu_freq_in_data["freq"],
                               "gpu_freq_out": gpu_freq_out_data["freq"]
                           }
            dfr = pd.DataFrame(inout_freq_dict).fillna(method="pad")
            ret.append(("GPU", dfr))

        return ret

    def plot_freq_hists(self, map_label, ax):
        """Plot histograms for each actor input and output frequency

        ax is an array of axis, one for the input power and one for
        the output power

        """

        # First half of the axes are for "out" plots, second half for "in"
        in_base_idx = len(ax) / 2

        try:
            devfreq_out_all_freqs = self.devfreq_out_power.get_all_freqs()
            devfreq_in_all_freqs = self.devfreq_in_power.get_all_freqs()
        except KeyError:
            devfreq_out_all_freqs = None
            devfreq_in_all_freqs = None

        # Carry the "in"/"out" label in each tuple.  Previously both
        # iterations passed the literal "out" to _plot_freq_hists(), so
        # the input frequency histograms were mislabelled.
        out_allfreqs = (self.cpu_out_power.get_all_freqs(map_label),
                        devfreq_out_all_freqs, ax[0:in_base_idx], "out")
        in_allfreqs = (self.cpu_in_power.get_all_freqs(map_label),
                       devfreq_in_all_freqs, ax[in_base_idx:], "in")

        for cpu_allfreqs, devfreq_freqs, axis, what in (out_allfreqs,
                                                        in_allfreqs):
            if devfreq_freqs is not None:
                devfreq_freqs.name = "GPU"
                allfreqs = pd.concat([cpu_allfreqs, devfreq_freqs], axis=1)
            else:
                allfreqs = cpu_allfreqs

            allfreqs.fillna(method="pad", inplace=True)
            _plot_freq_hists(allfreqs, what, axis, self.name)

    def plot_load(self, mapping_label, title="", width=None, height=None,
                  ax=None):
        """plot the load of all the clusters, similar to how compare runs did it

        the mapping_label has to be a dict whose keys are the cluster
        numbers as found in the trace and values are the names that
        will appear in the legend.

        """
        import trappy.plot_utils

        load_data = self.cpu_in_power.get_load_data(mapping_label)
        try:
            # GPU load is optional: missing devfreq data raises KeyError
            gpu_data = pd.DataFrame({"GPU":
                                     self.devfreq_in_power.data_frame["load"]})
            load_data = pd.concat([load_data, gpu_data], axis=1)
        except KeyError:
            pass

        load_data = load_data.fillna(method="pad")
        title = trappy.plot_utils.normalize_title("Utilization", title)

        if not ax:
            ax = trappy.plot_utils.pre_plot_setup(width=width, height=height)

        load_data.plot(ax=ax)

        trappy.plot_utils.post_plot_setup(ax, title=title)

    def plot_normalized_load(self, mapping_label, title="", width=None,
                             height=None, ax=None):
        """plot the normalized load of all the clusters, similar to how compare runs did it

        the mapping_label has to be a dict whose keys are the cluster
        numbers as found in the trace and values are the names that
        will appear in the legend.

        """
        import trappy.plot_utils

        load_data = self.cpu_in_power.get_normalized_load_data(mapping_label)
        if "load" in self.devfreq_in_power.data_frame:
            gpu_dfr = self.devfreq_in_power.data_frame
            gpu_max_freq = max(gpu_dfr["freq"])
            # Scale GPU load by its frequency relative to the max freq
            gpu_load = gpu_dfr["load"] * gpu_dfr["freq"] / gpu_max_freq

            gpu_data = pd.DataFrame({"GPU": gpu_load})
            load_data = pd.concat([load_data, gpu_data], axis=1)

        load_data = load_data.fillna(method="pad")
        title = trappy.plot_utils.normalize_title("Normalized Utilization", title)

        if not ax:
            ax = trappy.plot_utils.pre_plot_setup(width=width, height=height)

        load_data.plot(ax=ax)

        trappy.plot_utils.post_plot_setup(ax, title=title)

    def plot_allfreqs(self, map_label, width=None, height=None, ax=None):
        """Do allfreqs plots similar to those of CompareRuns

        if ax is not none, it must be an array of the same size as
        map_label.  Each plot will be done in each of the axis in
        ax

        """
        import trappy.plot_utils

        all_freqs = self.get_all_freqs_data(map_label)

        setup_plot = False
        if ax is None:
            ax = [None] * len(all_freqs)
            setup_plot = True

        for this_ax, (label, dfr) in zip(ax, all_freqs):
            this_title = trappy.plot_utils.normalize_title("allfreqs " + label,
                                                        self.name)

            if setup_plot:
                this_ax = trappy.plot_utils.pre_plot_setup(width=width,
                                                        height=height)

            dfr.plot(ax=this_ax)
            trappy.plot_utils.post_plot_setup(this_ax, title=this_title)
430
class FTrace(GenericFTrace):
    """A wrapper class that initializes all the classes of a given run

    - The FTrace class can receive the following optional parameters.

    :param path: Path contains the path to the trace file.  If no path is given, it
        uses the current directory by default.  If path is a file, and ends in
        .dat, it's run through "trace-cmd report".  If it doesn't end in
        ".dat", then it must be the output of a trace-cmd report run.  If path
        is a directory that contains a trace.txt, that is assumed to be the
        output of "trace-cmd report".  If path is a directory that doesn't
        have a trace.txt but has a trace.dat, it runs trace-cmd report on the
        trace.dat, saves it in trace.txt and then uses that.

    :param name: is a string describing the trace.

    :param normalize_time: is used to make all traces start from time 0 (the
        default).  If normalize_time is False, the trace times are the same as
        in the trace file.

    :param scope: can be used to limit the parsing done on the trace.  The default
        scope parses all the traces known to trappy.  If scope is thermal, only
        the thermal classes are parsed.  If scope is sched, only the sched
        classes are parsed.

    :param events: A list of strings containing the name of the trace
        events that you want to include in this FTrace object.  The
        string must correspond to the event name (what you would pass
        to "trace-cmd -e", i.e. 4th field in trace.txt)

    :param window: a tuple indicating a time window.  The first
        element in the tuple is the start timestamp and the second one
        the end timestamp.  Timestamps are relative to the first trace
        event that's parsed.  If you want to trace until the end of
        the trace, set the second element to None.  If you want to use
        timestamps extracted from the trace file use "abs_window". The
        window is inclusive: trace events exactly matching the start
        or end timestamps will be included.

    :param abs_window: a tuple indicating an absolute time window.
        This parameter is similar to the "window" one but its values
        represent timestamps that are not normalized, (i.e. the ones
        you find in the trace file). The window is inclusive.


    :type path: str
    :type name: str
    :type normalize_time: bool
    :type scope: str
    :type events: list
    :type window: tuple
    :type abs_window: tuple

    This is a simple example:
    ::

        import trappy
        trappy.FTrace("trace_dir")

    """

    def __init__(self, path=".", name="", normalize_time=True, scope="all",
                 events=None, window=(0, None), abs_window=(0, None)):
        # Avoid a shared mutable default argument for events
        if events is None:
            events = []

        self.trace_path, self.trace_path_raw = self.__process_path(path)
        self.needs_raw_parsing = True

        self.__populate_metadata()

        super(FTrace, self).__init__(name, normalize_time, scope, events,
                                     window, abs_window)

    def __process_path(self, basepath):
        """Process the path and return the path to the trace text file

        Regenerates the .txt (and .raw.txt) traces from the .dat binary
        trace when they are missing or stale.  Returns a tuple
        (trace_txt, trace_raw); trace_raw is None if no raw trace file
        exists.
        """

        if os.path.isfile(basepath):
            trace_name = os.path.splitext(basepath)[0]
        else:
            trace_name = os.path.join(basepath, "trace")

        trace_txt = trace_name + ".txt"
        trace_raw = trace_name + ".raw.txt"
        trace_dat = trace_name + ".dat"

        if os.path.isfile(trace_dat):
            # Both TXT and RAW traces must always be generated
            if not os.path.isfile(trace_txt) or \
               not os.path.isfile(trace_raw):
                self.__run_trace_cmd_report(trace_dat)
            # TXT (and RAW) traces must match the most recent binary trace
            elif os.path.getmtime(trace_txt) < os.path.getmtime(trace_dat):
                self.__run_trace_cmd_report(trace_dat)

        if not os.path.isfile(trace_raw):
            trace_raw = None

        return trace_txt, trace_raw

    def __run_trace_cmd_report(self, fname):
        """Run "trace-cmd report fname > fname.txt"
           and "trace-cmd report -R fname > fname.raw.txt"

        The resulting traces are stored in files with extension ".txt"
        and ".raw.txt" respectively.  If fname is "my_trace.dat", the
        trace is stored in "my_trace.txt" and "my_trace.raw.txt".  The
        contents of the destination files are overwritten if they
        exist.

        :raises IOError: if fname does not exist
        :raises OSError: if trace-cmd is not installed
        """
        from subprocess import check_output

        cmd = ["trace-cmd", "report"]

        if not os.path.isfile(fname):
            raise IOError("No such file or directory: {}".format(fname))

        raw_trace_output = os.path.splitext(fname)[0] + ".raw.txt"
        trace_output = os.path.splitext(fname)[0] + ".txt"
        cmd.append(fname)

        # devnull is used as the child's stderr, so it must be opened
        # for writing (a read-only fd would make the child's writes fail)
        with open(os.devnull, "w") as devnull:
            try:
                out = check_output(cmd, stderr=devnull)
            except OSError as exc:
                if exc.errno == 2 and not exc.filename:
                    raise OSError(2, "trace-cmd not found in PATH, is it installed?")
                else:
                    raise

            # Add the -R flag to the trace-cmd
            # for raw parsing
            cmd.insert(-1, "-R")
            raw_out = check_output(cmd, stderr=devnull)

        with open(trace_output, "w") as fout:
            fout.write(out)

        with open(raw_trace_output, "w") as fout:
            fout.write(raw_out)

    def __populate_metadata(self):
        """Populates trace metadata

        Scans the header of the parsed trace for "<key> = <number>"
        lines and stores each value in an attribute named "_<key>"
        (e.g. self._version, self._cpus).  Stops at the first real
        trace event line.
        """

        # Meta Data as expected to be found in the parsed trace header
        metadata_keys = ["version", "cpus"]

        # Matches a regular trace event line, which marks the end of the
        # header.  Compiled once, outside the per-line loop.
        trace_line_regexp = re.compile(
            r"^\s+[^\[]+-\d+\s+\[\d+\]\s+\d+\.\d+:")

        for key in metadata_keys:
            setattr(self, "_" + key, None)

        with open(self.trace_path) as fin:
            for line in fin:
                if not metadata_keys:
                    return

                # The pattern depends on the keys still to be found, so
                # it is rebuilt as keys are removed
                metadata_pattern = r"^\b(" + "|".join(metadata_keys) + \
                                   r")\b\s*=\s*([0-9]+)"
                match = re.search(metadata_pattern, line)
                if match:
                    setattr(self, "_" + match.group(1), match.group(2))
                    metadata_keys.remove(match.group(1))

                if trace_line_regexp.search(line):
                    # Reached a valid trace line, abort metadata population
                    return
594