ftrace.py revision 658d4cc50a81cc0234bd4cdb3fa87f38782223b1
1#    Copyright 2015-2017 ARM Limited
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15
16
17# pylint can't see any of the dynamically allocated classes of FTrace
18# pylint: disable=no-member
19
20import itertools
21import os
22import re
23import pandas as pd
24
25from trappy.bare_trace import BareTrace
26from trappy.utils import listify
27
28class FTraceParseError(Exception):
29    pass
30
31def _plot_freq_hists(allfreqs, what, axis, title):
32    """Helper function for plot_freq_hists
33
34    allfreqs is the output of a Cpu*Power().get_all_freqs() (for
35    example, CpuInPower.get_all_freqs()).  what is a string: "in" or
36    "out"
37
38    """
39    import trappy.plot_utils
40
41    for ax, actor in zip(axis, allfreqs):
42        this_title = "freq {} {}".format(what, actor)
43        this_title = trappy.plot_utils.normalize_title(this_title, title)
44        xlim = (0, allfreqs[actor].max())
45
46        trappy.plot_utils.plot_hist(allfreqs[actor], ax, this_title, "KHz", 20,
47                             "Frequency", xlim, "default")
48
49SPECIAL_FIELDS_RE = re.compile(
50                        r"^\s*(?P<comm>.*)-(?P<pid>\d+)(?:\s+\(.*\))"\
51                        r"?\s+\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
52                        r"(?P<timestamp>[0-9]+\.[0-9]+): (\w+:\s+)+(?P<data>.+)"
53)
54
55class GenericFTrace(BareTrace):
56    """Generic class to parse output of FTrace.  This class is meant to be
57subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
58
59    thermal_classes = {}
60
61    sched_classes = {}
62
63    dynamic_classes = {}
64
65    def __init__(self, name="", normalize_time=True, scope="all",
66                 events=[], window=(0, None), abs_window=(0, None)):
67        super(GenericFTrace, self).__init__(name)
68
69        if not hasattr(self, "needs_raw_parsing"):
70            self.needs_raw_parsing = False
71
72        self.class_definitions.update(self.dynamic_classes.items())
73        self.__add_events(listify(events))
74
75        if scope == "thermal":
76            self.class_definitions.update(self.thermal_classes.items())
77        elif scope == "sched":
78            self.class_definitions.update(self.sched_classes.items())
79        elif scope != "custom":
80            self.class_definitions.update(self.thermal_classes.items() +
81                                          self.sched_classes.items())
82
83        for attr, class_def in self.class_definitions.iteritems():
84            trace_class = class_def()
85            setattr(self, attr, trace_class)
86            self.trace_classes.append(trace_class)
87
88        self.__parse_trace_file(self.trace_path, window, abs_window)
89        if self.needs_raw_parsing and (self.trace_path_raw is not None):
90            self.__parse_trace_file(self.trace_path_raw, window, abs_window,
91                                    raw=True)
92        self.finalize_objects()
93
94        if normalize_time:
95            self.normalize_time()
96
97    @classmethod
98    def register_parser(cls, cobject, scope):
99        """Register the class as an Event. This function
100        can be used to register a class which is associated
101        with an FTrace unique word.
102
103        .. seealso::
104
105            :mod:`trappy.dynamic.register_dynamic_ftrace` :mod:`trappy.dynamic.register_ftrace_parser`
106
107        """
108
109        if not hasattr(cobject, "name"):
110            cobject.name = cobject.unique_word.split(":")[0]
111
112        # Add the class to the classes dictionary
113        if scope == "all":
114            cls.dynamic_classes[cobject.name] = cobject
115        else:
116            getattr(cls, scope + "_classes")[cobject.name] = cobject
117
118    @classmethod
119    def unregister_parser(cls, cobject):
120        """Unregister a parser
121
122        This is the opposite of FTrace.register_parser(), it removes a class
123        from the list of classes that will be parsed on the trace
124
125        """
126
127        # TODO: scopes should not be hardcoded (nor here nor in the FTrace object)
128        all_scopes = [cls.thermal_classes, cls.sched_classes,
129                      cls.dynamic_classes]
130        known_events = ((n, c, sc) for sc in all_scopes for n, c in sc.items())
131
132        for name, obj, scope_classes in known_events:
133            if cobject == obj:
134                del scope_classes[name]
135
136    def __add_events(self, events):
137        """Add events to the class_definitions
138
139        If the events are known to trappy just add that class to the
140        class definitions list.  Otherwise, register a class to parse
141        that event
142
143        """
144
145        from trappy.dynamic import DynamicTypeFactory, default_init
146        from trappy.base import Base
147
148        # TODO: scopes should not be hardcoded (nor here nor in the FTrace object)
149        all_scopes = [self.thermal_classes, self.sched_classes,
150                      self.dynamic_classes]
151        known_events = {k: v for sc in all_scopes for k, v in sc.iteritems()}
152
153        for event_name in events:
154            for cls in known_events.itervalues():
155                if (event_name == cls.unique_word) or \
156                   (event_name + ":" == cls.unique_word):
157                    self.class_definitions[event_name] = cls
158                    break
159            else:
160                kwords = {
161                    "__init__": default_init,
162                    "unique_word": event_name + ":",
163                    "name": event_name,
164                }
165                trace_class = DynamicTypeFactory(event_name, (Base,), kwords)
166                self.class_definitions[event_name] = trace_class
167
168    def __populate_data(self, fin, cls_for_unique_word, window, abs_window):
169        """Append to trace data from a txt trace"""
170
171        def contains_unique_word(line, unique_words=cls_for_unique_word.keys()):
172            for unique_word in unique_words:
173                if unique_word in line:
174                    return True
175            return False
176
177        actual_trace = itertools.dropwhile(self.trace_hasnt_started(), fin)
178        actual_trace = itertools.takewhile(self.trace_hasnt_finished(),
179                                           actual_trace)
180
181        for line in actual_trace:
182            if not contains_unique_word(line):
183                self.lines += 1
184                continue
185            for unique_word, cls in cls_for_unique_word.iteritems():
186                if unique_word in line:
187                    trace_class = cls
188                    if not cls.fallback:
189                        break
190            else:
191                if not trace_class:
192                    raise FTraceParseError("No unique word in '{}'".format(line))
193
194            line = line[:-1]
195
196            fields_match = SPECIAL_FIELDS_RE.match(line)
197            if not fields_match:
198                raise FTraceParseError("Couldn't match fields in '{}'".format(line))
199            comm = fields_match.group('comm')
200            pid = int(fields_match.group('pid'))
201            cpu = int(fields_match.group('cpu'))
202            timestamp = float(fields_match.group('timestamp'))
203            data_str = fields_match.group('data')
204
205            if not self.basetime:
206                self.basetime = timestamp
207
208            if (timestamp < window[0] + self.basetime) or \
209               (timestamp < abs_window[0]):
210                self.lines += 1
211                continue
212
213            if (window[1] and timestamp > window[1] + self.basetime) or \
214               (abs_window[1] and timestamp > abs_window[1]):
215                return
216
217            # Remove empty arrays from the trace
218            data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)
219
220            trace_class.append_data(timestamp, comm, pid, cpu, self.lines, data_str)
221            self.lines += 1
222
223    def trace_hasnt_started(self):
224        """Return a function that accepts a line and returns true if this line
225is not part of the trace.
226
227        Subclasses of GenericFTrace may override this to skip the
228        beginning of a file that is not part of the trace.  The first
229        time the returned function returns False it will be considered
230        the beginning of the trace and this function will never be
231        called again (because once it returns False, the trace has
232        started).
233
234        """
235        return lambda line: not SPECIAL_FIELDS_RE.match(line)
236
237    def trace_hasnt_finished(self):
238        """Return a function that accepts a line and returns true if this line
239is part of the trace.
240
241        This function is called with each line of the file *after*
242        trace_hasnt_started() returns True so the first line it sees
243        is part of the trace.  The returned function should return
244        True as long as the line it receives is part of the trace.  As
245        soon as this function returns False, the rest of the file will
246        be dropped.  Subclasses of GenericFTrace may override this to
247        stop processing after the end of the trace is found to skip
248        parsing the end of the file if it contains anything other than
249        trace.
250
251        """
252        return lambda x: True
253
254    def __parse_trace_file(self, trace_file, window, abs_window, raw=False):
255        """parse the trace and create a pandas DataFrame"""
256
257        # Memoize the unique words to speed up parsing the trace file
258        cls_for_unique_word = {}
259        for trace_name in self.class_definitions.iterkeys():
260            trace_class = getattr(self, trace_name)
261
262            if self.needs_raw_parsing and (trace_class.parse_raw != raw):
263                continue
264
265            unique_word = trace_class.unique_word
266            cls_for_unique_word[unique_word] = trace_class
267
268        if len(cls_for_unique_word) == 0:
269            return
270
271        try:
272            with open(trace_file) as fin:
273                self.lines = 0
274                self.__populate_data(
275                    fin, cls_for_unique_word, window, abs_window)
276        except FTraceParseError as e:
277            raise ValueError('Failed to parse ftrace file {}:\n{}'.format(
278                trace_file, str(e)))
279
280    # TODO: Move thermal specific functionality
281
282    def get_all_freqs_data(self, map_label):
283        """get an array of tuple of names and DataFrames suitable for the
284        allfreqs plot"""
285
286        cpu_in_freqs = self.cpu_in_power.get_all_freqs(map_label)
287        cpu_out_freqs = self.cpu_out_power.get_all_freqs(map_label)
288
289        ret = []
290        for label in map_label.values():
291            in_label = label + "_freq_in"
292            out_label = label + "_freq_out"
293
294            cpu_inout_freq_dict = {in_label: cpu_in_freqs[label],
295                                   out_label: cpu_out_freqs[label]}
296            dfr = pd.DataFrame(cpu_inout_freq_dict).fillna(method="pad")
297            ret.append((label, dfr))
298
299        try:
300            gpu_freq_in_data = self.devfreq_in_power.get_all_freqs()
301            gpu_freq_out_data = self.devfreq_out_power.get_all_freqs()
302        except KeyError:
303            gpu_freq_in_data = gpu_freq_out_data = None
304
305        if gpu_freq_in_data is not None:
306            inout_freq_dict = {"gpu_freq_in": gpu_freq_in_data["freq"],
307                               "gpu_freq_out": gpu_freq_out_data["freq"]
308                           }
309            dfr = pd.DataFrame(inout_freq_dict).fillna(method="pad")
310            ret.append(("GPU", dfr))
311
312        return ret
313
314    def plot_freq_hists(self, map_label, ax):
315        """Plot histograms for each actor input and output frequency
316
317        ax is an array of axis, one for the input power and one for
318        the output power
319
320        """
321
322        in_base_idx = len(ax) / 2
323
324        try:
325            devfreq_out_all_freqs = self.devfreq_out_power.get_all_freqs()
326            devfreq_in_all_freqs = self.devfreq_in_power.get_all_freqs()
327        except KeyError:
328            devfreq_out_all_freqs = None
329            devfreq_in_all_freqs = None
330
331        out_allfreqs = (self.cpu_out_power.get_all_freqs(map_label),
332                        devfreq_out_all_freqs, ax[0:in_base_idx])
333        in_allfreqs = (self.cpu_in_power.get_all_freqs(map_label),
334                       devfreq_in_all_freqs, ax[in_base_idx:])
335
336        for cpu_allfreqs, devfreq_freqs, axis in (out_allfreqs, in_allfreqs):
337            if devfreq_freqs is not None:
338                devfreq_freqs.name = "GPU"
339                allfreqs = pd.concat([cpu_allfreqs, devfreq_freqs], axis=1)
340            else:
341                allfreqs = cpu_allfreqs
342
343            allfreqs.fillna(method="pad", inplace=True)
344            _plot_freq_hists(allfreqs, "out", axis, self.name)
345
346    def plot_load(self, mapping_label, title="", width=None, height=None,
347                  ax=None):
348        """plot the load of all the clusters, similar to how compare runs did it
349
350        the mapping_label has to be a dict whose keys are the cluster
351        numbers as found in the trace and values are the names that
352        will appear in the legend.
353
354        """
355        import trappy.plot_utils
356
357        load_data = self.cpu_in_power.get_load_data(mapping_label)
358        try:
359            gpu_data = pd.DataFrame({"GPU":
360                                     self.devfreq_in_power.data_frame["load"]})
361            load_data = pd.concat([load_data, gpu_data], axis=1)
362        except KeyError:
363            pass
364
365        load_data = load_data.fillna(method="pad")
366        title = trappy.plot_utils.normalize_title("Utilization", title)
367
368        if not ax:
369            ax = trappy.plot_utils.pre_plot_setup(width=width, height=height)
370
371        load_data.plot(ax=ax)
372
373        trappy.plot_utils.post_plot_setup(ax, title=title)
374
375    def plot_normalized_load(self, mapping_label, title="", width=None,
376                             height=None, ax=None):
377        """plot the normalized load of all the clusters, similar to how compare runs did it
378
379        the mapping_label has to be a dict whose keys are the cluster
380        numbers as found in the trace and values are the names that
381        will appear in the legend.
382
383        """
384        import trappy.plot_utils
385
386        load_data = self.cpu_in_power.get_normalized_load_data(mapping_label)
387        if "load" in self.devfreq_in_power.data_frame:
388            gpu_dfr = self.devfreq_in_power.data_frame
389            gpu_max_freq = max(gpu_dfr["freq"])
390            gpu_load = gpu_dfr["load"] * gpu_dfr["freq"] / gpu_max_freq
391
392            gpu_data = pd.DataFrame({"GPU": gpu_load})
393            load_data = pd.concat([load_data, gpu_data], axis=1)
394
395        load_data = load_data.fillna(method="pad")
396        title = trappy.plot_utils.normalize_title("Normalized Utilization", title)
397
398        if not ax:
399            ax = trappy.plot_utils.pre_plot_setup(width=width, height=height)
400
401        load_data.plot(ax=ax)
402
403        trappy.plot_utils.post_plot_setup(ax, title=title)
404
405    def plot_allfreqs(self, map_label, width=None, height=None, ax=None):
406        """Do allfreqs plots similar to those of CompareRuns
407
408        if ax is not none, it must be an array of the same size as
409        map_label.  Each plot will be done in each of the axis in
410        ax
411
412        """
413        import trappy.plot_utils
414
415        all_freqs = self.get_all_freqs_data(map_label)
416
417        setup_plot = False
418        if ax is None:
419            ax = [None] * len(all_freqs)
420            setup_plot = True
421
422        for this_ax, (label, dfr) in zip(ax, all_freqs):
423            this_title = trappy.plot_utils.normalize_title("allfreqs " + label,
424                                                        self.name)
425
426            if setup_plot:
427                this_ax = trappy.plot_utils.pre_plot_setup(width=width,
428                                                        height=height)
429
430            dfr.plot(ax=this_ax)
431            trappy.plot_utils.post_plot_setup(this_ax, title=this_title)
432
433class FTrace(GenericFTrace):
434    """A wrapper class that initializes all the classes of a given run
435
436    - The FTrace class can receive the following optional parameters.
437
438    :param path: Path contains the path to the trace file.  If no path is given, it
439        uses the current directory by default.  If path is a file, and ends in
440        .dat, it's run through "trace-cmd report".  If it doesn't end in
441        ".dat", then it must be the output of a trace-cmd report run.  If path
442        is a directory that contains a trace.txt, that is assumed to be the
443        output of "trace-cmd report".  If path is a directory that doesn't
444        have a trace.txt but has a trace.dat, it runs trace-cmd report on the
445        trace.dat, saves it in trace.txt and then uses that.
446
447    :param name: is a string describing the trace.
448
449    :param normalize_time: is used to make all traces start from time 0 (the
450        default).  If normalize_time is False, the trace times are the same as
451        in the trace file.
452
453    :param scope: can be used to limit the parsing done on the trace.  The default
454        scope parses all the traces known to trappy.  If scope is thermal, only
455        the thermal classes are parsed.  If scope is sched, only the sched
456        classes are parsed.
457
458    :param events: A list of strings containing the name of the trace
459        events that you want to include in this FTrace object.  The
460        string must correspond to the event name (what you would pass
461        to "trace-cmd -e", i.e. 4th field in trace.txt)
462
463    :param window: a tuple indicating a time window.  The first
464        element in the tuple is the start timestamp and the second one
465        the end timestamp.  Timestamps are relative to the first trace
466        event that's parsed.  If you want to trace until the end of
467        the trace, set the second element to None.  If you want to use
468        timestamps extracted from the trace file use "abs_window". The
469        window is inclusive: trace events exactly matching the start
470        or end timestamps will be included.
471
472    :param abs_window: a tuple indicating an absolute time window.
473        This parameter is similar to the "window" one but its values
474        represent timestamps that are not normalized, (i.e. the ones
475        you find in the trace file). The window is inclusive.
476
477
478    :type path: str
479    :type name: str
480    :type normalize_time: bool
481    :type scope: str
482    :type events: list
483    :type window: tuple
484    :type abs_window: tuple
485
486    This is a simple example:
487    ::
488
489        import trappy
490        trappy.FTrace("trace_dir")
491
492    """
493
494    def __init__(self, path=".", name="", normalize_time=True, scope="all",
495                 events=[], window=(0, None), abs_window=(0, None)):
496        self.trace_path, self.trace_path_raw = self.__process_path(path)
497        self.needs_raw_parsing = True
498
499        self.__populate_metadata()
500
501        super(FTrace, self).__init__(name, normalize_time, scope, events,
502                                     window, abs_window)
503
504    def __process_path(self, basepath):
505        """Process the path and return the path to the trace text file"""
506
507        if os.path.isfile(basepath):
508            trace_name = os.path.splitext(basepath)[0]
509        else:
510            trace_name = os.path.join(basepath, "trace")
511
512        trace_txt = trace_name + ".txt"
513        trace_raw = trace_name + ".raw.txt"
514        trace_dat = trace_name + ".dat"
515
516        if os.path.isfile(trace_dat):
517            # Both TXT and RAW traces must always be generated
518            if not os.path.isfile(trace_txt) or \
519               not os.path.isfile(trace_raw):
520                self.__run_trace_cmd_report(trace_dat)
521            # TXT (and RAW) traces must match the most recent binary trace
522            elif os.path.getmtime(trace_txt) < os.path.getmtime(trace_dat):
523                self.__run_trace_cmd_report(trace_dat)
524
525        if not os.path.isfile(trace_raw):
526            trace_raw = None
527
528        return trace_txt, trace_raw
529
530    def __run_trace_cmd_report(self, fname):
531        """Run "trace-cmd report fname > fname.txt"
532           and "trace-cmd report -R fname > fname.raw.txt"
533
534        The resulting traces are stored in files with extension ".txt"
535        and ".raw.txt" respectively.  If fname is "my_trace.dat", the
536        trace is stored in "my_trace.txt" and "my_trace.raw.txt".  The
537        contents of the destination files are overwritten if they
538        exist.
539
540        """
541        from subprocess import check_output
542
543        cmd = ["trace-cmd", "report"]
544
545        if not os.path.isfile(fname):
546            raise IOError("No such file or directory: {}".format(fname))
547
548        raw_trace_output = os.path.splitext(fname)[0] + ".raw.txt"
549        trace_output = os.path.splitext(fname)[0] + ".txt"
550        cmd.append(fname)
551
552        with open(os.devnull) as devnull:
553            try:
554                out = check_output(cmd, stderr=devnull)
555            except OSError as exc:
556                if exc.errno == 2 and not exc.filename:
557                    raise OSError(2, "trace-cmd not found in PATH, is it installed?")
558                else:
559                    raise
560
561            # Add the -R flag to the trace-cmd
562            # for raw parsing
563            cmd.insert(-1, "-R")
564            raw_out = check_output(cmd, stderr=devnull)
565
566        with open(trace_output, "w") as fout:
567            fout.write(out)
568
569        with open(raw_trace_output, "w") as fout:
570            fout.write(raw_out)
571
572    def __populate_metadata(self):
573        """Populates trace metadata"""
574
575        # Meta Data as expected to be found in the parsed trace header
576        metadata_keys = ["version", "cpus"]
577
578        for key in metadata_keys:
579            setattr(self, "_" + key, None)
580
581        with open(self.trace_path) as fin:
582            for line in fin:
583                if not metadata_keys:
584                    return
585
586                metadata_pattern = r"^\b(" + "|".join(metadata_keys) + \
587                                   r")\b\s*=\s*([0-9]+)"
588                match = re.search(metadata_pattern, line)
589                if match:
590                    setattr(self, "_" + match.group(1), match.group(2))
591                    metadata_keys.remove(match.group(1))
592
593                if SPECIAL_FIELDS_RE.match(line):
594                    # Reached a valid trace line, abort metadata population
595                    return
596