# ftrace.py revision 658d4cc50a81cc0234bd4cdb3fa87f38782223b1
# Copyright 2015-2017 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


# pylint can't see any of the dynamically allocated classes of FTrace
# pylint: disable=no-member

import itertools
import os
import re
import pandas as pd

from trappy.bare_trace import BareTrace
from trappy.utils import listify

class FTraceParseError(Exception):
    """Raised when a line of the trace file cannot be parsed."""
    pass

def _plot_freq_hists(allfreqs, what, axis, title):
    """Helper function for plot_freq_hists.

    allfreqs is the output of a Cpu*Power().get_all_freqs() (for
    example, CpuInPower.get_all_freqs()).  what is a string: "in" or
    "out".  One histogram is drawn per actor, on the matching entry of
    the axis sequence.

    """
    import trappy.plot_utils

    for subplot, actor in zip(axis, allfreqs):
        # Compose the per-actor title, then normalize it against the
        # overall plot title.
        hist_title = trappy.plot_utils.normalize_title(
            "freq {} {}".format(what, actor), title)
        freqs = allfreqs[actor]

        trappy.plot_utils.plot_hist(freqs, subplot, hist_title, "KHz", 20,
                                    "Frequency", (0, freqs.max()), "default")

# Matches one ftrace text line: comm-pid [cpu] timestamp: event: data
SPECIAL_FIELDS_RE = re.compile(
    r"^\s*(?P<comm>.*)-(?P<pid>\d+)(?:\s+\(.*\))"\
    r"?\s+\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
    r"(?P<timestamp>[0-9]+\.[0-9]+): (\w+:\s+)+(?P<data>.+)"
)
class GenericFTrace(BareTrace):
    """Generic class to parse output of FTrace.

    This class is meant to be subclassed by FTrace (for parsing FTrace
    coming from trace-cmd) and SysTrace.
    """

    # Registries of event-parser classes, keyed by event name.  These are
    # class attributes so register_parser()/unregister_parser() affect
    # every trace object created afterwards.
    thermal_classes = {}

    sched_classes = {}

    dynamic_classes = {}

    def __init__(self, name="", normalize_time=True, scope="all",
                 events=[], window=(0, None), abs_window=(0, None)):
        # NOTE(review): events=[] is a mutable default argument; it is
        # only read (via listify) and never mutated, so it is harmless,
        # but events=None with a fallback would be safer.
        super(GenericFTrace, self).__init__(name)

        # Subclasses (e.g. FTrace) may set needs_raw_parsing before
        # delegating to this constructor; default to False otherwise.
        if not hasattr(self, "needs_raw_parsing"):
            self.needs_raw_parsing = False

        self.class_definitions.update(self.dynamic_classes.items())
        self.__add_events(listify(events))

        # Select which registered parsers to use for this scope.  Any
        # scope other than thermal/sched/custom means "all".
        if scope == "thermal":
            self.class_definitions.update(self.thermal_classes.items())
        elif scope == "sched":
            self.class_definitions.update(self.sched_classes.items())
        elif scope != "custom":
            # Python 2: dict.items() returns lists, so + concatenates.
            self.class_definitions.update(self.thermal_classes.items() +
                                          self.sched_classes.items())

        # Instantiate one parser per event class and expose it as an
        # attribute named after the event (hence the pylint disable at
        # the top of the file).
        for attr, class_def in self.class_definitions.iteritems():
            trace_class = class_def()
            setattr(self, attr, trace_class)
            self.trace_classes.append(trace_class)

        self.__parse_trace_file(self.trace_path, window, abs_window)
        if self.needs_raw_parsing and (self.trace_path_raw is not None):
            self.__parse_trace_file(self.trace_path_raw, window, abs_window,
                                    raw=True)
        self.finalize_objects()

        if normalize_time:
            self.normalize_time()

    @classmethod
    def register_parser(cls, cobject, scope):
        """Register the class as an Event. This function
        can be used to register a class which is associated
        with an FTrace unique word.

        .. seealso::

            :mod:`trappy.dynamic.register_dynamic_ftrace` :mod:`trappy.dynamic.register_ftrace_parser`

        """

        # Derive a name from the unique word when the class has none.
        if not hasattr(cobject, "name"):
            cobject.name = cobject.unique_word.split(":")[0]

        # Add the class to the classes dictionary
        if scope == "all":
            cls.dynamic_classes[cobject.name] = cobject
        else:
            getattr(cls, scope + "_classes")[cobject.name] = cobject

    @classmethod
    def unregister_parser(cls, cobject):
        """Unregister a parser

        This is the opposite of FTrace.register_parser(), it removes a class
        from the list of classes that will be parsed on the trace

        """

        # TODO: scopes should not be hardcoded (nor here nor in the FTrace object)
        all_scopes = [cls.thermal_classes, cls.sched_classes,
                      cls.dynamic_classes]
        known_events = ((n, c, sc) for sc in all_scopes for n, c in sc.items())

        # Remove the class from whichever scope dictionary holds it.
        for name, obj, scope_classes in known_events:
            if cobject == obj:
                del scope_classes[name]

    def __add_events(self, events):
        """Add events to the class_definitions

        If the events are known to trappy just add that class to the
        class definitions list.  Otherwise, register a class to parse
        that event

        """

        from trappy.dynamic import DynamicTypeFactory, default_init
        from trappy.base import Base

        # TODO: scopes should not be hardcoded (nor here nor in the FTrace object)
        all_scopes = [self.thermal_classes, self.sched_classes,
                      self.dynamic_classes]
        known_events = {k: v for sc in all_scopes for k, v in sc.iteritems()}

        for event_name in events:
            for cls in known_events.itervalues():
                # Match either "event" or "event:" against the parser's
                # unique word.
                if (event_name == cls.unique_word) or \
                   (event_name + ":" == cls.unique_word):
                    self.class_definitions[event_name] = cls
                    break
            else:
                # Unknown event: synthesize a generic parser class for it
                # on the fly.
                kwords = {
                    "__init__": default_init,
                    "unique_word": event_name + ":",
                    "name": event_name,
                }
                trace_class = DynamicTypeFactory(event_name, (Base,), kwords)
                self.class_definitions[event_name] = trace_class

    def __populate_data(self, fin, cls_for_unique_word, window, abs_window):
        """Append to trace data from a txt trace"""

        # The default argument binds the key list once, as a deliberate
        # memoization: it avoids rebuilding the list on every call.
        def contains_unique_word(line, unique_words=cls_for_unique_word.keys()):
            for unique_word in unique_words:
                if unique_word in line:
                    return True
            return False

        # Skip the pre-trace header, then stop at the end of the trace.
        actual_trace = itertools.dropwhile(self.trace_hasnt_started(), fin)
        actual_trace = itertools.takewhile(self.trace_hasnt_finished(),
                                           actual_trace)

        for line in actual_trace:
            if not contains_unique_word(line):
                self.lines += 1
                continue
            # Find the parser for this line.  A non-fallback match wins
            # immediately; fallback parsers only apply if nothing better
            # matches.  NOTE(review): trace_class carries over from the
            # previous iteration; the contains_unique_word() guard above
            # ensures at least one word matches, so it is always set
            # before the for/else reads it.
            for unique_word, cls in cls_for_unique_word.iteritems():
                if unique_word in line:
                    trace_class = cls
                    if not cls.fallback:
                        break
            else:
                if not trace_class:
                    raise FTraceParseError("No unique word in '{}'".format(line))

            # Strip the trailing newline.
            line = line[:-1]

            fields_match = SPECIAL_FIELDS_RE.match(line)
            if not fields_match:
                raise FTraceParseError("Couldn't match fields in '{}'".format(line))
            comm = fields_match.group('comm')
            pid = int(fields_match.group('pid'))
            cpu = int(fields_match.group('cpu'))
            timestamp = float(fields_match.group('timestamp'))
            data_str = fields_match.group('data')

            # The first parsed timestamp becomes the base time for the
            # relative window.
            if not self.basetime:
                self.basetime = timestamp

            # Before the window: skip the line but keep counting.
            if (timestamp < window[0] + self.basetime) or \
               (timestamp < abs_window[0]):
                self.lines += 1
                continue

            # Past the window: the trace is time-ordered, stop entirely.
            if (window[1] and timestamp > window[1] + self.basetime) or \
               (abs_window[1] and timestamp > abs_window[1]):
                return

            # Remove empty arrays from the trace
            data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)

            trace_class.append_data(timestamp, comm, pid, cpu, self.lines, data_str)
            self.lines += 1

    def trace_hasnt_started(self):
        """Return a function that accepts a line and returns true if this line
        is not part of the trace.

        Subclasses of GenericFTrace may override this to skip the
        beginning of a file that is not part of the trace. The first
        time the returned function returns False it will be considered
        the beginning of the trace and this function will never be
        called again (because once it returns False, the trace has
        started).

        """
        return lambda line: not SPECIAL_FIELDS_RE.match(line)

    def trace_hasnt_finished(self):
        """Return a function that accepts a line and returns true if this line
        is part of the trace.

        This function is called with each line of the file *after*
        trace_hasnt_started() returns True so the first line it sees
        is part of the trace. The returned function should return
        True as long as the line it receives is part of the trace. As
        soon as this function returns False, the rest of the file will
        be dropped. Subclasses of GenericFTrace may override this to
        stop processing after the end of the trace is found to skip
        parsing the end of the file if it contains anything other than
        trace.

        """
        return lambda x: True

    def __parse_trace_file(self, trace_file, window, abs_window, raw=False):
        """parse the trace and create a pandas DataFrame"""

        # Memoize the unique words to speed up parsing the trace file
        cls_for_unique_word = {}
        for trace_name in self.class_definitions.iterkeys():
            trace_class = getattr(self, trace_name)

            # When raw parsing is in use, each parser only applies to one
            # of the two files (txt vs raw.txt).
            if self.needs_raw_parsing and (trace_class.parse_raw != raw):
                continue

            unique_word = trace_class.unique_word
            cls_for_unique_word[unique_word] = trace_class

        if len(cls_for_unique_word) == 0:
            return

        try:
            with open(trace_file) as fin:
                self.lines = 0
                self.__populate_data(
                    fin, cls_for_unique_word, window, abs_window)
        except FTraceParseError as e:
            # Re-raise with the file name for context.
            raise ValueError('Failed to parse ftrace file {}:\n{}'.format(
                trace_file, str(e)))

    # TODO: Move thermal specific functionality

    def get_all_freqs_data(self, map_label):
        """get an array of tuple of names and DataFrames suitable for the
        allfreqs plot"""

        cpu_in_freqs = self.cpu_in_power.get_all_freqs(map_label)
        cpu_out_freqs = self.cpu_out_power.get_all_freqs(map_label)

        ret = []
        for label in map_label.values():
            in_label = label + "_freq_in"
            out_label = label + "_freq_out"

            cpu_inout_freq_dict = {in_label: cpu_in_freqs[label],
                                   out_label: cpu_out_freqs[label]}
            dfr = pd.DataFrame(cpu_inout_freq_dict).fillna(method="pad")
            ret.append((label, dfr))

        # GPU frequencies are optional: a trace without devfreq events
        # raises KeyError here.
        try:
            gpu_freq_in_data = self.devfreq_in_power.get_all_freqs()
            gpu_freq_out_data = self.devfreq_out_power.get_all_freqs()
        except KeyError:
            gpu_freq_in_data = gpu_freq_out_data = None

        if gpu_freq_in_data is not None:
            inout_freq_dict = {"gpu_freq_in": gpu_freq_in_data["freq"],
                               "gpu_freq_out": gpu_freq_out_data["freq"]
                               }
            dfr = pd.DataFrame(inout_freq_dict).fillna(method="pad")
            ret.append(("GPU", dfr))

        return ret

    def plot_freq_hists(self, map_label, ax):
        """Plot histograms for each actor input and output frequency

        ax is an array of axis, one for the input power and one for
        the output power

        """

        # Python 2 integer division: the first half of ax is for the
        # "out" plots, the second half for the "in" plots.
        in_base_idx = len(ax) / 2

        try:
            devfreq_out_all_freqs = self.devfreq_out_power.get_all_freqs()
            devfreq_in_all_freqs = self.devfreq_in_power.get_all_freqs()
        except KeyError:
            devfreq_out_all_freqs = None
            devfreq_in_all_freqs = None

        out_allfreqs = (self.cpu_out_power.get_all_freqs(map_label),
                        devfreq_out_all_freqs, ax[0:in_base_idx])
        in_allfreqs = (self.cpu_in_power.get_all_freqs(map_label),
                       devfreq_in_all_freqs, ax[in_base_idx:])

        for cpu_allfreqs, devfreq_freqs, axis in (out_allfreqs, in_allfreqs):
            if devfreq_freqs is not None:
                devfreq_freqs.name = "GPU"
                allfreqs = pd.concat([cpu_allfreqs, devfreq_freqs], axis=1)
            else:
                allfreqs = cpu_allfreqs

            allfreqs.fillna(method="pad", inplace=True)
            # NOTE(review): both iterations pass "out" as the label, so
            # the "in" histograms are also titled "out" — looks like a
            # bug; confirm whether the second call should pass "in".
            _plot_freq_hists(allfreqs, "out", axis, self.name)

    def plot_load(self, mapping_label, title="", width=None, height=None,
                  ax=None):
        """plot the load of all the clusters, similar to how compare runs did it

        the mapping_label has to be a dict whose keys are the cluster
        numbers as found in the trace and values are the names that
        will appear in the legend.

        """
        import trappy.plot_utils

        load_data = self.cpu_in_power.get_load_data(mapping_label)
        # GPU load is optional; missing devfreq data raises KeyError.
        try:
            gpu_data = pd.DataFrame({"GPU":
                                     self.devfreq_in_power.data_frame["load"]})
            load_data = pd.concat([load_data, gpu_data], axis=1)
        except KeyError:
            pass

        load_data = load_data.fillna(method="pad")
        title = trappy.plot_utils.normalize_title("Utilization", title)

        if not ax:
            ax = trappy.plot_utils.pre_plot_setup(width=width, height=height)

        load_data.plot(ax=ax)

        trappy.plot_utils.post_plot_setup(ax, title=title)

    def plot_normalized_load(self, mapping_label, title="", width=None,
                             height=None, ax=None):
        """plot the normalized load of all the clusters, similar to how compare runs did it

        the mapping_label has to be a dict whose keys are the cluster
        numbers as found in the trace and values are the names that
        will appear in the legend.

        """
        import trappy.plot_utils

        load_data = self.cpu_in_power.get_normalized_load_data(mapping_label)
        if "load" in self.devfreq_in_power.data_frame:
            gpu_dfr = self.devfreq_in_power.data_frame
            gpu_max_freq = max(gpu_dfr["freq"])
            # Scale GPU load by its frequency relative to the maximum.
            gpu_load = gpu_dfr["load"] * gpu_dfr["freq"] / gpu_max_freq

            gpu_data = pd.DataFrame({"GPU": gpu_load})
            load_data = pd.concat([load_data, gpu_data], axis=1)

        load_data = load_data.fillna(method="pad")
        title = trappy.plot_utils.normalize_title("Normalized Utilization", title)

        if not ax:
            ax = trappy.plot_utils.pre_plot_setup(width=width, height=height)

        load_data.plot(ax=ax)

        trappy.plot_utils.post_plot_setup(ax, title=title)

    def plot_allfreqs(self, map_label, width=None, height=None, ax=None):
        """Do allfreqs plots similar to those of CompareRuns

        if ax is not none, it must be an array of the same size as
        map_label.  Each plot will be done in each of the axis in
        ax

        """
        import trappy.plot_utils

        all_freqs = self.get_all_freqs_data(map_label)

        # When no axes are supplied, create one per DataFrame.
        setup_plot = False
        if ax is None:
            ax = [None] * len(all_freqs)
            setup_plot = True

        for this_ax, (label, dfr) in zip(ax, all_freqs):
            this_title = trappy.plot_utils.normalize_title("allfreqs " + label,
                                                           self.name)

            if setup_plot:
                this_ax = trappy.plot_utils.pre_plot_setup(width=width,
                                                           height=height)

            dfr.plot(ax=this_ax)
            trappy.plot_utils.post_plot_setup(this_ax, title=this_title)
class FTrace(GenericFTrace):
    """A wrapper class that initializes all the classes of a given run

    - The FTrace class can receive the following optional parameters.

    :param path: Path contains the path to the trace file. If no path is
        given, it uses the current directory by default. If path is a file,
        and ends in .dat, it's run through "trace-cmd report". If it doesn't
        end in ".dat", then it must be the output of a trace-cmd report run.
        If path is a directory that contains a trace.txt, that is assumed to
        be the output of "trace-cmd report". If path is a directory that
        doesn't have a trace.txt but has a trace.dat, it runs trace-cmd
        report on the trace.dat, saves it in trace.txt and then uses that.

    :param name: is a string describing the trace.

    :param normalize_time: is used to make all traces start from time 0 (the
        default). If normalize_time is False, the trace times are the same
        as in the trace file.

    :param scope: can be used to limit the parsing done on the trace. The
        default scope parses all the traces known to trappy. If scope is
        thermal, only the thermal classes are parsed. If scope is sched,
        only the sched classes are parsed.

    :param events: A list of strings containing the name of the trace
        events that you want to include in this FTrace object. The string
        must correspond to the event name (what you would pass to
        "trace-cmd -e", i.e. 4th field in trace.txt)

    :param window: a tuple indicating a time window. The first element in
        the tuple is the start timestamp and the second one the end
        timestamp. Timestamps are relative to the first trace event that's
        parsed. If you want to trace until the end of the trace, set the
        second element to None. If you want to use timestamps extracted
        from the trace file use "abs_window". The window is inclusive:
        trace events exactly matching the start or end timestamps will be
        included.

    :param abs_window: a tuple indicating an absolute time window. This
        parameter is similar to the "window" one but its values represent
        timestamps that are not normalized, (i.e. the ones you find in the
        trace file). The window is inclusive.


    :type path: str
    :type name: str
    :type normalize_time: bool
    :type scope: str
    :type events: list
    :type window: tuple
    :type abs_window: tuple

    This is a simple example:
    ::

        import trappy
        trappy.FTrace("trace_dir")

    """

    def __init__(self, path=".", name="", normalize_time=True, scope="all",
                 events=[], window=(0, None), abs_window=(0, None)):
        # Resolve the text (and raw text) trace files, generating them
        # from trace.dat if needed, before the base class parses them.
        self.trace_path, self.trace_path_raw = self.__process_path(path)
        self.needs_raw_parsing = True

        self.__populate_metadata()

        super(FTrace, self).__init__(name, normalize_time, scope, events,
                                     window, abs_window)

    def __process_path(self, basepath):
        """Process the path and return the path to the trace text file

        Returns a (trace_txt, trace_raw) tuple; trace_raw is None when no
        raw text trace exists.
        """

        if os.path.isfile(basepath):
            stem = os.path.splitext(basepath)[0]
        else:
            stem = os.path.join(basepath, "trace")

        trace_txt = stem + ".txt"
        trace_raw = stem + ".raw.txt"
        trace_dat = stem + ".dat"

        if os.path.isfile(trace_dat):
            # Regenerate the text traces when either is missing, or when
            # the parsed TXT is older than the binary trace.  The mtime
            # comparison only runs when both text files exist, mirroring
            # the short-circuit order.
            stale = (not os.path.isfile(trace_txt) or
                     not os.path.isfile(trace_raw) or
                     os.path.getmtime(trace_txt) < os.path.getmtime(trace_dat))
            if stale:
                self.__run_trace_cmd_report(trace_dat)

        if not os.path.isfile(trace_raw):
            trace_raw = None

        return trace_txt, trace_raw

    def __run_trace_cmd_report(self, fname):
        """Run "trace-cmd report fname > fname.txt"
        and "trace-cmd report -R fname > fname.raw.txt"

        The resulting traces are stored in files with extension ".txt"
        and ".raw.txt" respectively. If fname is "my_trace.dat", the
        trace is stored in "my_trace.txt" and "my_trace.raw.txt". The
        contents of the destination files are overwritten if they
        exist.

        :raises IOError: if fname does not exist
        :raises OSError: if trace-cmd is not installed
        """
        from subprocess import check_output

        if not os.path.isfile(fname):
            raise IOError("No such file or directory: {}".format(fname))

        stem = os.path.splitext(fname)[0]
        trace_output = stem + ".txt"
        raw_trace_output = stem + ".raw.txt"

        # trace-cmd's stderr is discarded.
        with open(os.devnull) as devnull:
            try:
                out = check_output(["trace-cmd", "report", fname],
                                   stderr=devnull)
            except OSError as exc:
                # errno 2 with no filename means the executable itself was
                # not found; give a friendlier message.
                if exc.errno == 2 and not exc.filename:
                    raise OSError(2, "trace-cmd not found in PATH, is it installed?")
                else:
                    raise

            # The -R flag makes trace-cmd emit the raw event format.
            raw_out = check_output(["trace-cmd", "report", "-R", fname],
                                   stderr=devnull)

        with open(trace_output, "w") as fout:
            fout.write(out)

        with open(raw_trace_output, "w") as fout:
            fout.write(raw_out)

    def __populate_metadata(self):
        """Populates trace metadata

        Scans the trace header for "version" and "cpus" and stores them
        as self._version / self._cpus (as strings, or None if absent).
        """

        # Keys still to be found; each is removed once seen.
        pending_keys = ["version", "cpus"]

        for key in pending_keys:
            setattr(self, "_" + key, None)

        with open(self.trace_path) as fin:
            for line in fin:
                if not pending_keys:
                    return

                # The alternation shrinks as keys are found, hence the
                # pattern is rebuilt for every line.
                pattern = r"^\b(" + "|".join(pending_keys) + \
                          r")\b\s*=\s*([0-9]+)"
                found = re.search(pattern, line)
                if found:
                    key = found.group(1)
                    setattr(self, "_" + key, found.group(2))
                    pending_keys.remove(key)

                if SPECIAL_FIELDS_RE.match(line):
                    # Reached a valid trace line, abort metadata population
                    return