1#!/usr/bin/env python
2#
3# Copyright 2012 the V8 project authors. All rights reserved.
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9#       notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11#       copyright notice, this list of conditions and the following
12#       disclaimer in the documentation and/or other materials provided
13#       with the distribution.
14#     * Neither the name of Google Inc. nor the names of its
15#       contributors may be used to endorse or promote products derived
16#       from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30import bisect
31import collections
32import ctypes
33import disasm
34import mmap
35import optparse
36import os
37import re
38import subprocess
39import sys
40import time
41
42
43USAGE="""usage: %prog [OPTION]...
44
45Analyses V8 and perf logs to produce profiles.
46
47Perf logs can be collected using a command like:
48  $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
49  # -R: collect all data
50  # -e cycles: use cpu-cycles event (run "perf list" for details)
51  # -c 10000: write a sample after each 10000 events
52  # -f: force output file overwrite
53  # -i: limit profiling to our process and the kernel
54  # --ll-prof shell flag enables the right V8 logs
55This will produce a binary trace file (perf.data) that %prog can analyse.
56
57IMPORTANT:
58  The kernel has an internal maximum for events per second, it is 100K by
59  default. That's not enough for "-c 10000". Set it to some higher value:
60  $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
61  You can also make the warning about kernel address maps go away:
62  $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
63
64We have a convenience script that handles all of the above for you:
65  $ tools/run-llprof.sh ./d8 bench.js
66
67Examples:
68  # Print flat profile with annotated disassembly for the 10 top
69  # symbols. Use default log names and include the snapshot log.
70  $ %prog --snapshot --disasm-top=10
71
72  # Print flat profile with annotated disassembly for all used symbols.
73  # Use default log names and include kernel symbols into analysis.
74  $ %prog --disasm-all --kernel
75
76  # Print flat profile. Use custom log names.
77  $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
78"""
79
80
81JS_ORIGIN = "js"
82JS_SNAPSHOT_ORIGIN = "js-snapshot"
83
84class Code(object):
85  """Code object."""
86
87  _id = 0
88  UNKNOWN = 0
89  V8INTERNAL = 1
90  FULL_CODEGEN = 2
91  OPTIMIZED = 3
92
93  def __init__(self, name, start_address, end_address, origin, origin_offset):
94    self.id = Code._id
95    Code._id += 1
96    self.name = name
97    self.other_names = None
98    self.start_address = start_address
99    self.end_address = end_address
100    self.origin = origin
101    self.origin_offset = origin_offset
102    self.self_ticks = 0
103    self.self_ticks_map = None
104    self.callee_ticks = None
105    if name.startswith("LazyCompile:*"):
106      self.codetype = Code.OPTIMIZED
107    elif name.startswith("LazyCompile:"):
108      self.codetype = Code.FULL_CODEGEN
109    elif name.startswith("v8::internal::"):
110      self.codetype = Code.V8INTERNAL
111    else:
112      self.codetype = Code.UNKNOWN
113
114  def AddName(self, name):
115    assert self.name != name
116    if self.other_names is None:
117      self.other_names = [name]
118      return
119    if not name in self.other_names:
120      self.other_names.append(name)
121
122  def FullName(self):
123    if self.other_names is None:
124      return self.name
125    self.other_names.sort()
126    return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
127
128  def IsUsed(self):
129    return self.self_ticks > 0 or self.callee_ticks is not None
130
131  def Tick(self, pc):
132    self.self_ticks += 1
133    if self.self_ticks_map is None:
134      self.self_ticks_map = collections.defaultdict(lambda: 0)
135    offset = pc - self.start_address
136    self.self_ticks_map[offset] += 1
137
138  def CalleeTick(self, callee):
139    if self.callee_ticks is None:
140      self.callee_ticks = collections.defaultdict(lambda: 0)
141    self.callee_ticks[callee] += 1
142
143  def PrintAnnotated(self, arch, options):
144    if self.self_ticks_map is None:
145      ticks_map = []
146    else:
147      ticks_map = self.self_ticks_map.items()
148    # Convert the ticks map to offsets and counts arrays so that later
149    # we can do binary search in the offsets array.
150    ticks_map.sort(key=lambda t: t[0])
151    ticks_offsets = [t[0] for t in ticks_map]
152    ticks_counts = [t[1] for t in ticks_map]
153    # Get a list of disassembled lines and their addresses.
154    lines = self._GetDisasmLines(arch, options)
155    if len(lines) == 0:
156      return
157    # Print annotated lines.
158    address = lines[0][0]
159    total_count = 0
160    for i in xrange(len(lines)):
161      start_offset = lines[i][0] - address
162      if i == len(lines) - 1:
163        end_offset = self.end_address - self.start_address
164      else:
165        end_offset = lines[i + 1][0] - address
166      # Ticks (reported pc values) are not always precise, i.e. not
167      # necessarily point at instruction starts. So we have to search
168      # for ticks that touch the current instruction line.
169      j = bisect.bisect_left(ticks_offsets, end_offset)
170      count = 0
171      for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
172        if offset < start_offset:
173          break
174        count += cnt
175      total_count += count
176      count = 100.0 * count / self.self_ticks
177      if count >= 0.01:
178        print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
179      else:
180        print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
181    print
182    assert total_count == self.self_ticks, \
183        "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
184
185  def __str__(self):
186    return "%s [0x%x, 0x%x) size: %d origin: %s" % (
187      self.name,
188      self.start_address,
189      self.end_address,
190      self.end_address - self.start_address,
191      self.origin)
192
193  def _GetDisasmLines(self, arch, options):
194    if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
195      inplace = False
196      filename = options.log + ".ll"
197    else:
198      inplace = True
199      filename = self.origin
200    return disasm.GetDisasmLines(filename,
201                                 self.origin_offset,
202                                 self.end_address - self.start_address,
203                                 arch,
204                                 inplace)
205
206
207class CodePage(object):
208  """Group of adjacent code objects."""
209
210  SHIFT = 20  # 1M pages
211  SIZE = (1 << SHIFT)
212  MASK = ~(SIZE - 1)
213
214  @staticmethod
215  def PageAddress(address):
216    return address & CodePage.MASK
217
218  @staticmethod
219  def PageId(address):
220    return address >> CodePage.SHIFT
221
222  @staticmethod
223  def PageAddressFromId(id):
224    return id << CodePage.SHIFT
225
226  def __init__(self, address):
227    self.address = address
228    self.code_objects = []
229
230  def Add(self, code):
231    self.code_objects.append(code)
232
233  def Remove(self, code):
234    self.code_objects.remove(code)
235
236  def Find(self, pc):
237    code_objects = self.code_objects
238    for i, code in enumerate(code_objects):
239      if code.start_address <= pc < code.end_address:
240        code_objects[0], code_objects[i] = code, code_objects[0]
241        return code
242    return None
243
244  def __iter__(self):
245    return self.code_objects.__iter__()
246
247
248class CodeMap(object):
249  """Code object map."""
250
251  def __init__(self):
252    self.pages = {}
253    self.min_address = 1 << 64
254    self.max_address = -1
255
256  def Add(self, code, max_pages=-1):
257    page_id = CodePage.PageId(code.start_address)
258    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
259    pages = 0
260    while page_id < limit_id:
261      if max_pages >= 0 and pages > max_pages:
262        print >>sys.stderr, \
263            "Warning: page limit (%d) reached for %s [%s]" % (
264            max_pages, code.name, code.origin)
265        break
266      if page_id in self.pages:
267        page = self.pages[page_id]
268      else:
269        page = CodePage(CodePage.PageAddressFromId(page_id))
270        self.pages[page_id] = page
271      page.Add(code)
272      page_id += 1
273      pages += 1
274    self.min_address = min(self.min_address, code.start_address)
275    self.max_address = max(self.max_address, code.end_address)
276
277  def Remove(self, code):
278    page_id = CodePage.PageId(code.start_address)
279    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
280    removed = False
281    while page_id < limit_id:
282      if page_id not in self.pages:
283        page_id += 1
284        continue
285      page = self.pages[page_id]
286      page.Remove(code)
287      removed = True
288      page_id += 1
289    return removed
290
291  def AllCode(self):
292    for page in self.pages.itervalues():
293      for code in page:
294        if CodePage.PageAddress(code.start_address) == page.address:
295          yield code
296
297  def UsedCode(self):
298    for code in self.AllCode():
299      if code.IsUsed():
300        yield code
301
302  def Print(self):
303    for code in self.AllCode():
304      print code
305
306  def Find(self, pc):
307    if pc < self.min_address or pc >= self.max_address:
308      return None
309    page_id = CodePage.PageId(pc)
310    if page_id not in self.pages:
311      return None
312    return self.pages[page_id].Find(pc)
313
314
315class CodeInfo(object):
316  """Generic info about generated code objects."""
317
318  def __init__(self, arch, header_size):
319    self.arch = arch
320    self.header_size = header_size
321
322
323class SnapshotLogReader(object):
324  """V8 snapshot log reader."""
325
326  _SNAPSHOT_CODE_NAME_RE = re.compile(
327    r"snapshot-code-name,(\d+),\"(.*)\"")
328
329  def __init__(self, log_name):
330    self.log_name = log_name
331
332  def ReadNameMap(self):
333    log = open(self.log_name, "r")
334    try:
335      snapshot_pos_to_name = {}
336      for line in log:
337        match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
338        if match:
339          pos = int(match.group(1))
340          name = match.group(2)
341          snapshot_pos_to_name[pos] = name
342    finally:
343      log.close()
344    return snapshot_pos_to_name
345
346
347class LogReader(object):
348  """V8 low-level (binary) log reader."""
349
350  _ARCH_TO_POINTER_TYPE_MAP = {
351    "ia32": ctypes.c_uint32,
352    "arm": ctypes.c_uint32,
353    "mips": ctypes.c_uint32,
354    "x64": ctypes.c_uint64,
355    "arm64": ctypes.c_uint64
356  }
357
358  _CODE_CREATE_TAG = "C"
359  _CODE_MOVE_TAG = "M"
360  _CODE_DELETE_TAG = "D"
361  _SNAPSHOT_POSITION_TAG = "P"
362  _CODE_MOVING_GC_TAG = "G"
363
364  def __init__(self, log_name, code_map, snapshot_pos_to_name):
365    self.log_file = open(log_name, "r")
366    self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
367    self.log_pos = 0
368    self.code_map = code_map
369    self.snapshot_pos_to_name = snapshot_pos_to_name
370    self.address_to_snapshot_name = {}
371
372    self.arch = self.log[:self.log.find("\0")]
373    self.log_pos += len(self.arch) + 1
374    assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
375        "Unsupported architecture %s" % self.arch
376    pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
377
378    self.code_create_struct = LogReader._DefineStruct([
379        ("name_size", ctypes.c_int32),
380        ("code_address", pointer_type),
381        ("code_size", ctypes.c_int32)])
382
383    self.code_move_struct = LogReader._DefineStruct([
384        ("from_address", pointer_type),
385        ("to_address", pointer_type)])
386
387    self.code_delete_struct = LogReader._DefineStruct([
388        ("address", pointer_type)])
389
390    self.snapshot_position_struct = LogReader._DefineStruct([
391        ("address", pointer_type),
392        ("position", ctypes.c_int32)])
393
394  def ReadUpToGC(self):
395    while self.log_pos < self.log.size():
396      tag = self.log[self.log_pos]
397      self.log_pos += 1
398
399      if tag == LogReader._CODE_MOVING_GC_TAG:
400        self.address_to_snapshot_name.clear()
401        return
402
403      if tag == LogReader._CODE_CREATE_TAG:
404        event = self.code_create_struct.from_buffer(self.log, self.log_pos)
405        self.log_pos += ctypes.sizeof(event)
406        start_address = event.code_address
407        end_address = start_address + event.code_size
408        if start_address in self.address_to_snapshot_name:
409          name = self.address_to_snapshot_name[start_address]
410          origin = JS_SNAPSHOT_ORIGIN
411        else:
412          name = self.log[self.log_pos:self.log_pos + event.name_size]
413          origin = JS_ORIGIN
414        self.log_pos += event.name_size
415        origin_offset = self.log_pos
416        self.log_pos += event.code_size
417        code = Code(name, start_address, end_address, origin, origin_offset)
418        conficting_code = self.code_map.Find(start_address)
419        if conficting_code:
420          if not (conficting_code.start_address == code.start_address and
421            conficting_code.end_address == code.end_address):
422            self.code_map.Remove(conficting_code)
423          else:
424            LogReader._HandleCodeConflict(conficting_code, code)
425            # TODO(vitalyr): this warning is too noisy because of our
426            # attempts to reconstruct code log from the snapshot.
427            # print >>sys.stderr, \
428            #     "Warning: Skipping duplicate code log entry %s" % code
429            continue
430        self.code_map.Add(code)
431        continue
432
433      if tag == LogReader._CODE_MOVE_TAG:
434        event = self.code_move_struct.from_buffer(self.log, self.log_pos)
435        self.log_pos += ctypes.sizeof(event)
436        old_start_address = event.from_address
437        new_start_address = event.to_address
438        if old_start_address == new_start_address:
439          # Skip useless code move entries.
440          continue
441        code = self.code_map.Find(old_start_address)
442        if not code:
443          print >>sys.stderr, "Warning: Not found %x" % old_start_address
444          continue
445        assert code.start_address == old_start_address, \
446            "Inexact move address %x for %s" % (old_start_address, code)
447        self.code_map.Remove(code)
448        size = code.end_address - code.start_address
449        code.start_address = new_start_address
450        code.end_address = new_start_address + size
451        self.code_map.Add(code)
452        continue
453
454      if tag == LogReader._CODE_DELETE_TAG:
455        event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
456        self.log_pos += ctypes.sizeof(event)
457        old_start_address = event.address
458        code = self.code_map.Find(old_start_address)
459        if not code:
460          print >>sys.stderr, "Warning: Not found %x" % old_start_address
461          continue
462        assert code.start_address == old_start_address, \
463            "Inexact delete address %x for %s" % (old_start_address, code)
464        self.code_map.Remove(code)
465        continue
466
467      if tag == LogReader._SNAPSHOT_POSITION_TAG:
468        event = self.snapshot_position_struct.from_buffer(self.log,
469                                                          self.log_pos)
470        self.log_pos += ctypes.sizeof(event)
471        start_address = event.address
472        snapshot_pos = event.position
473        if snapshot_pos in self.snapshot_pos_to_name:
474          self.address_to_snapshot_name[start_address] = \
475              self.snapshot_pos_to_name[snapshot_pos]
476        continue
477
478      assert False, "Unknown tag %s" % tag
479
480  def Dispose(self):
481    self.log.close()
482    self.log_file.close()
483
484  @staticmethod
485  def _DefineStruct(fields):
486    class Struct(ctypes.Structure):
487      _fields_ = fields
488    return Struct
489
490  @staticmethod
491  def _HandleCodeConflict(old_code, new_code):
492    assert (old_code.start_address == new_code.start_address and
493            old_code.end_address == new_code.end_address), \
494        "Conficting code log entries %s and %s" % (old_code, new_code)
495    if old_code.name == new_code.name:
496      return
497    # Code object may be shared by a few functions. Collect the full
498    # set of names.
499    old_code.AddName(new_code.name)
500
501
502class Descriptor(object):
503  """Descriptor of a structure in the binary trace log."""
504
505  CTYPE_MAP = {
506    "u16": ctypes.c_uint16,
507    "u32": ctypes.c_uint32,
508    "u64": ctypes.c_uint64
509  }
510
511  def __init__(self, fields):
512    class TraceItem(ctypes.Structure):
513      _fields_ = Descriptor.CtypesFields(fields)
514
515      def __str__(self):
516        return ", ".join("%s: %s" % (field, self.__getattribute__(field))
517                         for field, _ in TraceItem._fields_)
518
519    self.ctype = TraceItem
520
521  def Read(self, trace, offset):
522    return self.ctype.from_buffer(trace, offset)
523
524  @staticmethod
525  def CtypesFields(fields):
526    return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
527
528
529# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
530# for the gory details.
531
532
533# Reference: struct perf_file_header in kernel/tools/perf/util/header.h
534TRACE_HEADER_DESC = Descriptor([
535  ("magic", "u64"),
536  ("size", "u64"),
537  ("attr_size", "u64"),
538  ("attrs_offset", "u64"),
539  ("attrs_size", "u64"),
540  ("data_offset", "u64"),
541  ("data_size", "u64"),
542  ("event_types_offset", "u64"),
543  ("event_types_size", "u64")
544])
545
546
547# Reference: /usr/include/linux/perf_event.h
548PERF_EVENT_ATTR_DESC = Descriptor([
549  ("type", "u32"),
550  ("size", "u32"),
551  ("config", "u64"),
552  ("sample_period_or_freq", "u64"),
553  ("sample_type", "u64"),
554  ("read_format", "u64"),
555  ("flags", "u64"),
556  ("wakeup_events_or_watermark", "u32"),
557  ("bp_type", "u32"),
558  ("bp_addr", "u64"),
559  ("bp_len", "u64")
560])
561
562
563# Reference: /usr/include/linux/perf_event.h
564PERF_EVENT_HEADER_DESC = Descriptor([
565  ("type", "u32"),
566  ("misc", "u16"),
567  ("size", "u16")
568])
569
570
571# Reference: kernel/tools/perf/util/event.h
572PERF_MMAP_EVENT_BODY_DESC = Descriptor([
573  ("pid", "u32"),
574  ("tid", "u32"),
575  ("addr", "u64"),
576  ("len", "u64"),
577  ("pgoff", "u64")
578])
579
580# Reference: kernel/tools/perf/util/event.h
581PERF_MMAP2_EVENT_BODY_DESC = Descriptor([
582  ("pid", "u32"),
583  ("tid", "u32"),
584  ("addr", "u64"),
585  ("len", "u64"),
586  ("pgoff", "u64"),
587  ("maj", "u32"),
588  ("min", "u32"),
589  ("ino", "u64"),
590  ("ino_generation", "u64"),
591  ("prot", "u32"),
592  ("flags","u32")
593])
594
595# perf_event_attr.sample_type bits control the set of
596# perf_sample_event fields.
597PERF_SAMPLE_IP = 1 << 0
598PERF_SAMPLE_TID = 1 << 1
599PERF_SAMPLE_TIME = 1 << 2
600PERF_SAMPLE_ADDR = 1 << 3
601PERF_SAMPLE_READ = 1 << 4
602PERF_SAMPLE_CALLCHAIN = 1 << 5
603PERF_SAMPLE_ID = 1 << 6
604PERF_SAMPLE_CPU = 1 << 7
605PERF_SAMPLE_PERIOD = 1 << 8
606PERF_SAMPLE_STREAM_ID = 1 << 9
607PERF_SAMPLE_RAW = 1 << 10
608
609
610# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
611PERF_SAMPLE_EVENT_BODY_FIELDS = [
612  ("ip", "u64", PERF_SAMPLE_IP),
613  ("pid", "u32", PERF_SAMPLE_TID),
614  ("tid", "u32", PERF_SAMPLE_TID),
615  ("time", "u64", PERF_SAMPLE_TIME),
616  ("addr", "u64", PERF_SAMPLE_ADDR),
617  ("id", "u64", PERF_SAMPLE_ID),
618  ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
619  ("cpu", "u32", PERF_SAMPLE_CPU),
620  ("res", "u32", PERF_SAMPLE_CPU),
621  ("period", "u64", PERF_SAMPLE_PERIOD),
622  # Don't want to handle read format that comes after the period and
623  # before the callchain and has variable size.
624  ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
625  # Raw data follows the callchain and is ignored.
626]
627
628
629PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
630
631
632PERF_RECORD_MMAP = 1
633PERF_RECORD_MMAP2 = 10
634PERF_RECORD_SAMPLE = 9
635
636
637class TraceReader(object):
638  """Perf (linux-2.6/tools/perf) trace file reader."""
639
640  _TRACE_HEADER_MAGIC = 4993446653023372624
641
642  def __init__(self, trace_name):
643    self.trace_file = open(trace_name, "r")
644    self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
645    self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
646    if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
647      print >>sys.stderr, "Warning: unsupported trace header magic"
648    self.offset = self.trace_header.data_offset
649    self.limit = self.trace_header.data_offset + self.trace_header.data_size
650    assert self.limit <= self.trace.size(), \
651        "Trace data limit exceeds trace file size"
652    self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
653    assert self.trace_header.attrs_size != 0, \
654        "No perf event attributes found in the trace"
655    perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
656                                                self.trace_header.attrs_offset)
657    self.sample_event_body_desc = self._SampleEventBodyDesc(
658        perf_event_attr.sample_type)
659    self.callchain_supported = \
660        (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
661    if self.callchain_supported:
662      self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
663      self.ip_size = ctypes.sizeof(self.ip_struct)
664
665  def ReadEventHeader(self):
666    if self.offset >= self.limit:
667      return None, 0
668    offset = self.offset
669    header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
670    self.offset += header.size
671    return header, offset
672
673  def ReadMmap(self, header, offset):
674    mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
675                                               offset + self.header_size)
676    # Read null-terminated filename.
677    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
678                          offset + header.size]
679    mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
680    return mmap_info
681
682  def ReadMmap2(self, header, offset):
683    mmap_info = PERF_MMAP2_EVENT_BODY_DESC.Read(self.trace,
684                                                offset + self.header_size)
685    # Read null-terminated filename.
686    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
687                          offset + header.size]
688    mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
689    return mmap_info
690
691  def ReadSample(self, header, offset):
692    sample = self.sample_event_body_desc.Read(self.trace,
693                                              offset + self.header_size)
694    if not self.callchain_supported:
695      return sample
696    sample.ips = []
697    offset += self.header_size + ctypes.sizeof(sample)
698    for _ in xrange(sample.nr):
699      sample.ips.append(
700        self.ip_struct.from_buffer(self.trace, offset).value)
701      offset += self.ip_size
702    return sample
703
704  def Dispose(self):
705    self.trace.close()
706    self.trace_file.close()
707
708  def _SampleEventBodyDesc(self, sample_type):
709    assert (sample_type & PERF_SAMPLE_READ) == 0, \
710           "Can't hande read format in samples"
711    fields = [(field, format)
712              for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
713              if (bit & sample_type) != 0]
714    return Descriptor(fields)
715
716
717OBJDUMP_SECTION_HEADER_RE = re.compile(
718  r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
719OBJDUMP_SYMBOL_LINE_RE = re.compile(
720  r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
721OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
722  r"^DYNAMIC SYMBOL TABLE")
723OBJDUMP_SKIP_RE = re.compile(
724  r"^.*ld\.so\.cache$")
725KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
726PERF_KERNEL_ALLSYMS_RE = re.compile(
727  r".*kallsyms.*")
728KERNEL_ALLSYMS_LINE_RE = re.compile(
729  r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
730
731
732class LibraryRepo(object):
733  def __init__(self):
734    self.infos = []
735    self.names = set()
736    self.ticks = {}
737
738
739  def HasDynamicSymbols(self, filename):
740    if filename.endswith(".ko"): return False
741    process = subprocess.Popen(
742      "%s -h %s" % (OBJDUMP_BIN, filename),
743      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
744    pipe = process.stdout
745    try:
746      for line in pipe:
747        match = OBJDUMP_SECTION_HEADER_RE.match(line)
748        if match and match.group(1) == 'dynsym': return True
749    finally:
750      pipe.close()
751    assert process.wait() == 0, "Failed to objdump -h %s" % filename
752    return False
753
754
755  def Load(self, mmap_info, code_map, options):
756    # Skip kernel mmaps when requested using the fact that their tid
757    # is 0.
758    if mmap_info.tid == 0 and not options.kernel:
759      return True
760    if OBJDUMP_SKIP_RE.match(mmap_info.filename):
761      return True
762    if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
763      return self._LoadKernelSymbols(code_map)
764    self.infos.append(mmap_info)
765    mmap_info.ticks = 0
766    mmap_info.unique_name = self._UniqueMmapName(mmap_info)
767    if not os.path.exists(mmap_info.filename):
768      return True
769    # Request section headers (-h), symbols (-t), and dynamic symbols
770    # (-T) from objdump.
771    # Unfortunately, section headers span two lines, so we have to
772    # keep the just seen section name (from the first line in each
773    # section header) in the after_section variable.
774    if self.HasDynamicSymbols(mmap_info.filename):
775      dynamic_symbols = "-T"
776    else:
777      dynamic_symbols = ""
778    process = subprocess.Popen(
779      "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
780      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
781    pipe = process.stdout
782    after_section = None
783    code_sections = set()
784    reloc_sections = set()
785    dynamic = False
786    try:
787      for line in pipe:
788        if after_section:
789          if line.find("CODE") != -1:
790            code_sections.add(after_section)
791          if line.find("RELOC") != -1:
792            reloc_sections.add(after_section)
793          after_section = None
794          continue
795
796        match = OBJDUMP_SECTION_HEADER_RE.match(line)
797        if match:
798          after_section = match.group(1)
799          continue
800
801        if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
802          dynamic = True
803          continue
804
805        match = OBJDUMP_SYMBOL_LINE_RE.match(line)
806        if match:
807          start_address = int(match.group(1), 16)
808          origin_offset = start_address
809          flags = match.group(2)
810          section = match.group(3)
811          if section in code_sections:
812            if dynamic or section in reloc_sections:
813              start_address += mmap_info.addr
814            size = int(match.group(4), 16)
815            name = match.group(5)
816            origin = mmap_info.filename
817            code_map.Add(Code(name, start_address, start_address + size,
818                              origin, origin_offset))
819    finally:
820      pipe.close()
821    assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
822
823  def Tick(self, pc):
824    for i, mmap_info in enumerate(self.infos):
825      if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
826        mmap_info.ticks += 1
827        self.infos[0], self.infos[i] = mmap_info, self.infos[0]
828        return True
829    return False
830
831  def _UniqueMmapName(self, mmap_info):
832    name = mmap_info.filename
833    index = 1
834    while name in self.names:
835      name = "%s-%d" % (mmap_info.filename, index)
836      index += 1
837    self.names.add(name)
838    return name
839
840  def _LoadKernelSymbols(self, code_map):
841    if not os.path.exists(KERNEL_ALLSYMS_FILE):
842      print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
843      return False
844    kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
845    code = None
846    for line in kallsyms:
847      match = KERNEL_ALLSYMS_LINE_RE.match(line)
848      if match:
849        start_address = int(match.group(1), 16)
850        end_address = start_address
851        name = match.group(2)
852        if code:
853          code.end_address = start_address
854          code_map.Add(code, 16)
855        code = Code(name, start_address, end_address, "kernel", 0)
856    return True
857
858
859def PrintReport(code_map, library_repo, arch, ticks, options):
860  print "Ticks per symbol:"
861  used_code = [code for code in code_map.UsedCode()]
862  used_code.sort(key=lambda x: x.self_ticks, reverse=True)
863  for i, code in enumerate(used_code):
864    code_ticks = code.self_ticks
865    print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
866                                    code.FullName(), code.origin)
867    if options.disasm_all or i < options.disasm_top:
868      code.PrintAnnotated(arch, options)
869  print
870  print "Ticks per library:"
871  mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
872  mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
873  for mmap_info in mmap_infos:
874    mmap_ticks = mmap_info.ticks
875    print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
876                               mmap_info.unique_name)
877
878
879def PrintDot(code_map, options):
880  print "digraph G {"
881  for code in code_map.UsedCode():
882    if code.self_ticks < 10:
883      continue
884    print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
885    if code.callee_ticks:
886      for callee, ticks in code.callee_ticks.iteritems():
887        print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
888  print "}"
889
890
891if __name__ == "__main__":
892  parser = optparse.OptionParser(USAGE)
893  parser.add_option("--snapshot-log",
894                    default="obj/release/snapshot.log",
895                    help="V8 snapshot log file name [default: %default]")
896  parser.add_option("--log",
897                    default="v8.log",
898                    help="V8 log file name [default: %default]")
899  parser.add_option("--snapshot",
900                    default=False,
901                    action="store_true",
902                    help="process V8 snapshot log [default: %default]")
903  parser.add_option("--trace",
904                    default="perf.data",
905                    help="perf trace file name [default: %default]")
906  parser.add_option("--kernel",
907                    default=False,
908                    action="store_true",
909                    help="process kernel entries [default: %default]")
910  parser.add_option("--disasm-top",
911                    default=0,
912                    type="int",
913                    help=("number of top symbols to disassemble and annotate "
914                          "[default: %default]"))
915  parser.add_option("--disasm-all",
916                    default=False,
917                    action="store_true",
918                    help=("disassemble and annotate all used symbols "
919                          "[default: %default]"))
920  parser.add_option("--dot",
921                    default=False,
922                    action="store_true",
923                    help="produce dot output (WIP) [default: %default]")
924  parser.add_option("--quiet", "-q",
925                    default=False,
926                    action="store_true",
927                    help="no auxiliary messages [default: %default]")
928  parser.add_option("--gc-fake-mmap",
929                    default="/tmp/__v8_gc__",
930                    help="gc fake mmap file [default: %default]")
931  parser.add_option("--objdump",
932                    default="/usr/bin/objdump",
933                    help="objdump tool to use [default: %default]")
934  parser.add_option("--host-root",
935                    default="",
936                    help="Path to the host root [default: %default]")
937  options, args = parser.parse_args()
938
939  if not options.quiet:
940    if options.snapshot:
941      print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
942                                        options.log,
943                                        options.log)
944    else:
945      print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
946    print "Perf trace file: %s" % options.trace
947
948  V8_GC_FAKE_MMAP = options.gc_fake_mmap
949  HOST_ROOT = options.host_root
950  if os.path.exists(options.objdump):
951    disasm.OBJDUMP_BIN = options.objdump
952    OBJDUMP_BIN = options.objdump
953  else:
954    print "Cannot find %s, falling back to default objdump" % options.objdump
955
956  # Stats.
957  events = 0
958  ticks = 0
959  missed_ticks = 0
960  really_missed_ticks = 0
961  optimized_ticks = 0
962  generated_ticks = 0
963  v8_internal_ticks = 0
964  mmap_time = 0
965  sample_time = 0
966
967  # Process the snapshot log to fill the snapshot name map.
968  snapshot_name_map = {}
969  if options.snapshot:
970    snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
971    snapshot_name_map = snapshot_log_reader.ReadNameMap()
972
973  # Initialize the log reader.
974  code_map = CodeMap()
975  log_reader = LogReader(log_name=options.log + ".ll",
976                         code_map=code_map,
977                         snapshot_pos_to_name=snapshot_name_map)
978  if not options.quiet:
979    print "Generated code architecture: %s" % log_reader.arch
980    print
981    sys.stdout.flush()
982
983  # Process the code and trace logs.
984  library_repo = LibraryRepo()
985  log_reader.ReadUpToGC()
986  trace_reader = TraceReader(options.trace)
987  while True:
988    header, offset = trace_reader.ReadEventHeader()
989    if not header:
990      break
991    events += 1
992    if header.type == PERF_RECORD_MMAP:
993      start = time.time()
994      mmap_info = trace_reader.ReadMmap(header, offset)
995      if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
996        log_reader.ReadUpToGC()
997      else:
998        library_repo.Load(mmap_info, code_map, options)
999      mmap_time += time.time() - start
1000    elif header.type == PERF_RECORD_MMAP2:
1001      start = time.time()
1002      mmap_info = trace_reader.ReadMmap2(header, offset)
1003      if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
1004        log_reader.ReadUpToGC()
1005      else:
1006        library_repo.Load(mmap_info, code_map, options)
1007      mmap_time += time.time() - start
1008    elif header.type == PERF_RECORD_SAMPLE:
1009      ticks += 1
1010      start = time.time()
1011      sample = trace_reader.ReadSample(header, offset)
1012      code = code_map.Find(sample.ip)
1013      if code:
1014        code.Tick(sample.ip)
1015        if code.codetype == Code.OPTIMIZED:
1016          optimized_ticks += 1
1017        elif code.codetype == Code.FULL_CODEGEN:
1018          generated_ticks += 1
1019        elif code.codetype == Code.V8INTERNAL:
1020          v8_internal_ticks += 1
1021      else:
1022        missed_ticks += 1
1023      if not library_repo.Tick(sample.ip) and not code:
1024        really_missed_ticks += 1
1025      if trace_reader.callchain_supported:
1026        for ip in sample.ips:
1027          caller_code = code_map.Find(ip)
1028          if caller_code:
1029            if code:
1030              caller_code.CalleeTick(code)
1031            code = caller_code
1032      sample_time += time.time() - start
1033
1034  if options.dot:
1035    PrintDot(code_map, options)
1036  else:
1037    PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
1038
1039    if not options.quiet:
1040      def PrintTicks(number, total, description):
1041        print("%10d %5.1f%% ticks in %s" %
1042              (number, 100.0*number/total, description))
1043      print
1044      print "Stats:"
1045      print "%10d total trace events" % events
1046      print "%10d total ticks" % ticks
1047      print "%10d ticks not in symbols" % missed_ticks
1048      unaccounted = "unaccounted ticks"
1049      if really_missed_ticks > 0:
1050        unaccounted += " (probably in the kernel, try --kernel)"
1051      PrintTicks(really_missed_ticks, ticks, unaccounted)
1052      PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
1053      PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
1054      PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
1055      print "%10d total symbols" % len([c for c in code_map.AllCode()])
1056      print "%10d used symbols" % len([c for c in code_map.UsedCode()])
1057      print "%9.2fs library processing time" % mmap_time
1058      print "%9.2fs tick processing time" % sample_time
1059
1060  log_reader.Dispose()
1061  trace_reader.Dispose()
1062