1#!/usr/bin/env python
2#
3# Copyright 2012 the V8 project authors. All rights reserved.
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9#       notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11#       copyright notice, this list of conditions and the following
12#       disclaimer in the documentation and/or other materials provided
13#       with the distribution.
14#     * Neither the name of Google Inc. nor the names of its
15#       contributors may be used to endorse or promote products derived
16#       from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30import bisect
31import collections
32import ctypes
33import disasm
34import mmap
35import optparse
36import os
37import re
38import subprocess
39import sys
40import time
41
42
43USAGE="""usage: %prog [OPTION]...
44
45Analyses V8 and perf logs to produce profiles.
46
47Perf logs can be collected using a command like:
48  $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
49  # -R: collect all data
50  # -e cycles: use cpu-cycles event (run "perf list" for details)
51  # -c 10000: write a sample after each 10000 events
52  # -f: force output file overwrite
53  # -i: limit profiling to our process and the kernel
54  # --ll-prof shell flag enables the right V8 logs
55This will produce a binary trace file (perf.data) that %prog can analyse.
56
57IMPORTANT:
58  The kernel has an internal maximum for events per second, it is 100K by
59  default. That's not enough for "-c 10000". Set it to some higher value:
60  $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
61  You can also make the warning about kernel address maps go away:
62  $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
63
64We have a convenience script that handles all of the above for you:
65  $ tools/run-llprof.sh ./d8 bench.js
66
67Examples:
68  # Print flat profile with annotated disassembly for the 10 top
69  # symbols. Use default log names and include the snapshot log.
70  $ %prog --snapshot --disasm-top=10
71
72  # Print flat profile with annotated disassembly for all used symbols.
73  # Use default log names and include kernel symbols into analysis.
74  $ %prog --disasm-all --kernel
75
76  # Print flat profile. Use custom log names.
77  $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
78"""
79
80
81JS_ORIGIN = "js"
82JS_SNAPSHOT_ORIGIN = "js-snapshot"
83
84class Code(object):
85  """Code object."""
86
87  _id = 0
88  UNKNOWN = 0
89  V8INTERNAL = 1
90  FULL_CODEGEN = 2
91  OPTIMIZED = 3
92
93  def __init__(self, name, start_address, end_address, origin, origin_offset):
94    self.id = Code._id
95    Code._id += 1
96    self.name = name
97    self.other_names = None
98    self.start_address = start_address
99    self.end_address = end_address
100    self.origin = origin
101    self.origin_offset = origin_offset
102    self.self_ticks = 0
103    self.self_ticks_map = None
104    self.callee_ticks = None
105    if name.startswith("LazyCompile:*"):
106      self.codetype = Code.OPTIMIZED
107    elif name.startswith("LazyCompile:"):
108      self.codetype = Code.FULL_CODEGEN
109    elif name.startswith("v8::internal::"):
110      self.codetype = Code.V8INTERNAL
111    else:
112      self.codetype = Code.UNKNOWN
113
114  def AddName(self, name):
115    assert self.name != name
116    if self.other_names is None:
117      self.other_names = [name]
118      return
119    if not name in self.other_names:
120      self.other_names.append(name)
121
122  def FullName(self):
123    if self.other_names is None:
124      return self.name
125    self.other_names.sort()
126    return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
127
128  def IsUsed(self):
129    return self.self_ticks > 0 or self.callee_ticks is not None
130
131  def Tick(self, pc):
132    self.self_ticks += 1
133    if self.self_ticks_map is None:
134      self.self_ticks_map = collections.defaultdict(lambda: 0)
135    offset = pc - self.start_address
136    self.self_ticks_map[offset] += 1
137
138  def CalleeTick(self, callee):
139    if self.callee_ticks is None:
140      self.callee_ticks = collections.defaultdict(lambda: 0)
141    self.callee_ticks[callee] += 1
142
143  def PrintAnnotated(self, arch, options):
144    if self.self_ticks_map is None:
145      ticks_map = []
146    else:
147      ticks_map = self.self_ticks_map.items()
148    # Convert the ticks map to offsets and counts arrays so that later
149    # we can do binary search in the offsets array.
150    ticks_map.sort(key=lambda t: t[0])
151    ticks_offsets = [t[0] for t in ticks_map]
152    ticks_counts = [t[1] for t in ticks_map]
153    # Get a list of disassembled lines and their addresses.
154    lines = self._GetDisasmLines(arch, options)
155    if len(lines) == 0:
156      return
157    # Print annotated lines.
158    address = lines[0][0]
159    total_count = 0
160    for i in xrange(len(lines)):
161      start_offset = lines[i][0] - address
162      if i == len(lines) - 1:
163        end_offset = self.end_address - self.start_address
164      else:
165        end_offset = lines[i + 1][0] - address
166      # Ticks (reported pc values) are not always precise, i.e. not
167      # necessarily point at instruction starts. So we have to search
168      # for ticks that touch the current instruction line.
169      j = bisect.bisect_left(ticks_offsets, end_offset)
170      count = 0
171      for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
172        if offset < start_offset:
173          break
174        count += cnt
175      total_count += count
176      count = 100.0 * count / self.self_ticks
177      if count >= 0.01:
178        print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
179      else:
180        print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
181    print
182    assert total_count == self.self_ticks, \
183        "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
184
185  def __str__(self):
186    return "%s [0x%x, 0x%x) size: %d origin: %s" % (
187      self.name,
188      self.start_address,
189      self.end_address,
190      self.end_address - self.start_address,
191      self.origin)
192
193  def _GetDisasmLines(self, arch, options):
194    if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
195      inplace = False
196      filename = options.log + ".ll"
197    else:
198      inplace = True
199      filename = self.origin
200    return disasm.GetDisasmLines(filename,
201                                 self.origin_offset,
202                                 self.end_address - self.start_address,
203                                 arch,
204                                 inplace)
205
206
207class CodePage(object):
208  """Group of adjacent code objects."""
209
210  SHIFT = 20  # 1M pages
211  SIZE = (1 << SHIFT)
212  MASK = ~(SIZE - 1)
213
214  @staticmethod
215  def PageAddress(address):
216    return address & CodePage.MASK
217
218  @staticmethod
219  def PageId(address):
220    return address >> CodePage.SHIFT
221
222  @staticmethod
223  def PageAddressFromId(id):
224    return id << CodePage.SHIFT
225
226  def __init__(self, address):
227    self.address = address
228    self.code_objects = []
229
230  def Add(self, code):
231    self.code_objects.append(code)
232
233  def Remove(self, code):
234    self.code_objects.remove(code)
235
236  def Find(self, pc):
237    code_objects = self.code_objects
238    for i, code in enumerate(code_objects):
239      if code.start_address <= pc < code.end_address:
240        code_objects[0], code_objects[i] = code, code_objects[0]
241        return code
242    return None
243
244  def __iter__(self):
245    return self.code_objects.__iter__()
246
247
248class CodeMap(object):
249  """Code object map."""
250
251  def __init__(self):
252    self.pages = {}
253    self.min_address = 1 << 64
254    self.max_address = -1
255
256  def Add(self, code, max_pages=-1):
257    page_id = CodePage.PageId(code.start_address)
258    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
259    pages = 0
260    while page_id < limit_id:
261      if max_pages >= 0 and pages > max_pages:
262        print >>sys.stderr, \
263            "Warning: page limit (%d) reached for %s [%s]" % (
264            max_pages, code.name, code.origin)
265        break
266      if page_id in self.pages:
267        page = self.pages[page_id]
268      else:
269        page = CodePage(CodePage.PageAddressFromId(page_id))
270        self.pages[page_id] = page
271      page.Add(code)
272      page_id += 1
273      pages += 1
274    self.min_address = min(self.min_address, code.start_address)
275    self.max_address = max(self.max_address, code.end_address)
276
277  def Remove(self, code):
278    page_id = CodePage.PageId(code.start_address)
279    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
280    removed = False
281    while page_id < limit_id:
282      if page_id not in self.pages:
283        page_id += 1
284        continue
285      page = self.pages[page_id]
286      page.Remove(code)
287      removed = True
288      page_id += 1
289    return removed
290
291  def AllCode(self):
292    for page in self.pages.itervalues():
293      for code in page:
294        if CodePage.PageAddress(code.start_address) == page.address:
295          yield code
296
297  def UsedCode(self):
298    for code in self.AllCode():
299      if code.IsUsed():
300        yield code
301
302  def Print(self):
303    for code in self.AllCode():
304      print code
305
306  def Find(self, pc):
307    if pc < self.min_address or pc >= self.max_address:
308      return None
309    page_id = CodePage.PageId(pc)
310    if page_id not in self.pages:
311      return None
312    return self.pages[page_id].Find(pc)
313
314
315class CodeInfo(object):
316  """Generic info about generated code objects."""
317
318  def __init__(self, arch, header_size):
319    self.arch = arch
320    self.header_size = header_size
321
322
323class SnapshotLogReader(object):
324  """V8 snapshot log reader."""
325
326  _SNAPSHOT_CODE_NAME_RE = re.compile(
327    r"snapshot-code-name,(\d+),\"(.*)\"")
328
329  def __init__(self, log_name):
330    self.log_name = log_name
331
332  def ReadNameMap(self):
333    log = open(self.log_name, "r")
334    try:
335      snapshot_pos_to_name = {}
336      for line in log:
337        match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
338        if match:
339          pos = int(match.group(1))
340          name = match.group(2)
341          snapshot_pos_to_name[pos] = name
342    finally:
343      log.close()
344    return snapshot_pos_to_name
345
346
347class LogReader(object):
348  """V8 low-level (binary) log reader."""
349
350  _ARCH_TO_POINTER_TYPE_MAP = {
351    "ia32": ctypes.c_uint32,
352    "arm": ctypes.c_uint32,
353    "mips": ctypes.c_uint32,
354    "x64": ctypes.c_uint64,
355    "arm64": ctypes.c_uint64
356  }
357
358  _CODE_CREATE_TAG = "C"
359  _CODE_MOVE_TAG = "M"
360  _CODE_DELETE_TAG = "D"
361  _SNAPSHOT_POSITION_TAG = "P"
362  _CODE_MOVING_GC_TAG = "G"
363
364  def __init__(self, log_name, code_map, snapshot_pos_to_name):
365    self.log_file = open(log_name, "r")
366    self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
367    self.log_pos = 0
368    self.code_map = code_map
369    self.snapshot_pos_to_name = snapshot_pos_to_name
370    self.address_to_snapshot_name = {}
371
372    self.arch = self.log[:self.log.find("\0")]
373    self.log_pos += len(self.arch) + 1
374    assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
375        "Unsupported architecture %s" % self.arch
376    pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
377
378    self.code_create_struct = LogReader._DefineStruct([
379        ("name_size", ctypes.c_int32),
380        ("code_address", pointer_type),
381        ("code_size", ctypes.c_int32)])
382
383    self.code_move_struct = LogReader._DefineStruct([
384        ("from_address", pointer_type),
385        ("to_address", pointer_type)])
386
387    self.code_delete_struct = LogReader._DefineStruct([
388        ("address", pointer_type)])
389
390    self.snapshot_position_struct = LogReader._DefineStruct([
391        ("address", pointer_type),
392        ("position", ctypes.c_int32)])
393
394  def ReadUpToGC(self):
395    while self.log_pos < self.log.size():
396      tag = self.log[self.log_pos]
397      self.log_pos += 1
398
399      if tag == LogReader._CODE_MOVING_GC_TAG:
400        self.address_to_snapshot_name.clear()
401        return
402
403      if tag == LogReader._CODE_CREATE_TAG:
404        event = self.code_create_struct.from_buffer(self.log, self.log_pos)
405        self.log_pos += ctypes.sizeof(event)
406        start_address = event.code_address
407        end_address = start_address + event.code_size
408        if start_address in self.address_to_snapshot_name:
409          name = self.address_to_snapshot_name[start_address]
410          origin = JS_SNAPSHOT_ORIGIN
411        else:
412          name = self.log[self.log_pos:self.log_pos + event.name_size]
413          origin = JS_ORIGIN
414        self.log_pos += event.name_size
415        origin_offset = self.log_pos
416        self.log_pos += event.code_size
417        code = Code(name, start_address, end_address, origin, origin_offset)
418        conficting_code = self.code_map.Find(start_address)
419        if conficting_code:
420          if not (conficting_code.start_address == code.start_address and
421            conficting_code.end_address == code.end_address):
422            self.code_map.Remove(conficting_code)
423          else:
424            LogReader._HandleCodeConflict(conficting_code, code)
425            # TODO(vitalyr): this warning is too noisy because of our
426            # attempts to reconstruct code log from the snapshot.
427            # print >>sys.stderr, \
428            #     "Warning: Skipping duplicate code log entry %s" % code
429            continue
430        self.code_map.Add(code)
431        continue
432
433      if tag == LogReader._CODE_MOVE_TAG:
434        event = self.code_move_struct.from_buffer(self.log, self.log_pos)
435        self.log_pos += ctypes.sizeof(event)
436        old_start_address = event.from_address
437        new_start_address = event.to_address
438        if old_start_address == new_start_address:
439          # Skip useless code move entries.
440          continue
441        code = self.code_map.Find(old_start_address)
442        if not code:
443          print >>sys.stderr, "Warning: Not found %x" % old_start_address
444          continue
445        assert code.start_address == old_start_address, \
446            "Inexact move address %x for %s" % (old_start_address, code)
447        self.code_map.Remove(code)
448        size = code.end_address - code.start_address
449        code.start_address = new_start_address
450        code.end_address = new_start_address + size
451        self.code_map.Add(code)
452        continue
453
454      if tag == LogReader._CODE_DELETE_TAG:
455        event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
456        self.log_pos += ctypes.sizeof(event)
457        old_start_address = event.address
458        code = self.code_map.Find(old_start_address)
459        if not code:
460          print >>sys.stderr, "Warning: Not found %x" % old_start_address
461          continue
462        assert code.start_address == old_start_address, \
463            "Inexact delete address %x for %s" % (old_start_address, code)
464        self.code_map.Remove(code)
465        continue
466
467      if tag == LogReader._SNAPSHOT_POSITION_TAG:
468        event = self.snapshot_position_struct.from_buffer(self.log,
469                                                          self.log_pos)
470        self.log_pos += ctypes.sizeof(event)
471        start_address = event.address
472        snapshot_pos = event.position
473        if snapshot_pos in self.snapshot_pos_to_name:
474          self.address_to_snapshot_name[start_address] = \
475              self.snapshot_pos_to_name[snapshot_pos]
476        continue
477
478      assert False, "Unknown tag %s" % tag
479
480  def Dispose(self):
481    self.log.close()
482    self.log_file.close()
483
484  @staticmethod
485  def _DefineStruct(fields):
486    class Struct(ctypes.Structure):
487      _fields_ = fields
488    return Struct
489
490  @staticmethod
491  def _HandleCodeConflict(old_code, new_code):
492    assert (old_code.start_address == new_code.start_address and
493            old_code.end_address == new_code.end_address), \
494        "Conficting code log entries %s and %s" % (old_code, new_code)
495    if old_code.name == new_code.name:
496      return
497    # Code object may be shared by a few functions. Collect the full
498    # set of names.
499    old_code.AddName(new_code.name)
500
501
502class Descriptor(object):
503  """Descriptor of a structure in the binary trace log."""
504
505  CTYPE_MAP = {
506    "u16": ctypes.c_uint16,
507    "u32": ctypes.c_uint32,
508    "u64": ctypes.c_uint64
509  }
510
511  def __init__(self, fields):
512    class TraceItem(ctypes.Structure):
513      _fields_ = Descriptor.CtypesFields(fields)
514
515      def __str__(self):
516        return ", ".join("%s: %s" % (field, self.__getattribute__(field))
517                         for field, _ in TraceItem._fields_)
518
519    self.ctype = TraceItem
520
521  def Read(self, trace, offset):
522    return self.ctype.from_buffer(trace, offset)
523
524  @staticmethod
525  def CtypesFields(fields):
526    return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
527
528
529# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
530# for the gory details.
531
532
533# Reference: struct perf_file_header in kernel/tools/perf/util/header.h
534TRACE_HEADER_DESC = Descriptor([
535  ("magic", "u64"),
536  ("size", "u64"),
537  ("attr_size", "u64"),
538  ("attrs_offset", "u64"),
539  ("attrs_size", "u64"),
540  ("data_offset", "u64"),
541  ("data_size", "u64"),
542  ("event_types_offset", "u64"),
543  ("event_types_size", "u64")
544])
545
546
547# Reference: /usr/include/linux/perf_event.h
548PERF_EVENT_ATTR_DESC = Descriptor([
549  ("type", "u32"),
550  ("size", "u32"),
551  ("config", "u64"),
552  ("sample_period_or_freq", "u64"),
553  ("sample_type", "u64"),
554  ("read_format", "u64"),
555  ("flags", "u64"),
556  ("wakeup_events_or_watermark", "u32"),
557  ("bp_type", "u32"),
558  ("bp_addr", "u64"),
559  ("bp_len", "u64")
560])
561
562
563# Reference: /usr/include/linux/perf_event.h
564PERF_EVENT_HEADER_DESC = Descriptor([
565  ("type", "u32"),
566  ("misc", "u16"),
567  ("size", "u16")
568])
569
570
571# Reference: kernel/events/core.c
572PERF_MMAP_EVENT_BODY_DESC = Descriptor([
573  ("pid", "u32"),
574  ("tid", "u32"),
575  ("addr", "u64"),
576  ("len", "u64"),
577  ("pgoff", "u64")
578])
579
580
581# perf_event_attr.sample_type bits control the set of
582# perf_sample_event fields.
583PERF_SAMPLE_IP = 1 << 0
584PERF_SAMPLE_TID = 1 << 1
585PERF_SAMPLE_TIME = 1 << 2
586PERF_SAMPLE_ADDR = 1 << 3
587PERF_SAMPLE_READ = 1 << 4
588PERF_SAMPLE_CALLCHAIN = 1 << 5
589PERF_SAMPLE_ID = 1 << 6
590PERF_SAMPLE_CPU = 1 << 7
591PERF_SAMPLE_PERIOD = 1 << 8
592PERF_SAMPLE_STREAM_ID = 1 << 9
593PERF_SAMPLE_RAW = 1 << 10
594
595
596# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
597PERF_SAMPLE_EVENT_BODY_FIELDS = [
598  ("ip", "u64", PERF_SAMPLE_IP),
599  ("pid", "u32", PERF_SAMPLE_TID),
600  ("tid", "u32", PERF_SAMPLE_TID),
601  ("time", "u64", PERF_SAMPLE_TIME),
602  ("addr", "u64", PERF_SAMPLE_ADDR),
603  ("id", "u64", PERF_SAMPLE_ID),
604  ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
605  ("cpu", "u32", PERF_SAMPLE_CPU),
606  ("res", "u32", PERF_SAMPLE_CPU),
607  ("period", "u64", PERF_SAMPLE_PERIOD),
608  # Don't want to handle read format that comes after the period and
609  # before the callchain and has variable size.
610  ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
611  # Raw data follows the callchain and is ignored.
612]
613
614
615PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
616
617
618PERF_RECORD_MMAP = 1
619PERF_RECORD_SAMPLE = 9
620
621
622class TraceReader(object):
623  """Perf (linux-2.6/tools/perf) trace file reader."""
624
625  _TRACE_HEADER_MAGIC = 4993446653023372624
626
627  def __init__(self, trace_name):
628    self.trace_file = open(trace_name, "r")
629    self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
630    self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
631    if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
632      print >>sys.stderr, "Warning: unsupported trace header magic"
633    self.offset = self.trace_header.data_offset
634    self.limit = self.trace_header.data_offset + self.trace_header.data_size
635    assert self.limit <= self.trace.size(), \
636        "Trace data limit exceeds trace file size"
637    self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
638    assert self.trace_header.attrs_size != 0, \
639        "No perf event attributes found in the trace"
640    perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
641                                                self.trace_header.attrs_offset)
642    self.sample_event_body_desc = self._SampleEventBodyDesc(
643        perf_event_attr.sample_type)
644    self.callchain_supported = \
645        (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
646    if self.callchain_supported:
647      self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
648      self.ip_size = ctypes.sizeof(self.ip_struct)
649
650  def ReadEventHeader(self):
651    if self.offset >= self.limit:
652      return None, 0
653    offset = self.offset
654    header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
655    self.offset += header.size
656    return header, offset
657
658  def ReadMmap(self, header, offset):
659    mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
660                                               offset + self.header_size)
661    # Read null-terminated filename.
662    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
663                          offset + header.size]
664    mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
665    return mmap_info
666
667  def ReadSample(self, header, offset):
668    sample = self.sample_event_body_desc.Read(self.trace,
669                                              offset + self.header_size)
670    if not self.callchain_supported:
671      return sample
672    sample.ips = []
673    offset += self.header_size + ctypes.sizeof(sample)
674    for _ in xrange(sample.nr):
675      sample.ips.append(
676        self.ip_struct.from_buffer(self.trace, offset).value)
677      offset += self.ip_size
678    return sample
679
680  def Dispose(self):
681    self.trace.close()
682    self.trace_file.close()
683
684  def _SampleEventBodyDesc(self, sample_type):
685    assert (sample_type & PERF_SAMPLE_READ) == 0, \
686           "Can't hande read format in samples"
687    fields = [(field, format)
688              for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
689              if (bit & sample_type) != 0]
690    return Descriptor(fields)
691
692
693OBJDUMP_SECTION_HEADER_RE = re.compile(
694  r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
695OBJDUMP_SYMBOL_LINE_RE = re.compile(
696  r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
697OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
698  r"^DYNAMIC SYMBOL TABLE")
699OBJDUMP_SKIP_RE = re.compile(
700  r"^.*ld\.so\.cache$")
701KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
702PERF_KERNEL_ALLSYMS_RE = re.compile(
703  r".*kallsyms.*")
704KERNEL_ALLSYMS_LINE_RE = re.compile(
705  r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
706
707
708class LibraryRepo(object):
709  def __init__(self):
710    self.infos = []
711    self.names = set()
712    self.ticks = {}
713
714  def Load(self, mmap_info, code_map, options):
715    # Skip kernel mmaps when requested using the fact that their tid
716    # is 0.
717    if mmap_info.tid == 0 and not options.kernel:
718      return True
719    if OBJDUMP_SKIP_RE.match(mmap_info.filename):
720      return True
721    if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
722      return self._LoadKernelSymbols(code_map)
723    self.infos.append(mmap_info)
724    mmap_info.ticks = 0
725    mmap_info.unique_name = self._UniqueMmapName(mmap_info)
726    if not os.path.exists(mmap_info.filename):
727      return True
728    # Request section headers (-h), symbols (-t), and dynamic symbols
729    # (-T) from objdump.
730    # Unfortunately, section headers span two lines, so we have to
731    # keep the just seen section name (from the first line in each
732    # section header) in the after_section variable.
733    if mmap_info.filename.endswith(".ko"):
734      dynamic_symbols = ""
735    else:
736      dynamic_symbols = "-T"
737    process = subprocess.Popen(
738      "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
739      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
740    pipe = process.stdout
741    after_section = None
742    code_sections = set()
743    reloc_sections = set()
744    dynamic = False
745    try:
746      for line in pipe:
747        if after_section:
748          if line.find("CODE") != -1:
749            code_sections.add(after_section)
750          if line.find("RELOC") != -1:
751            reloc_sections.add(after_section)
752          after_section = None
753          continue
754
755        match = OBJDUMP_SECTION_HEADER_RE.match(line)
756        if match:
757          after_section = match.group(1)
758          continue
759
760        if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
761          dynamic = True
762          continue
763
764        match = OBJDUMP_SYMBOL_LINE_RE.match(line)
765        if match:
766          start_address = int(match.group(1), 16)
767          origin_offset = start_address
768          flags = match.group(2)
769          section = match.group(3)
770          if section in code_sections:
771            if dynamic or section in reloc_sections:
772              start_address += mmap_info.addr
773            size = int(match.group(4), 16)
774            name = match.group(5)
775            origin = mmap_info.filename
776            code_map.Add(Code(name, start_address, start_address + size,
777                              origin, origin_offset))
778    finally:
779      pipe.close()
780    assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
781
782  def Tick(self, pc):
783    for i, mmap_info in enumerate(self.infos):
784      if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
785        mmap_info.ticks += 1
786        self.infos[0], self.infos[i] = mmap_info, self.infos[0]
787        return True
788    return False
789
790  def _UniqueMmapName(self, mmap_info):
791    name = mmap_info.filename
792    index = 1
793    while name in self.names:
794      name = "%s-%d" % (mmap_info.filename, index)
795      index += 1
796    self.names.add(name)
797    return name
798
799  def _LoadKernelSymbols(self, code_map):
800    if not os.path.exists(KERNEL_ALLSYMS_FILE):
801      print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
802      return False
803    kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
804    code = None
805    for line in kallsyms:
806      match = KERNEL_ALLSYMS_LINE_RE.match(line)
807      if match:
808        start_address = int(match.group(1), 16)
809        end_address = start_address
810        name = match.group(2)
811        if code:
812          code.end_address = start_address
813          code_map.Add(code, 16)
814        code = Code(name, start_address, end_address, "kernel", 0)
815    return True
816
817
818def PrintReport(code_map, library_repo, arch, ticks, options):
819  print "Ticks per symbol:"
820  used_code = [code for code in code_map.UsedCode()]
821  used_code.sort(key=lambda x: x.self_ticks, reverse=True)
822  for i, code in enumerate(used_code):
823    code_ticks = code.self_ticks
824    print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
825                                    code.FullName(), code.origin)
826    if options.disasm_all or i < options.disasm_top:
827      code.PrintAnnotated(arch, options)
828  print
829  print "Ticks per library:"
830  mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
831  mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
832  for mmap_info in mmap_infos:
833    mmap_ticks = mmap_info.ticks
834    print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
835                               mmap_info.unique_name)
836
837
838def PrintDot(code_map, options):
839  print "digraph G {"
840  for code in code_map.UsedCode():
841    if code.self_ticks < 10:
842      continue
843    print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
844    if code.callee_ticks:
845      for callee, ticks in code.callee_ticks.iteritems():
846        print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
847  print "}"
848
849
850if __name__ == "__main__":
851  parser = optparse.OptionParser(USAGE)
852  parser.add_option("--snapshot-log",
853                    default="obj/release/snapshot.log",
854                    help="V8 snapshot log file name [default: %default]")
855  parser.add_option("--log",
856                    default="v8.log",
857                    help="V8 log file name [default: %default]")
858  parser.add_option("--snapshot",
859                    default=False,
860                    action="store_true",
861                    help="process V8 snapshot log [default: %default]")
862  parser.add_option("--trace",
863                    default="perf.data",
864                    help="perf trace file name [default: %default]")
865  parser.add_option("--kernel",
866                    default=False,
867                    action="store_true",
868                    help="process kernel entries [default: %default]")
869  parser.add_option("--disasm-top",
870                    default=0,
871                    type="int",
872                    help=("number of top symbols to disassemble and annotate "
873                          "[default: %default]"))
874  parser.add_option("--disasm-all",
875                    default=False,
876                    action="store_true",
877                    help=("disassemble and annotate all used symbols "
878                          "[default: %default]"))
879  parser.add_option("--dot",
880                    default=False,
881                    action="store_true",
882                    help="produce dot output (WIP) [default: %default]")
883  parser.add_option("--quiet", "-q",
884                    default=False,
885                    action="store_true",
886                    help="no auxiliary messages [default: %default]")
887  parser.add_option("--gc-fake-mmap",
888                    default="/tmp/__v8_gc__",
889                    help="gc fake mmap file [default: %default]")
890  parser.add_option("--objdump",
891                    default="/usr/bin/objdump",
892                    help="objdump tool to use [default: %default]")
893  parser.add_option("--host-root",
894                    default="",
895                    help="Path to the host root [default: %default]")
896  options, args = parser.parse_args()
897
898  if not options.quiet:
899    if options.snapshot:
900      print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
901                                        options.log,
902                                        options.log)
903    else:
904      print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
905    print "Perf trace file: %s" % options.trace
906
907  V8_GC_FAKE_MMAP = options.gc_fake_mmap
908  HOST_ROOT = options.host_root
909  if os.path.exists(options.objdump):
910    disasm.OBJDUMP_BIN = options.objdump
911    OBJDUMP_BIN = options.objdump
912  else:
913    print "Cannot find %s, falling back to default objdump" % options.objdump
914
915  # Stats.
916  events = 0
917  ticks = 0
918  missed_ticks = 0
919  really_missed_ticks = 0
920  optimized_ticks = 0
921  generated_ticks = 0
922  v8_internal_ticks = 0
923  mmap_time = 0
924  sample_time = 0
925
926  # Process the snapshot log to fill the snapshot name map.
927  snapshot_name_map = {}
928  if options.snapshot:
929    snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
930    snapshot_name_map = snapshot_log_reader.ReadNameMap()
931
932  # Initialize the log reader.
933  code_map = CodeMap()
934  log_reader = LogReader(log_name=options.log + ".ll",
935                         code_map=code_map,
936                         snapshot_pos_to_name=snapshot_name_map)
937  if not options.quiet:
938    print "Generated code architecture: %s" % log_reader.arch
939    print
940    sys.stdout.flush()
941
942  # Process the code and trace logs.
943  library_repo = LibraryRepo()
944  log_reader.ReadUpToGC()
945  trace_reader = TraceReader(options.trace)
946  while True:
947    header, offset = trace_reader.ReadEventHeader()
948    if not header:
949      break
950    events += 1
951    if header.type == PERF_RECORD_MMAP:
952      start = time.time()
953      mmap_info = trace_reader.ReadMmap(header, offset)
954      if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
955        log_reader.ReadUpToGC()
956      else:
957        library_repo.Load(mmap_info, code_map, options)
958      mmap_time += time.time() - start
959    elif header.type == PERF_RECORD_SAMPLE:
960      ticks += 1
961      start = time.time()
962      sample = trace_reader.ReadSample(header, offset)
963      code = code_map.Find(sample.ip)
964      if code:
965        code.Tick(sample.ip)
966        if code.codetype == Code.OPTIMIZED:
967          optimized_ticks += 1
968        elif code.codetype == Code.FULL_CODEGEN:
969          generated_ticks += 1
970        elif code.codetype == Code.V8INTERNAL:
971          v8_internal_ticks += 1
972      else:
973        missed_ticks += 1
974      if not library_repo.Tick(sample.ip) and not code:
975        really_missed_ticks += 1
976      if trace_reader.callchain_supported:
977        for ip in sample.ips:
978          caller_code = code_map.Find(ip)
979          if caller_code:
980            if code:
981              caller_code.CalleeTick(code)
982            code = caller_code
983      sample_time += time.time() - start
984
985  if options.dot:
986    PrintDot(code_map, options)
987  else:
988    PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
989
990    if not options.quiet:
991      def PrintTicks(number, total, description):
992        print("%10d %5.1f%% ticks in %s" %
993              (number, 100.0*number/total, description))
994      print
995      print "Stats:"
996      print "%10d total trace events" % events
997      print "%10d total ticks" % ticks
998      print "%10d ticks not in symbols" % missed_ticks
999      unaccounted = "unaccounted ticks"
1000      if really_missed_ticks > 0:
1001        unaccounted += " (probably in the kernel, try --kernel)"
1002      PrintTicks(really_missed_ticks, ticks, unaccounted)
1003      PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
1004      PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
1005      PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
1006      print "%10d total symbols" % len([c for c in code_map.AllCode()])
1007      print "%10d used symbols" % len([c for c in code_map.UsedCode()])
1008      print "%9.2fs library processing time" % mmap_time
1009      print "%9.2fs tick processing time" % sample_time
1010
1011  log_reader.Dispose()
1012  trace_reader.Dispose()
1013