1#!/usr/bin/env python
2#
3# Copyright 2013 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import base64
8import os
9import sys
10import re
11
12from optparse import OptionParser
13
14"""Extracts the list of resident symbols of a library loaded in a process.
15
This script combines the extended output of memdump for a given process
(obtained through memdump -x PID) and the symbol table of a .so loaded in that
process (obtained through nm -C lib-with-symbols.so), keeping only those
symbols that, at the time of the snapshot, were resident in memory (that is,
the symbols whose start address belongs to a mapped page of the .so which was
resident at the time of the snapshot).
22The aim is to perform a "code coverage"-like profiling of a binary, intersecting
23run-time information (list of resident pages) and debug symbols.
24"""
25
26_PAGE_SIZE = 4096
27
28
29def _TestBit(word, bit):
30  assert(bit >= 0 and bit < 8)
31  return not not ((word >> bit) & 1)
32
33
34def _HexAddr(addr):
35  return hex(addr)[2:].zfill(8)
36
37
def _GetResidentPagesSet(memdump_contents, lib_name, verbose):
  """Parses the memdump output and extracts the resident page set for lib_name.

  Every mapping line is expected to have the form:
    START-END PROT OFFSET <other fields> "PATH" [BASE64_BITMAP]
  where START, END and OFFSET are lowercase hex numbers. Lines starting with
  '[ PID' are treated as headers and ignored; any other line that does not
  match is reported on stderr and skipped.

  Args:
    memdump_contents: Array of strings (lines) of a memdump output.
    lib_name: A string containing the name of the library.so to be matched.
    verbose: Print a verbose header for each mapping matched.

  Returns:
    A set of resident pages (the key is the page index, i.e. the offset in
    the binary divided by the page size) for all the mappings whose path ends
    with lib_name.
  """
  resident_pages = set()
  map_rx = re.compile(
      r'^([0-9a-f]+)-([0-9a-f]+) ([\w-]+) ([0-9a-f]+) .* "(.*)" \[(.*)\]$')
  for line in memdump_contents:
    line = line.rstrip('\r\n')
    if line.startswith('[ PID'):
      continue

    r = map_rx.match(line)
    if not r:
      # The name of the memdump file is not known in this scope, so the
      # message only identifies the offending line.
      sys.stderr.write('Skipping %s from memdump output\n' % line)
      continue

    map_start = int(r.group(1), 16)
    map_end = int(r.group(2), 16)
    prot = r.group(3)
    offset = int(r.group(4), 16)
    assert offset % _PAGE_SIZE == 0
    lib = r.group(5)
    enc_bitmap = r.group(6)

    if not lib.endswith(lib_name):
      continue

    # bytearray yields integers when indexed on both Python 2 and Python 3.
    bitmap = bytearray(base64.b64decode(enc_bitmap))
    # The '+ 1' treats map_end as the last byte of the mapping (inclusive).
    map_pages_count = (map_end - map_start + 1) // _PAGE_SIZE
    bitmap_pages_count = len(bitmap) * 8

    if verbose:
      print('Found %s: mapped %d pages in mode %s @ offset %s.' % (
            lib, map_pages_count, prot, _HexAddr(offset)))
      print(' Map range in the process VA: [%s - %s]. Len: %s' % (
          _HexAddr(map_start),
          _HexAddr(map_end),
          _HexAddr(map_pages_count * _PAGE_SIZE)))
      print(' Corresponding addresses in the binary: [%s - %s]. Len: %s' % (
          _HexAddr(offset),
          _HexAddr(offset + map_end - map_start),
          _HexAddr(map_pages_count * _PAGE_SIZE)))
      print(' Bitmap: %d pages' % bitmap_pages_count)
      print('')

    assert bitmap_pages_count >= map_pages_count
    first_page = offset // _PAGE_SIZE
    for i in range(map_pages_count):
      # Page i is tracked by bit (i % 8) of byte (i // 8) of the bitmap.
      byte_idx, bit_idx = divmod(i, 8)
      # The bounds check keeps the loop safe when asserts are disabled (-O).
      if byte_idx < len(bitmap) and _TestBit(bitmap[byte_idx], bit_idx):
        resident_pages.add(first_page + i)
  return resident_pages
99
100
def main(argv):
  """Prints the nm symbols whose start address lies on a resident page.

  Expects three positional arguments after the options: the memdump output
  file ('-' to read it from stdin), the nm symbol table file and the name of
  the library to match in the memdump output. With -r/--reverse the symbols
  that are NOT on a resident page are printed instead.

  Args:
    argv: The full command line, with the program name in argv[0].

  Returns:
    The process exit code: 0 on success, 1 if the arguments are wrong.
  """
  nm_rx = re.compile(r'^([0-9a-f]+)\s+.*$')

  parser = OptionParser()
  parser.add_option("-r", "--reverse",
                    action="store_true", dest="reverse", default=False,
                    help="Print out non present symbols")
  parser.add_option("-v", "--verbose",
                    action="store_true", dest="verbose", default=False,
                    help="Print out verbose debug information.")

  # Parse the arguments handed to main() rather than implicitly falling back
  # to sys.argv, so the function honors its own parameter.
  (options, args) = parser.parse_args(argv[1:])

  if len(args) != 3:
    print('Usage: %s [-r] [-v] memdump.file nm.file library.so' % (
        os.path.basename(argv[0])))
    return 1

  memdump_file, nm_file, lib_name = args

  if memdump_file == '-':
    memdump_contents = sys.stdin.readlines()
  else:
    with open(memdump_file, 'r') as memdump_fh:
      memdump_contents = memdump_fh.readlines()
  resident_pages = _GetResidentPagesSet(memdump_contents,
                                        lib_name,
                                        options.verbose)

  # Process the nm symbol table, filtering out the resident symbols.
  with open(nm_file, 'r') as nm_fh:
    for line in nm_fh:
      line = line.rstrip('\r\n')
      # Skip undefined symbols (lines with no address).
      if line.startswith(' '):
        continue

      r = nm_rx.match(line)
      if not r:
        sys.stderr.write('Skipping %s from %s\n' % (line, nm_file))
        continue

      sym_addr = int(r.group(1), 16)
      sym_page = sym_addr // _PAGE_SIZE
      # Normally resident symbols are printed; --reverse flips the selection.
      if (sym_page in resident_pages) != options.reverse:
        print(line)
  return 0
150
# Script entry point: run main() on the command line and use its return value
# as the process exit status.
if __name__ == '__main__':
  raise SystemExit(main(sys.argv))
153