dump-static-initializers.py revision 5c02ac1a9c1b504631c0a3d2b6e737b5d738bae1
1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
"""Dump functions called by static initializers in a Linux Release binary.

Usage example:
  tools/linux/dump-static-initializers.py out/Release/chrome

A brief overview of static initialization:
1) the compiler writes out, per object file, a function that contains
   the static initializers for that file.
2) the compiler also writes out a pointer to that function in a special
   section.
3) at link time, the linker concatenates the function pointer sections
   into a single list of all initializers.
4) at run time, on startup the binary runs all function pointers.

The functions in (1) all have mangled names of the form
  _GLOBAL__I_foobar.cc
Using objdump, we can disassemble those functions and dump all symbols that
they reference.
"""
25
26import optparse
27import re
28import subprocess
29import sys
30
# A map of symbol => informative text about it.
# Keys are demangled symbol names exactly as they are looked up by main();
# the text is appended in square brackets to the matching reference line.
NOTES = {
  '__cxa_atexit@plt': 'registers a dtor to run at exit',
  'std::__ioinit': '#includes <iostream>, use <ostream> instead',
}
36
def _IsGitWorkspace():
  """Return True if `git rev-parse` exits with status 0 in the current dir.

  Returns False when git reports an error and also when the git executable
  cannot be started at all, so importing this module never fails just because
  git is not installed.
  """
  try:
    git = subprocess.Popen(['git', 'rev-parse'], stderr=subprocess.PIPE)
  except OSError:
    # git is not installed (or not executable): not a usable git checkout.
    return False
  # communicate() drains and closes the stderr pipe before waiting, so git
  # can never block on a full pipe and no file descriptor is leaked.
  git.communicate()
  return git.returncode == 0

# Determine whether this is a git checkout (as opposed to e.g. svn).
IS_GIT_WORKSPACE = _IsGitWorkspace()
40
class Demangler(object):
  """A wrapper around c++filt to provide a function to demangle symbols.

  The constructor starts one c++filt child process.  Demangle() writes one
  symbol per line to its stdin and reads one line back from its stdout.
  """
  def __init__(self):
    # universal_newlines=True opens the pipes in text mode, so Demangle() can
    # exchange str objects with c++filt on Python 3 as well as on Python 2.
    self.cppfilt = subprocess.Popen(['c++filt'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)

  def Demangle(self, sym):
    """Given mangled symbol |sym|, return its demangled form.

    The returned string is the reply line with surrounding whitespace
    stripped.
    """
    self.cppfilt.stdin.write(sym + '\n')
    # The pipe may be buffered; without an explicit flush the symbol could sit
    # in the buffer while readline() below waits forever for a reply.
    self.cppfilt.stdin.flush()
    return self.cppfilt.stdout.readline().strip()
52
# Matches for example: "cert_logger.pb.cc", capturing "cert_logger".
protobuf_filename_re = re.compile(r'(.*)\.pb\.cc$')
def QualifyFilenameAsProto(filename):
  """Attempt to qualify a bare |filename| with a src-relative path, assuming it
  is a protoc-generated file.

  The name is matched against protobuf_filename_re and `git ls-files` is asked
  for files named "<basename>.proto" below the current directory.  If exactly
  one such file is found, its path (the .proto path as printed by git) is
  returned.  Otherwise -- not a git checkout, not a *.pb.cc name, no hit, or
  several hits -- the original |filename| is returned unchanged."""
  if not IS_GIT_WORKSPACE:
    return filename
  match = protobuf_filename_re.match(filename)
  if not match:
    return filename
  # group(1) is the captured stem as a string.  (groups(0) would return a
  # 1-tuple, which only happened to work with the % formatting below.)
  basename = match.group(1)
  gitlsfiles = subprocess.Popen(
    ['git', 'ls-files', '--', '*/%s.proto' % basename],
    stdout=subprocess.PIPE, universal_newlines=True)
  # communicate() reads all output and reaps the child, so no git process or
  # pipe is left behind regardless of how many hits there are.
  output = gitlsfiles.communicate()[0]
  hits = [line.strip() for line in output.splitlines() if line.strip()]
  if len(hits) == 1:
    return hits[0]
  return filename  # No hit, or multiple hits: can't help.
74
# Regex matching the substring of a symbol's demangled text representation most
# likely to appear in a source file.
# Example: "v8::internal::Builtins::InitBuiltinFunctionTable()" becomes
# "InitBuiltinFunctionTable", since the first (optional & non-capturing) group
# picks up any ::-qualification and the last fragment picks up a suffix that
# starts with an opener.
# The "[" inside the character classes is escaped; it matches the same text as
# the unescaped form but avoids Python's "possible nested set" FutureWarning.
symbol_code_name_re = re.compile(r'^(?:[^(<\[]*::)?([^:(<\[]*).*?$')
def QualifyFilename(filename, symbol):
  """Given a bare filename and a symbol that occurs in it, attempt to qualify
  it with a src-relative path.

  The unqualified name is extracted from |symbol| with symbol_code_name_re and
  `git grep -l` is run for it over files named |filename| below the current
  directory.  If exactly one file is listed, its path is returned.  If this is
  not a git checkout, or no file or more than one file matches, the original
  |filename| is returned."""
  if not IS_GIT_WORKSPACE:
    return filename
  match = symbol_code_name_re.match(symbol)
  if not match:
    return filename
  symbol = match.group(1)
  # -e marks |symbol| explicitly as the pattern, so that a name beginning
  # with "-" can never be mistaken for a git option.
  gitgrep = subprocess.Popen(
    ['git', 'grep', '-l', '-e', symbol, '--', '*/%s' % filename],
    stdout=subprocess.PIPE, universal_newlines=True)
  # communicate() reads all output and reaps the child, so no git process or
  # pipe is left behind regardless of how many files matched.
  output = gitgrep.communicate()[0]
  hits = [line.strip() for line in output.splitlines() if line.strip()]
  if len(hits) == 1:
    return hits[0]
  return filename  # Zero or several candidates; return bare filename.
101
# Regex matching nm output for the symbols we're interested in.
# See test_ParseNmLine for examples.
nm_re = re.compile(r'(\S+) (\S+) t (?:_ZN12)?_GLOBAL__(?:sub_)?I_(.*)')
def ParseNmLine(line):
  """Given a line of nm output, parse static initializers as a
  (file, start, size) tuple.

  The first two whitespace-separated fields are read as hexadecimal start
  address and size; the file name is whatever follows the _GLOBAL__I_ /
  _GLOBAL__sub_I_ marker up to the end of the line.  Returns None for lines
  that do not describe a static initializer."""
  match = nm_re.match(line)
  if not match:
    return None
  addr, size, filename = match.groups()
  return (filename, int(addr, 16), int(size, 16))
112
113
def test_ParseNmLine():
  """Verify the nm_re regex matches some sample lines.

  Covers both accepted symbol spellings (_ZN12_GLOBAL__I_ and
  _GLOBAL__sub_I_) and one ordinary symbol that must be rejected."""
  parse = ParseNmLine(
    '0000000001919920 0000000000000008 t '
    '_ZN12_GLOBAL__I_safe_browsing_service.cc')
  assert parse == ('safe_browsing_service.cc', 26319136, 8), parse

  parse = ParseNmLine(
    '00000000026b9eb0 0000000000000024 t '
    '_GLOBAL__sub_I_extension_specifics.pb.cc')
  assert parse == ('extension_specifics.pb.cc', 40607408, 36), parse

  # A symbol that is not a static initializer must not be reported.
  parse = ParseNmLine('0000000000400000 0000000000000010 T main')
  assert parse is None, parse

# Just always run the test; it is fast enough.
test_ParseNmLine()
128
129
def ParseNm(binary):
  """Given a binary, yield static initializers as (file, start, size) tuples.

  Runs `nm -S` on |binary| and passes each output line to ParseNmLine,
  yielding every non-empty result."""
  # universal_newlines=True makes nm.stdout yield str lines, which is what the
  # str-based regex in ParseNmLine needs on Python 3.
  nm = subprocess.Popen(['nm', '-S', binary], stdout=subprocess.PIPE,
                        universal_newlines=True)
  try:
    for line in nm.stdout:
      parse = ParseNmLine(line)
      if parse:
        yield parse
  finally:
    # Runs when the output is exhausted and also when the caller abandons the
    # generator early: close the pipe first, then reap the child.
    nm.stdout.close()
    nm.wait()
137
# Regex matching objdump output for the symbols we're interested in.
# Example line:
#     12354ab:  (disassembly, including <FunctionReference>)
disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')
# Prefix of a static-initializer function's own symbol, in every spelling
# that the nm parsing in this file accepts (_GLOBAL__I_, _GLOBAL__sub_I_,
# each optionally preceded by _ZN12).
initializer_symbol_re = re.compile(r'(?:_ZN12)?_GLOBAL__(?:sub_)?I_')
def ExtractSymbolReferences(binary, start, end):
  """Given a span of addresses, returns symbol references from disassembly.

  Runs objdump over [start, end) of |binary| and collects the <symbol> named
  on each disassembled line.  References starting with .LC or _DYNAMIC and
  references to a static-initializer symbol itself are skipped.

  Returns a sorted list of the distinct references.
  Raises RuntimeError if the disassembly mentions
  __static_initialization_and_destruction."""
  cmd = ['objdump', binary, '--disassemble',
         '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
  # universal_newlines=True yields str lines on Python 3 as well.
  objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             universal_newlines=True)

  refs = set()
  try:
    for line in objdump.stdout:
      if '__static_initialization_and_destruction' in line:
        raise RuntimeError('code mentions '
                           '__static_initialization_and_destruction; '
                           'did you accidentally run this on a Debug binary?')
      match = disassembly_re.search(line)
      if not match:
        continue
      ref = match.group(1)
      if ref.startswith(('.LC', '_DYNAMIC')):
        # Ignore these, they are uninformative.
        continue
      if initializer_symbol_re.match(ref):
        # Probably a relative jump within this function.
        continue
      refs.add(ref)
  finally:
    # Close the pipe and reap objdump on both the normal and the error path.
    objdump.stdout.close()
    objdump.wait()

  return sorted(refs)
166
def main():
  """Parse the command line and print every static initializer of a binary.

  Expects exactly one positional argument, the binary to inspect.  For each
  initializer reported by ParseNm, prints the (possibly path-qualified) source
  file name and the demangled symbols its code references, followed by a
  summary line with the totals.  With -d/--diffable the files are sorted and
  the file name is repeated on every output line.

  Returns 0; an invalid command line makes optparse exit the process."""
  parser = optparse.OptionParser(usage='%prog [option] filename')
  parser.add_option('-d', '--diffable', dest='diffable',
                    action='store_true', default=False,
                    help='Prints the filename on each line, for more easily '
                         'diff-able output. (Used by sizes.py)')
  opts, args = parser.parse_args()
  if len(args) != 1:
    # parser.error() prints the usage message and exits the process, so
    # nothing after it runs.
    parser.error('expected exactly one filename argument')
  binary = args[0]

  demangler = Demangler()
  file_count = 0
  initializer_count = 0

  files = ParseNm(binary)
  if opts.diffable:
    files = sorted(files)
  for filename, addr, size in files:
    file_count += 1
    ref_output = []

    qualified_filename = QualifyFilenameAsProto(filename)

    if size == 2:
      # gcc generates a two-byte 'repz retq' initializer when there is a
      # ctor even when the ctor is empty.  This is fixed in gcc 4.6, but
      # Android uses gcc 4.4.
      ref_output.append('[empty ctor, but it still has cost on gcc <4.6]')
    else:
      for ref in ExtractSymbolReferences(binary, addr, addr+size):
        initializer_count += 1

        ref = demangler.Demangle(ref)
        # Keep trying to qualify the bare name until one symbol succeeds.
        if qualified_filename == filename:
          qualified_filename = QualifyFilename(filename, ref)

        note = ''
        if ref in NOTES:
          note = NOTES[ref]
        elif ref.endswith('_2eproto()'):
          note = 'protocol compiler bug: crbug.com/105626'

        if note:
          ref_output.append('%s [%s]' % (ref, note))
        else:
          ref_output.append(ref)

    # Every print below is called with a single parenthesized string, which
    # produces the same output as a Python 2 print statement and as the
    # Python 3 print function.
    if opts.diffable:
      if ref_output:
        print('\n'.join('# ' + qualified_filename + ' ' + r
                        for r in ref_output))
      else:
        print('# %s: (empty initializer list)' % qualified_filename)
    else:
      print('%s (initializer offset 0x%x size 0x%x)' % (qualified_filename,
                                                        addr, size))
      print(''.join('  %s\n' % r for r in ref_output))

  summary = 'Found %d static initializers in %d files.' % (initializer_count,
                                                           file_count)
  if opts.diffable:
    summary = '# ' + summary
  print(summary)

  return 0

if '__main__' == __name__:
  sys.exit(main())
235