1#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Generate a spatial analysis against an arbitrary library.
7
8To use, build the 'binary_size_tool' target. Then run this tool, passing
9in the location of the library to be analyzed along with any other options
10you desire.
11"""
12
13import collections
14import json
15import logging
16import multiprocessing
17import optparse
18import os
19import re
20import shutil
21import subprocess
22import sys
23import tempfile
24import time
25
26import binary_size_utils
27
# This path change is not beautiful. Temporary (I hope) measure until
# the chromium project has figured out a proper way to organize the
# library of python tools. http://crbug.com/375725
elf_symbolizer_path = os.path.abspath(os.path.join(
    os.path.dirname(__file__),
    '..',
    '..',
    'build',
    'android',
    'pylib'))
sys.path.append(elf_symbolizer_path)
import symbols.elf_symbolizer as elf_symbolizer  # pylint: disable=F0401
40
41
# Node dictionary keys. These are output in json read by the webapp so
# keep them short to save file size.
# Note: If these change, the webapp must also change.
NODE_TYPE_KEY = 'k'  # Value is 'p' (path), 'b' (bucket) or 's' (symbol).
NODE_NAME_KEY = 'n'
NODE_CHILDREN_KEY = 'children'
NODE_SYMBOL_TYPE_KEY = 't'  # The nm symbol-type letter (or '@' for vtables).
NODE_SYMBOL_SIZE_KEY = 'value'  # Size in bytes.
NODE_MAX_DEPTH_KEY = 'maxDepth'  # Only set on the root node.
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'  # Marks file (leaf-path) nodes.

# The display name of the bucket where we put symbols without path.
NAME_NO_PATH_BUCKET = '(No Path)'

# Try to keep data buckets smaller than this to avoid killing the
# graphing lib.
BIG_BUCKET_LIMIT = 3000
59
60
61# TODO(andrewhayden): Only used for legacy reports. Delete.
def FormatBytes(byte_count):
  """Return a human-friendly string for a byte count, e.g. '1.5m' or '2.0k'.

  Counts at or below 1000 are returned unscaled via str()."""
  for threshold, divisor, suffix in ((1e6, 1.0e6, 'm'), (1e3, 1.0e3, 'k')):
    if byte_count > threshold:
      return '%.1f%s' % (byte_count / divisor, suffix)
  return str(byte_count)
71
72
73# TODO(andrewhayden): Only used for legacy reports. Delete.
def SymbolTypeToHuman(symbol_type):
  """Convert a symbol type as printed by nm into a human-readable name.

  Raises KeyError for types outside the known set."""
  human_names = {
      'b': 'bss',
      'd': 'data',
      'r': 'read-only data',
      't': 'code',
      'w': 'weak symbol',
      'v': 'weak symbol',
  }
  return human_names[symbol_type]
82
83
def _MkChild(node, name):
  """Return the child of |node| named |name|, creating it if absent.

  New children start with an empty children dict."""
  children = node[NODE_CHILDREN_KEY]
  if name not in children:
    children[name] = {NODE_NAME_KEY: name,
                      NODE_CHILDREN_KEY: {}}
  return children[name]
91
92
93
def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case."""
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET in root_children:
    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
    old_children = no_path_bucket[NODE_CHILDREN_KEY]
    # Count all symbols across the per-symbol-type buckets under (No Path).
    count = 0
    for symbol_type, symbol_bucket in old_children.iteritems():
      count += len(symbol_bucket[NODE_CHILDREN_KEY])
    if count > BIG_BUCKET_LIMIT:
      # Too big: replace the children with numbered subgroup nodes holding
      # at most BIG_BUCKET_LIMIT symbols each.
      new_children = {}
      no_path_bucket[NODE_CHILDREN_KEY] = new_children
      current_bucket = None
      index = 0
      for symbol_type, symbol_bucket in old_children.iteritems():
        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems():
          if index % BIG_BUCKET_LIMIT == 0:
            # Start a new subgroup, e.g. '(No Path) subgroup 1'.
            group_no = (index / BIG_BUCKET_LIMIT) + 1
            current_bucket = _MkChild(no_path_bucket,
                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                          group_no))
            assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
            node[NODE_TYPE_KEY] = 'p'  # p for path
          index += 1
          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
          AddSymbolIntoFileNode(current_bucket, symbol_type,
                                symbol_name, symbol_size)
122
123
def MakeChildrenDictsIntoLists(node):
  """Recursively convert every NODE_CHILDREN_KEY dict into a plain list.

  The webapp expects children as JSON arrays, not objects. Returns the
  length of the largest children collection found anywhere in the tree."""
  largest = 0
  if NODE_CHILDREN_KEY in node:
    largest = len(node[NODE_CHILDREN_KEY])
    converted = []
    for child in node[NODE_CHILDREN_KEY].itervalues():
      largest = max(largest, MakeChildrenDictsIntoLists(child))
      converted.append(child)
    node[NODE_CHILDREN_KEY] = converted

  return largest
137
138
def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Insert a symbol leaf below the file-path node |node|.

  Creates (or reuses) a symbol-type bucket under |node|, then a symbol
  leaf under the bucket. Always returns 2, the number of tree levels
  added below |node|."""

  # Mark |node| as the final path element and descend into its bucket.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  bucket = _MkChild(node, symbol_type)
  assert NODE_TYPE_KEY not in bucket or bucket[NODE_TYPE_KEY] == 'b'
  bucket[NODE_SYMBOL_TYPE_KEY] = symbol_type
  bucket[NODE_TYPE_KEY] = 'b'  # b for bucket

  # Create the leaf entry for the symbol itself.
  leaf = _MkChild(bucket, symbol_name)
  if NODE_CHILDREN_KEY in leaf:
    if leaf[NODE_CHILDREN_KEY]:
      logging.warning('A container node used as symbol for %s.' % symbol_name)
    # This is going to be used as a leaf so no use for child list.
    del leaf[NODE_CHILDREN_KEY]
  leaf[NODE_SYMBOL_SIZE_KEY] = symbol_size
  leaf[NODE_SYMBOL_TYPE_KEY] = symbol_type
  leaf[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Two levels added: bucket + symbol.
162
163
def MakeCompactTree(symbols):
  """Build the modern-report tree for |symbols|.

  |symbols| is an iterable of (name, type, size, path) tuples. Returns the
  root node of a tree keyed by the NODE_*_KEY constants: interior nodes per
  path component, then a symbol-type bucket, then one leaf per symbol."""
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  for symbol_name, symbol_type, symbol_size, file_path in symbols:

    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz']
    if file_path:
      file_path = os.path.normpath(file_path)
      seen_symbol_with_path = True
    else:
      # Pathless symbols all go into one well-known bucket.
      file_path = NAME_NO_PATH_BUCKET

    if file_path.startswith('/'):
      file_path = file_path[1:]
    path_parts = file_path.split('/')

    # Find pre-existing node in tree, or update if it already exists
    node = result
    depth = 0
    while len(path_parts) > 0:
      path_part = path_parts.pop(0)
      if len(path_part) == 0:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    # The symbol adds two more levels (bucket + leaf) below the file node.
    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.' % largest_list_len)
  return result
213
214
215# TODO(andrewhayden): Only used for legacy reports. Delete.
def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating size information
  along the way.

  The result is a dictionary that contains two kinds of nodes:
  1. Leaf nodes, representing source code locations (e.g., c++ files)
     These nodes have the following dictionary entries:
       sizes: a dictionary whose keys are categories (such as code, data,
              vtable, etceteras) and whose values are the size, in bytes, of
              those categories;
       size:  the total size, in bytes, of all the entries in the sizes dict
  2. Non-leaf nodes, representing directories
     These nodes have the following dictionary entries:
       children: a dictionary whose keys are names (path entries; either
                 directory or file names) and whose values are other nodes;
       size:     the total size, in bytes, of all the leaf nodes that are
                 contained within the children dict (recursively expanded)

  The result object is itself a dictionary that represents the common ancestor
  of all child nodes, e.g. a path to which all other nodes beneath it are
  relative. The 'size' attribute of this dict yields the sum of the size of all
  leaf nodes within the data structure.
  """
  # Root node; every symbol's size is accumulated into its total.
  dirs = {'children': {}, 'size': 0}
  for sym, symbol_type, size, path in symbols:
    dirs['size'] += size
    if path:
      path = os.path.normpath(path)
      if path.startswith('/'):
        path = path[1:]

    parts = None
    if path:
      parts = path.split('/')

    if parts:
      assert path
      file_key = parts.pop()
      tree = dirs
      try:
        # Traverse the tree to the parent of the file node, creating as needed
        for part in parts:
          assert part != ''
          if part not in tree['children']:
            tree['children'][part] = {'children': {}, 'size': 0}
          tree = tree['children'][part]
          tree['size'] += size

        # Get (creating if necessary) the node for the file
        # This node doesn't have a 'children' attribute
        if file_key not in tree['children']:
          tree['children'][file_key] = {'sizes': collections.defaultdict(int),
                                        'size': 0}
        tree = tree['children'][file_key]
        tree['size'] += size

        # Accumulate size into a bucket within the file
        symbol_type = symbol_type.lower()
        if 'vtable for ' in sym:
          tree['sizes']['[vtable]'] += size
        elif 'r' == symbol_type:
          tree['sizes']['[rodata]'] += size
        elif 'd' == symbol_type:
          tree['sizes']['[data]'] += size
        elif 'b' == symbol_type:
          tree['sizes']['[bss]'] += size
        elif 't' == symbol_type:
          # 'text' in binary parlance means 'code'.
          tree['sizes']['[code]'] += size
        elif 'w' == symbol_type:
          tree['sizes']['[weak]'] += size
        else:
          tree['sizes']['[other]'] += size
      except:
        # Dump the offending symbol before re-raising, to aid debugging.
        print >> sys.stderr, sym, parts, file_key
        raise
    else:
      # Pathless symbols go into one bucket, sub-divided by a few
      # well-known symbol-name patterns.
      key = 'symbols without paths'
      if key not in dirs['children']:
        dirs['children'][key] = {'sizes': collections.defaultdict(int),
                                 'size': 0}
      tree = dirs['children'][key]
      subkey = 'misc'
      if (sym.endswith('::__FUNCTION__') or
        sym.endswith('::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      elif '::' in sym:
        # Group by the outermost 'namespace::' prefix.
        subkey = sym[0:sym.find('::') + 2]
      tree['sizes'][subkey] = tree['sizes'].get(subkey, 0) + size
      tree['size'] += size
  return dirs
309
310
311# TODO(andrewhayden): Only used for legacy reports. Delete.
def JsonifyTree(tree, name):
  """Convert TreeifySymbols output to a JSON treemap.

  The format is very similar, with the notable exceptions being
  lists of children instead of maps and some different attribute names."""
  css_class_map = {
      '[vtable]': 'vtable',
      '[rodata]': 'read-only_data',
      '[data]': 'data',
      '[bss]': 'bss',
      '[code]': 'code',
      '[weak]': 'weak_symbol',
  }
  children = []
  if 'children' in tree:
    # Non-leaf (directory) node: recurse into each child.
    for child_name, child in tree['children'].iteritems():
      children.append(JsonifyTree(child, child_name))
  else:
    # Leaf node; dump per-file stats as entries in the treemap.
    for kind, size in tree['sizes'].iteritems():
      entry = {'name': kind + ' (' + FormatBytes(size) + ')',
               'data': {'$area': size}}
      css_class = css_class_map.get(kind)
      if css_class is not None:
        entry['data']['$symbol'] = css_class
      children.append(entry)
  # Sort children by size, largest to smallest.
  children.sort(key=lambda child: -child['data']['$area'])

  # For leaf nodes, the 'size' attribute is the size of the leaf;
  # Non-leaf nodes don't really have a size, but their 'size' attribute is
  # the sum of the sizes of all their children.
  return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
          'data': {'$area': tree['size']},
          'children': children}
348
def DumpCompactTree(symbols, outfile):
  """Serialize the modern-report tree for |symbols| into |outfile| as JS."""
  compact_tree = MakeCompactTree(symbols)
  with open(outfile, 'w') as out:
    out.write('var tree_data = ')
    json.dump(compact_tree, out)
  print('Writing %d bytes json' % os.path.getsize(outfile))
355
356
357# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpTreemap(symbols, outfile):
  """Write the legacy webtreemap data for |symbols| to |outfile| as JS.

  Uses a 'with' block instead of the manual open/flush/close dance so the
  file is reliably closed even if serialization raises."""
  dirs = TreeifySymbols(symbols)
  with open(outfile, 'w') as out:
    out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/')))
366
367
368# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestSymbols(symbols, outfile, n):
  """Write the |n| largest non-bss, non-weak symbols to |outfile| as JS.

  |symbols| is an iterable of (sym, symbol_type, size, path) tuples.
  Uses a 'with' block so the file is closed even on error; the closing
  bracket is still emitted from 'finally' to keep the JS well-formed."""
  # a list of (sym, symbol_type, size, path); sort by size.
  ordered = sorted(symbols, key=lambda x: -x[2])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestSymbols = [\n')
    try:
      for sym, symbol_type, size, path in ordered:
        if symbol_type in ('b', 'w'):
          continue  # skip bss and weak symbols
        entry = {'size': FormatBytes(size),
                 'symbol': sym,
                 'type': SymbolTypeToHuman(symbol_type),
                 'location': path if path is not None else ''}
        out.write(json.dumps(entry))
        out.write(',\n')
        dumped += 1
        if dumped >= n:
          break
    finally:
      out.write('];\n')
394
395
def MakeSourceMap(symbols):
  """Aggregate |symbols| by source path.

  Returns a dict keyed by normalized path (or '[no path]') whose values
  carry the original 'path' plus running 'symbol_count' and 'size' totals."""
  sources = {}
  for _sym, _symbol_type, size, path in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    record = sources.setdefault(
        key, {'path': path, 'symbol_count': 0, 'size': 0})
    record['size'] += size
    record['symbol_count'] += 1
  return sources
410
411
412# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestSources(symbols, outfile, n):
  """Write the |n| source files with the largest total size to |outfile|.

  Uses a 'with' block so the file is closed even on error; the closing
  bracket is still emitted from 'finally' to keep the JS well-formed."""
  source_map = MakeSourceMap(symbols)
  sources = sorted(source_map.values(), key=lambda x: -x['size'])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestSources = [\n')
    try:
      for record in sources:
        entry = {'size': FormatBytes(record['size']),
                 'symbol_count': str(record['symbol_count']),
                 'location': record['path']}
        out.write(json.dumps(entry))
        out.write(',\n')
        dumped += 1
        if dumped >= n:
          break
    finally:
      out.write('];\n')
433
434
435# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestVTables(symbols, outfile, n):
  """Write the |n| largest vtables found in |symbols| to |outfile| as JS.

  Uses a 'with' block so the file is closed even on error; the closing
  bracket is still emitted from 'finally' to keep the JS well-formed."""
  vtables = []
  for symbol, _type, size, path in symbols:
    if 'vtable for ' in symbol:
      vtables.append({'symbol': symbol, 'path': path, 'size': size})
  vtables.sort(key=lambda x: -x['size'])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestVTables = [\n')
    try:
      for record in vtables:
        entry = {'size': FormatBytes(record['size']),
                 'symbol': record['symbol'],
                 'location': record['path']}
        out.write(json.dumps(entry))
        out.write(',\n')
        dumped += 1
        if dumped >= n:
          break
    finally:
      out.write('];\n')
459
460
# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location
# Regular expression explained ( see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,})    The address (8 or more hex digits)
# [\s]+             Whitespace separator
# ([0-9a-f]{8,})    The size (8 or more hex digits). From here on out it's
#                   all optional.
# [\s]*             Optional whitespace separator
# (\S?)             The symbol type, which is any non-whitespace char
# [\s*]             Separator: one whitespace char or a literal '*'
# ([^\t]*)          Symbol name, any non-tab character (spaces ok!)
# [\t]?             Tab separator
# (.*)              The location (filename[:linennum|?][ (discriminator n)]
sNmPattern = re.compile(
  r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
477
class Progress():
  """Mutable counters shared by the symbolization callbacks."""

  def __init__(self):
    # Symbols looked up so far.
    self.count = 0
    # Input lines skipped (unparseable or needing no lookup).
    self.skip_count = 0
    # Addresses encountered more than once.
    self.collisions = 0
    # Throttling state for the periodic progress printout.
    self.time_last_output = time.time()
    self.count_last_output = 0
485
486
def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs):
  """Run nm over |library| and symbolize its output into |outfile|.

  Lines that nm could not attribute to a source location are resolved
  asynchronously via addr2line (through elf_symbolizer), using up to
  |jobs| concurrent lookups. Every nm output line is then written to
  |outfile|; resolved lines gain a trailing tab plus 'path:linenumber'."""
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  address_symbol = {}  # Maps address (int) -> resolved symbol object.
  progress = Progress()
  def map_address_symbol(symbol, addr):
    # Callback invoked by the symbolizer for each resolved address.
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      address_symbol[addr] = symbol

    # Periodic progress report, throttled below to once per second.
    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # Only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        if time_spent > 0:
          speed = chunk_size / time_spent
        else:
          speed = 0
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        print('%.1f%%: Looked up %d symbols (%d collisions) - %.1f lookups/s.' %
              (progress_percent, progress.count, progress.collisions, speed))

  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs)
  user_interrupted = False
  try:
    # First pass: queue an async lookup for every line lacking a location.
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up, shortcut
                                      # ELFSymbolizer.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't want to abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have my young padawan.')

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  # Second pass: write every nm line back out, annotating the ones that
  # were successfully resolved.
  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = symbol.source_path
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))
580
581
def RunNm(binary, nm_binary):
  """Run nm over |binary| and return its stdout.

  Symbols are demangled (-C) and listed with sizes, largest first.
  Raises Exception carrying nm's stderr (or stdout if stderr is empty)
  when nm exits non-zero."""
  print('Starting nm')
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    # raise Exception(value) works in both Python 2 and 3, unlike the old
    # 'raise Exception, value' statement form.
    raise Exception(err_output or process_output)

  print('Finished nm')
  return process_output
599
600
def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary):
  """Return the parsed nm symbols as a list, generating the dump if needed.

  If |nm_infile| is None, nm/addr2line are run against |library| (writing
  into |outfile|, or a kept tempfile when |outfile| is None); otherwise
  |nm_infile| is parsed directly and no symbol lookups are performed."""
  if nm_infile is None:
    if outfile is None:
      # delete=False: the dump must survive for parsing below. It is not
      # cleaned up automatically.
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      print('Running parallel addr2line, dumping symbols to ' + outfile)
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs)

    nm_infile = outfile

  elif verbose:
    print('Using nm input from ' + nm_infile)
  # open() instead of the Python-2-only file() builtin.
  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))
617
618
619def _find_in_system_path(binary):
620  """Locate the full path to binary in the system path or return None
621  if not found."""
622  system_path = os.environ["PATH"].split(os.pathsep)
623  for path in system_path:
624    binary_path = os.path.join(path, binary)
625    if os.path.isfile(binary_path):
626      return binary_path
627  return None
628
def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  tool_output = subprocess.check_output([addr2line_binary, '--version'])
  # Dots escaped so an arbitrary character can't stand in for the version
  # separator (the old pattern used an unescaped '.').
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  # DWARF4-capable tools are binutils newer than 2.22.
  supports_dwarf4 = major > 2 or (major == 2 and minor > 22)

  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                       '--dwarf-depth=1', library])
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)
657
658
def main():
  """Command-line entry point: parse options, analyze, emit the report."""
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates.'
                    'This argument is only valid when using --library.')
  parser.add_option('--legacy', action='store_true',
                    help='emit legacy binary size report instead of modern')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.jobs:
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is required argument')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    # cpu_count() must stay an int: wrapping it in str() (as a previous
    # version did) breaks the min/max clamping.
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'

  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  if opts.library:
    # Only meaningful when addr2line will actually run on the library;
    # with --nm-in, opts.library is None and the check would crash.
    CheckDebugFormatSupport(opts.library, addr2line_binary)

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary)
  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0o755)

  if opts.legacy: # legacy report
    DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
    DumpLargestSymbols(symbols,
                         os.path.join(opts.destdir, 'largest-symbols.js'), 100)
    DumpLargestSources(symbols,
                         os.path.join(opts.destdir, 'largest-sources.js'), 100)
    DumpLargestVTables(symbols,
                         os.path.join(opts.destdir, 'largest-vtables.js'), 100)
    treemap_out = os.path.join(opts.destdir, 'webtreemap')
    if not os.path.exists(treemap_out):
      os.makedirs(treemap_out, 0o755)
    treemap_src = os.path.join('third_party', 'webtreemap', 'src')
    shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out)
    shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out)
    shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out)
    shutil.copy(os.path.join('tools', 'binary_size', 'legacy_template',
                             'index.html'), opts.destdir)
  else: # modern report
    DumpCompactTree(symbols, os.path.join(opts.destdir, 'data.js'))
    d3_out = os.path.join(opts.destdir, 'd3')
    if not os.path.exists(d3_out):
      os.makedirs(d3_out, 0o755)
    d3_src = os.path.join(os.path.dirname(__file__),
                          '..',
                          '..',
                          'third_party', 'd3', 'src')
    template_src = os.path.join(os.path.dirname(__file__),
                                'template')
    shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out)
    shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out)
    shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir)
    shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir)

  print('Report saved to ' + opts.destdir + '/index.html')
798
799
800if __name__ == '__main__':
801  sys.exit(main())
802