# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Common utilities for tools that deal with binary size information.
"""
7cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
8cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)import logging
9cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)import re
10cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
11cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
def ParseNm(nm_lines):
  """Parse nm output, yielding data for symbols relevant to binary size.

  Only lines that carry both an address and a size are reported; BSS
  symbols (types 'B' and 'b') are skipped. Lines with an address but no
  size, lines with only an address and a type, and address-less lines of
  type 'U' or 'w' are silently ignored. Any other line is reported through
  logging.warning and otherwise ignored.

  Args:
      nm_lines: an iterable over lines of nm output. Trailing whitespace
          (including the newline) is stripped from each line before
          matching.

  Yields:
      (symbol name, symbol type, symbol size, source file path).

      The size is an int parsed from the hexadecimal size column. The path
      is None if the line carries no "<tab>path:line" location.
  """

  # Every fragment is a raw string so that regex escapes such as \d reach
  # the re module untouched instead of being (invalid) string escapes.
  # Match lines with size, symbol, optional location, optional discriminator
  sym_re = re.compile(r'^[0-9a-f]{8,} '  # address (8+ hex digits)
                      r'([0-9a-f]{8,}) '  # size (8+ hex digits)
                      r'(.) '  # symbol type, one character
                      r'([^\t]+)'  # symbol name, separated from next by tab
                      r'(?:\t(.*):[\d\?]+)?.*$')  # location
  # Match lines with addr but no size.
  addr_re = re.compile(r'^[0-9a-f]{8,} (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external symbols.
  noaddr_re = re.compile(r'^ {8,} (.) (.*)$')
  # Match lines with no symbol name, only addr and type
  addr_only_re = re.compile(r'^[0-9a-f]{8,} (.)$')

  for line in nm_lines:
    line = line.rstrip()

    match = sym_re.match(line)
    if match:
      size, sym_type, sym, path = match.groups()
      if sym_type in ('B', 'b'):
        continue  # skip all BSS for now.
      yield sym, sym_type, int(size, 16), path
      continue

    if addr_re.match(line):
      continue  # No size == we don't care.

    match = noaddr_re.match(line)
    if match and match.group(1) in ('U', 'w'):
      continue  # external or weak symbol
    # An address-less line of any other type is deliberately not skipped
    # here: it cannot match addr_only_re and so ends up in the warning below.

    if addr_only_re.match(line):
      continue  # Nothing to do.

    # If we reach this part of the loop, there was something in the
    # line that we didn't expect or recognize.
    logging.warning('nm output parser failed to parse: %s', repr(line))
66