17e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Boren# Copyright 2014 The Chromium Authors. All rights reserved.
27e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Boren# Use of this source code is governed by a BSD-style license that can be
37e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Boren# found in the LICENSE file.
47e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Boren
57e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Boren"""Common utilities for tools that deal with binary size information.
67e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Boren
77e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric BorenCopied from chromium/src/build/android/pylib/symbols/binary_size_tools.py.
87e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Boren"""
97e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Boren
107e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Borenimport logging
117e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Borenimport re
127e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Boren
137e97dc0aa1ff5b03593e689fe1cfdcc3550c169eEric Boren
def ParseNm(nm_lines):
  """Parse nm output, yielding data for symbols relevant to binary size.

  Each line is stripped of trailing whitespace and classified by shape:

    * address + size + type + name (+ optional tab-separated location):
      yielded, unless the type is 'B' or 'b' (BSS), which is skipped.
    * address + type + name, but no size: silently ignored.
    * no address, type 'U' or 'w' (external or weak): silently ignored.
    * address + type only, no name: silently ignored.

  Anything else is reported through logging.warning and skipped.

  Args:
      nm_lines: an iterable over lines of nm output.

  Yields:
      (symbol name, symbol type, symbol size, source file path).

      Size is an int parsed from the hexadecimal size column.
      Path may be None if nm couldn't figure out the source file.
  """

  # Every fragment is a raw string so that regex escapes such as \d, \? and
  # \t reach the regex engine verbatim instead of being treated as (invalid)
  # string-literal escapes.
  # Match lines with size, symbol, optional location, optional discriminator
  sym_re = re.compile(r'^[0-9a-f]{8,} '  # address (8+ hex digits)
                      r'([0-9a-f]{8,}) '  # size (8+ hex digits)
                      r'(.) '  # symbol type, one character
                      r'([^\t]+)'  # symbol name, separated from next by tab
                      r'(?:\t(.*):[\d\?]+)?.*$')  # location
  # Match lines with addr but no size.
  addr_re = re.compile(r'^[0-9a-f]{8,} (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external symbols.
  noaddr_re = re.compile(r'^ {8,} (.) (.*)$')
  # Match lines with no symbol name, only addr and type
  addr_only_re = re.compile(r'^[0-9a-f]{8,} (.)$')

  for line in nm_lines:
    line = line.rstrip()

    match = sym_re.match(line)
    if match:
      size, sym_type, sym, path = match.groups()
      if sym_type not in ('B', 'b'):  # skip all BSS for now.
        yield sym, sym_type, int(size, 16), path
      continue

    if addr_re.match(line):
      continue  # No size == we don't care.

    match = noaddr_re.match(line)
    if match and match.group(1) in ('U', 'w'):
      continue  # external or weak symbol

    # An address-less line of any other type deliberately falls through to
    # the warning below: it cannot match addr_only_re, which needs an address.
    if addr_only_re.match(line):
      continue  # Nothing to do.

    # If we reach this part of the loop, there was something in the
    # line that we didn't expect or recognize.
    logging.warning('nm output parser failed to parse: %s', repr(line))
68