# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import os
import sys

# Parent of the directory that contains this file.
_BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Tool directories located next to _BASE_PATH.  They are appended to sys.path
# before the project-local imports below; presumably those modules live in
# these directories, so the order of the statements matters.
_FIND_RUNTIME_SYMBOLS_PATH = os.path.join(_BASE_PATH,
                                          os.pardir,
                                          'find_runtime_symbols')
_TOOLS_LINUX_PATH = os.path.join(_BASE_PATH,
                                 os.pardir,
                                 'linux')
sys.path.append(_FIND_RUNTIME_SYMBOLS_PATH)
sys.path.append(_TOOLS_LINUX_PATH)

import find_runtime_symbols
import prepare_symbol_info
import procfs  # pylint: disable=W0611,F0401

LOGGER = logging.getLogger('dmprof')

# Symbol-type identifiers re-exported from find_runtime_symbols so that users
# of this module do not need to import it themselves.
FUNCTION_SYMBOLS = find_runtime_symbols.FUNCTION_SYMBOLS
SOURCEFILE_SYMBOLS = find_runtime_symbols.SOURCEFILE_SYMBOLS
TYPEINFO_SYMBOLS = find_runtime_symbols.TYPEINFO_SYMBOLS


class SymbolDataSources(object):
  """Manages symbol data sources in a process.

  The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and
  so on.  They are collected into a directory '|prefix|.symmap' from the binary
  files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py.

  Binaries are not mandatory to profile.  The prepared data sources work in
  place of the binary even if the binary has been overwritten with another
  binary.

  Note that loading the symbol data sources takes a long time.  They are often
  very big.  So, the 'dmprof' profiler is designed to use 'SymbolMappingCache'
  which caches actually used symbols.
  """
  def __init__(self, prefix, alternative_dirs=None):
    """Stores the parameters; nothing is prepared or loaded yet.

    Args:
        prefix: A path prefix.  'prepare()' appends '.maps' and '.symmap' to
            it to name the maps file and the output directory.
        alternative_dirs: An optional dict passed through to
            prepare_symbol_info.  An empty dict is used if omitted.
    """
    self._prefix = prefix
    self._prepared_symbol_data_sources_path = None
    self._loaded_symbol_data_sources = None
    self._alternative_dirs = alternative_dirs or {}

  def prepare(self):
    """Prepares symbol data sources by extracting mapping from a binary.

    The prepared symbol data sources are stored in a directory.  The directory
    name is stored in |self._prepared_symbol_data_sources_path|.

    Returns:
        True if succeeded.
    """
    LOGGER.info('Preparing symbol mapping...')
    # prepare_symbol_info returns a (directory path, used-tempdir flag) pair;
    # a falsy path is treated as failure below.
    self._prepared_symbol_data_sources_path, used_tempdir = (
        prepare_symbol_info.prepare_symbol_info(
            self._prefix + '.maps',
            output_dir_path=self._prefix + '.symmap',
            alternative_dirs=self._alternative_dirs,
            use_tempdir=True,
            use_source_file_name=True))

    if not self._prepared_symbol_data_sources_path:
      LOGGER.warning('  Failed to prepare symbol mapping.')
      return False

    LOGGER.info('  Prepared symbol mapping.')
    if used_tempdir:
      # The temporary directory is not cleaned up here; tell the user.
      LOGGER.warning('  Using a temporary directory for symbol mapping.')
      LOGGER.warning('  Delete it by yourself.')
      LOGGER.warning('  Or, move the directory by yourself to use it later.')
    return True

  def get(self):
    """Returns the prepared symbol data sources.

    Calls 'prepare()' first if no prepared path is known yet, and loads the
    data sources only when no loaded object is held already.

    Returns:
        The prepared symbol data sources.  None if failed.
    """
    if not self._prepared_symbol_data_sources_path and not self.prepare():
      return None
    if not self._loaded_symbol_data_sources:
      LOGGER.info('Loading symbol mapping...')
      self._loaded_symbol_data_sources = (
          find_runtime_symbols.RuntimeSymbolsInProcess.load(
              self._prepared_symbol_data_sources_path))
    return self._loaded_symbol_data_sources

  def path(self):
    """Returns the path of the prepared symbol data sources if possible.

    Calls 'prepare()' first if no prepared path is known yet.

    Returns:
        The directory path.  None if the preparation failed.
    """
    if not self._prepared_symbol_data_sources_path and not self.prepare():
      return None
    return self._prepared_symbol_data_sources_path


class SymbolFinder(object):
  """Finds corresponding symbols from addresses.

  This class does only 'find()' symbols from a specified |address_list|.
  It is introduced to make a finder mockable.
  """
  def __init__(self, symbol_type, symbol_data_sources):
    """Stores what 'find()' needs.

    Args:
        symbol_type: A type of symbols to find.  It is passed through to
            find_runtime_symbols.find_runtime_symbols.
        symbol_data_sources: An object whose 'get()' returns the loaded symbol
            data sources, e.g. a SymbolDataSources object.
    """
    self._symbol_type = symbol_type
    self._symbol_data_sources = symbol_data_sources

  def find(self, address_list):
    """Resolves |address_list| with find_runtime_symbols.

    Args:
        address_list: Addresses to resolve.

    Returns:
        Whatever find_runtime_symbols.find_runtime_symbols returns for the
        stored symbol type and data sources.
    """
    return find_runtime_symbols.find_runtime_symbols(
        self._symbol_type, self._symbol_data_sources.get(), address_list)


class SymbolMappingCache(object):
  """Caches mapping from actually used addresses to symbols.

  'update()' updates the cache from the original symbol data sources via
  'SymbolFinder'.  Symbols can be looked up by the method 'lookup()'.
  """
  def __init__(self):
    # One address -> symbol dict per symbol type.
    self._symbol_mapping_caches = {
        FUNCTION_SYMBOLS: {},
        SOURCEFILE_SYMBOLS: {},
        TYPEINFO_SYMBOLS: {},
        }

  def update(self, symbol_type, bucket_set, symbol_finder, cache_f):
    """Updates symbol mapping cache on memory and in a symbol cache file.

    It reads cached symbol mapping from a symbol cache file |cache_f| if it
    exists.  Unresolved addresses are then resolved and added to the cache
    both on memory and in the symbol cache file with using 'SymbolFinder'.

    A cache file is formatted as follows:
      <Address> <Symbol>
      <Address> <Symbol>
      <Address> <Symbol>
      ...

    Addresses are written in hexadecimal without a prefix.  A symbol that is
    empty after stripping whitespace is stored as '?'.

    Args:
        symbol_type: A type of symbols to update.  It should be one of
            FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
        bucket_set: A BucketSet object.
        symbol_finder: A SymbolFinder object to find symbols.
        cache_f: A readable and writable IO object of the symbol cache file.
    """
    cache_f.seek(0, os.SEEK_SET)
    self._load(cache_f, symbol_type)

    cache = self._symbol_mapping_caches[symbol_type]
    unresolved_addresses = sorted(
        address for address in bucket_set.iter_addresses(symbol_type)
        if address not in cache)

    if not unresolved_addresses:
      LOGGER.info('No need to resolve any more addresses.')
      return

    # New entries are appended after whatever the file already contains.
    cache_f.seek(0, os.SEEK_END)
    LOGGER.info('Loading %d unresolved addresses.', len(unresolved_addresses))
    symbol_dict = symbol_finder.find(unresolved_addresses)

    for address, symbol in symbol_dict.items():
      stripped_symbol = symbol.strip() or '?'
      cache[address] = stripped_symbol
      cache_f.write('%x %s\n' % (address, stripped_symbol))

  def lookup(self, symbol_type, address):
    """Looks up a symbol for a given |address|.

    Args:
        symbol_type: A type of symbols to look up.  It should be one of
            FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
        address: An integer that represents an address.

    Returns:
        A string that represents a symbol.  None if |address| is not cached.
    """
    return self._symbol_mapping_caches[symbol_type].get(address)

  def _load(self, cache_f, symbol_type):
    """Reads '<Address> <Symbol>' lines from |cache_f| into the cache.

    Blank lines and lines whose address is not hexadecimal are skipped.  A
    line that has an address but no symbol is stored with the symbol '??'.
    An IOError while reading is logged and stops loading; entries read before
    the error are kept.

    Args:
        cache_f: A readable IO object of the symbol cache file.
        symbol_type: A type of symbols whose cache receives the entries.
    """
    cache = self._symbol_mapping_caches[symbol_type]
    try:
      for line in cache_f:
        items = line.rstrip().split(None, 1)
        if not items:
          continue  # Blank line: nothing to parse.
        try:
          address = int(items[0], 16)
        except ValueError:
          LOGGER.warning('Ignoring a malformed line in the symbol cache: %r',
                         line)
          continue
        cache[address] = items[1] if len(items) > 1 else '??'
      LOGGER.info('Loaded %d entries from symbol cache.', len(cache))
    except IOError as e:
      LOGGER.info('The symbol cache file is invalid: %s', e)
194