1# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import os
7import re
8from autotest_lib.client.common_lib import utils as client_utils
9from autotest_lib.client.common_lib.cros import dev_server
10from autotest_lib.client.common_lib.cros import retry
11from autotest_lib.client.common_lib.cros.graphite import autotest_stats
12from autotest_lib.client.cros import constants
13from autotest_lib.server.cros.dynamic_suite.constants import JOB_BUILD_KEY
14from autotest_lib.server import utils
15
16
# Names of the stats counters bumped during devserver symbolication.
# Incremented when no crash server has the capacity to symbolicate a dump.
CRASH_SERVER_OVERLOAD = 'crash_server_overload'
# Incremented when a crash server was picked to symbolicate a dump.
CRASH_SERVER_FOUND = 'crash_server_found'
# Incremented when symbolication on the devserver times out.
SYMBOLICATE_TIMEDOUT = 'symbolicate_timedout'

# Timer used to decorate symbolicate_minidump_with_devserver().
timer = autotest_stats.Timer('crash_collect')
22
def generate_minidump_stacktrace(minidump_path):
    """
    Symbolicate one minidump locally by running minidump_stackwalk.

    The debug symbols are looked up relative to the server directory and
    are expected to reside under:
        /build/<board>/usr/lib/debug

    The stack trace is written next to the dump as "<minidump_path>.txt".

    @param minidump_path: absolute path to minidump to be symbolicated.
    @raise client_utils.error.CmdError if minidump_stackwalk return code != 0.
    """
    server_dir = utils.get_server_dir()
    symbol_dir = '%s/../../../lib/debug' % server_dir
    logging.info('symbol_dir: %s', symbol_dir)

    # The shell redirection stores the trace in "<minidump_path>.txt".
    stackwalk_args = (minidump_path, symbol_dir, minidump_path)
    command = 'minidump_stackwalk "%s" "%s" > "%s.txt"' % stackwalk_args
    client_utils.run(command)
37
38
@timer.decorate
def symbolicate_minidump_with_devserver(minidump_path, resultdir):
    """
    Ask a crash server (devserver) to symbolicate the given minidump.

    The debug symbols for the tested build are assumed to be staged on the
    devserver already.  On success the stack trace is written next to the
    dump as "<minidump_path>.txt".

    @param minidump_path: absolute path to minidump to be symbolicated.
    @param resultdir: server job's result directory.
    @raise DevServerException upon failure, HTTP or otherwise.
    """
    # The build under test decides which debug symbols apply; if it cannot
    # be determined there is no point in contacting a devserver at all.
    job_keyvals = client_utils.read_keyval(resultdir)
    if JOB_BUILD_KEY not in job_keyvals:
        raise dev_server.DevServerException(
            'Cannot determine build being tested.')

    crashserver_name = dev_server.get_least_loaded_devserver(
            devserver_type=dev_server.CrashServer)
    if not crashserver_name:
        autotest_stats.Counter(CRASH_SERVER_OVERLOAD).increment()
        raise dev_server.DevServerException(
                'No crash server has the capacity to symbolicate the dump.')
    autotest_stats.Counter(CRASH_SERVER_FOUND).increment()

    crash_server = dev_server.CrashServer(crashserver_name)
    stack_trace = crash_server.symbolicate_dump(
        minidump_path, job_keyvals[JOB_BUILD_KEY])
    if not stack_trace:
        raise dev_server.DevServerException('Unknown error!!')

    trace_path = minidump_path + '.txt'
    with open(trace_path, 'w') as trace_file:
        trace_file.write(stack_trace)
72
73
def find_and_generate_minidump_stacktraces(host_resultdir):
    """
    Finds all minidump files and generates a stack trace for each.

    Enumerates all files under the test results directory (recursively)
    and generates a stack trace file for the minidumps.  Minidump files are
    identified as files with .dmp extension.  The stack trace filename is
    composed by appending the .txt extension to the minidump filename.

    Symbolication is attempted locally first and, if that fails, through a
    devserver.  Failures of either attempt are logged, not raised.

    @param host_resultdir: Directory to walk looking for dmp files.

    @returns The list of paths of all minidumps found, whether or not a
             stack trace could be generated for them.
    """
    minidumps = []
    for dirpath, _subdirs, filenames in os.walk(host_resultdir):
        for filename in filenames:
            if not filename.endswith('.dmp'):
                continue
            minidump = os.path.join(dirpath, filename)
            # Every dump that is found gets reported to the caller, even if
            # both symbolication attempts below fail.
            minidumps.append(minidump)

            # First, try to symbolicate locally.
            try:
                generate_minidump_stacktrace(minidump)
                logging.info('Generated stack trace for dump %s', minidump)
                continue
            except client_utils.error.CmdError as err:
                logging.warning('Failed to generate stack trace locally for '
                                'dump %s (rc=%d):\n%r',
                                minidump, err.result_obj.exit_status, err)

            # If that did not succeed, try to symbolicate using the dev server.
            try:
                logging.info('Generating stack trace for %s', minidump)
                # Bound the devserver call so one slow dump cannot stall
                # the whole collection.
                is_timeout, _result = retry.timeout(
                        symbolicate_minidump_with_devserver,
                        args=(minidump, host_resultdir),
                        timeout_sec=600)
                if is_timeout:
                    logging.warning('Generating stack trace is timed out for '
                                    'dump %s', minidump)
                    autotest_stats.Counter(SYMBOLICATE_TIMEDOUT).increment()
                else:
                    logging.info('Generated stack trace for dump %s', minidump)
            except dev_server.DevServerException as e:
                logging.warning('Failed to generate stack trace on devserver '
                                'for dump %s:\n%r', minidump, e)
    return minidumps
124
125
def fetch_orphaned_crashdumps(host, host_resultdir):
    """
    Pull every file found in the host's crash directory into the results.

    @param host A host object of the device we're to pull crashes from.
    @param host_resultdir The result directory for this host for this test run.
    @return The list of paths, as named on the host, that were pulled back.
    """
    crash_glob = os.path.join(constants.CRASH_DIR, '*')
    collected = []
    for remote_path in host.list_files_glob(crash_glob):
        logging.info('Collecting %s...', remote_path)
        host.get_file(remote_path, host_resultdir, preserve_perm=False)
        collected.append(remote_path)
    return collected
140
141
def get_site_crashdumps(host, test_start_time):
    """
    Copy all of the crashdumps from a host to the results directory.

    Orphaned dumps are fetched into "crashinfo.<hostname>" under the job's
    result directory, stack traces are generated for every minidump found
    under the result directory, all dumps are recorded in the job's status
    log and finally handed to report_bug_from_crash().

    @param host The host object from which to pull crashes
    @param test_start_time When the test we just ran started.  Not used by
                           this function at present.
    @return A list of all the crash dumps: the orphans fetched from the host
            followed by the minidumps found in the results.  Empty if the
            host has no job result directory to collect into.
    """
    job = getattr(host, 'job', None)
    host_resultdir = getattr(job, 'resultdir', None)
    if host_resultdir is None:
        # Without a result directory there is nowhere to copy dumps to and
        # nothing to scan; bail out instead of crashing in os.path.join().
        logging.warning('Host has no job result directory; skipping crash '
                        'collection.')
        return []

    infodir = os.path.join(host_resultdir, 'crashinfo.%s' % host.hostname)
    if not os.path.exists(infodir):
        os.mkdir(infodir)

    # TODO(milleral): handle orphans differently. crosbug.com/38202
    try:
        orphans = fetch_orphaned_crashdumps(host, infodir)
    except Exception as e:
        # Best effort: a failure to fetch orphans must not prevent the
        # minidumps already in the results from being processed.
        orphans = []
        logging.warning('Collection of orphaned crash dumps failed %s', e)

    minidumps = find_and_generate_minidump_stacktraces(host_resultdir)

    # Record all crashdumps in status.log of the job:
    # - If one server job runs several client jobs we will only record
    # crashdumps in the status.log of the high level server job.
    # - We will record these crashdumps whether or not we successfully
    # symbolicate them.
    # The parentheses matter: "a and b or c" parses as "(a and b) or c",
    # which would call record() on a missing job when only orphans exist.
    if job and (minidumps or orphans):
        job.record('INFO', None, None, 'Start crashcollection record')
        for minidump in minidumps:
            job.record('INFO', None, 'New Crash Dump', minidump)
        for orphan in orphans:
            job.record('INFO', None, 'Orphaned Crash Dump', orphan)
        job.record('INFO', None, None, 'End crashcollection record')

    orphans.extend(minidumps)

    for minidump in orphans:
        report_bug_from_crash(host, minidump)

    return orphans
183
184
def find_package_of(host, exec_name):
    """
    Find the package that an executable came from.

    @param host A host object that has the executable.
    @param exec_name Name of or path to executable.
    @return The name of the package that installed the executable, or an
            empty string if zero or more than one candidate was found.
    """
    # Run "portageq owners" on "host" to determine which package owns
    # "exec_name."  Portageq output consists of package names followed by
    # tab-prefixed path names.  For example, owners of "python:"
    #
    # sys-devel/gdb-7.7.1-r2
    #         /usr/share/gdb/python
    # chromeos-base/dev-install-0.0.1-r711
    #         /usr/bin/python
    # dev-lang/python-2.7.3-r7
    #         /etc/env.d/python
    #
    # This gets piped into "xargs stat" to annotate each line with
    # information about the path, so we later can consider only packages
    # with executable files.  After annotation the above looks like:
    #
    # stat: cannot stat '@@@ sys-devel/gdb-7.7.1-r2 @@@': ...
    # stat: cannot stat '/usr/share/gdb/python': ...
    # stat: cannot stat '@@@ chromeos-base/dev-install-0.0.1-r711 @@@': ...
    # 755 -rwxr-xr-x /usr/bin/python
    # stat: cannot stat '@@@ dev-lang/python-2.7.3-r7 @@@': ...
    # 755 drwxr-xr-x /etc/env.d/python
    #
    # Package names are surrounded by "@@@" to facilitate parsing.  Lines
    # starting with an octal number were successfully annotated, because
    # the path existed on "host."
    # The above is then parsed to find packages which contain executable files
    # (not directories), in this case "chromeos-base/dev-install-0.0.1-r711."
    #
    # TODO(milleral): portageq can show scary looking error messages
    # in the debug logs via stderr. We only look at stdout, so those
    # get filtered, but it would be good to silence them.

    # exec_name is read from a crash .meta file, so quote it before it is
    # spliced into the shell pipeline: spaces or shell metacharacters must
    # not split the argument or inject commands.
    try:
        from shlex import quote as shell_quote  # Python 3.3+
    except ImportError:
        from pipes import quote as shell_quote  # Python 2
    cmd = ('portageq owners / ' + shell_quote(exec_name) +
            r'| sed -e "s/^[^\t].*/@@@ & @@@/" -e "s/^\t//"'
            r'| tr \\n \\0'
            ' | xargs -0 -r stat -L -c "%a %A %n" 2>&1')
    portageq = host.run(cmd, ignore_status=True)

    # Parse into a set of names of packages containing an executable file.
    packages = set()
    pkg = ''
    pkg_re = re.compile('@@@ (.*) @@@')
    path_re = re.compile('^([0-7]{3,}) (.)')
    for line in portageq.stdout.splitlines():
        match = pkg_re.search(line)
        if match:
            # A package header: the path lines that follow belong to it.
            pkg = match.group(1)
            continue
        match = path_re.match(line)
        if match:
            # Any of the user/group/other execute bits counts.
            isexec = int(match.group(1), 8) & 0o111
            # First character of the "%A" mode string: '-' is a regular file.
            isfile = match.group(2) == '-'
            if pkg and isexec and isfile:
                packages.add(pkg)

    # If exactly one package found it must be the one we want, return it.
    if len(packages) == 1:
        return packages.pop()

    # TODO(milleral): Decide if it really is an error if not exactly one
    # package is found.
    # It is highly questionable as to if this should be left in the
    # production version of this code or not.
    if not packages:
        logging.warning('find_package_of() found no packages for "%s"',
                        exec_name)
    else:
        # Sorted so that the log line is stable from run to run.
        logging.warning('find_package_of() found multiple packages for "%s": '
                        '%s', exec_name, ', '.join(sorted(packages)))
    return ''
262
263
def report_bug_from_crash(host, minidump_path):
    """
    Look up which package crashed and log that a bug would be reported.

    The executable name is read from the "exec_name" entry of the .meta
    file that sits next to the dump (same path, ".meta" extension).

    @param host A host object that is where the dump came from
    @param minidump_path The path to the dump file that should be reported.
    """
    # TODO(milleral): Once this has actually been tested, remove the
    # try/except. In the meantime, let's make sure nothing dies because of
    # the fact that this code isn't very heavily tested.
    try:
        dump_root, _ext = os.path.splitext(minidump_path)
        with open(dump_root + '.meta', 'r') as meta_file:
            for entry in meta_file.readlines():
                fields = entry.split('=')
                if fields[0] != 'exec_name':
                    continue
                # Only the first exec_name entry is considered.
                owner = find_package_of(host, fields[1].strip())
                logging.info('Would report crash on %s.',
                             owner or '<unknown package>')
                break
    except Exception as e:
        logging.warning('Crash detection failed with: %s', e)
287