stack_core.py revision 7dbb3d5cf0c15f500944d211057644d6a2f37371
1#!/usr/bin/env python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""stack symbolizes native crash dumps."""
18
19import re
20
21import symbol
22
def PrintTraceLines(trace_lines):
  """Print the symbolized back trace as an aligned table on stdout.

  Args:
    trace_lines: sequence of (addr, symbol_with_offset, location) string
      tuples, printed in the order given.

  Prints nothing when trace_lines is empty.
  """
  if not trace_lines:
    # max() raises ValueError on an empty sequence, and there is nothing to
    # show anyway.
    return
  # Pad the FUNCTION column to the widest symbol so FILE:LINE lines up.
  width = max(len(entry[1]) for entry in trace_lines)
  # Single-argument print(...) behaves the same as the print statement.
  print("")
  print("Stack Trace:")
  print("  RELADDR   " + "FUNCTION".ljust(width) + "  FILE:LINE")
  for addr, symbol_with_offset, location in trace_lines:
    print("  %8s  %s  %s" % (addr, symbol_with_offset.ljust(width), location))
33
34
def PrintValueLines(value_lines):
  """Print the symbolized stack data values as an aligned table on stdout.

  Args:
    value_lines: sequence of (addr, value, symbol_with_offset, location)
      string tuples, printed in the order given.

  Prints nothing when value_lines is empty.
  """
  if not value_lines:
    # max() raises ValueError on an empty sequence, and there is nothing to
    # show anyway.
    return
  # Pad the FUNCTION column to the widest symbol so FILE:LINE lines up.
  width = max(len(entry[2]) for entry in value_lines)
  # Single-argument print(...) behaves the same as the print statement.
  print("")
  print("Stack Data:")
  print("  ADDR      VALUE     " + "FUNCTION".ljust(width) + "  FILE:LINE")
  for addr, value, symbol_with_offset, location in value_lines:
    print("  %8s  %8s  %s  %s" % (addr, value, symbol_with_offset.ljust(width),
                                  location))
45
# Placeholder area names. ConvertTrace compares the library/area column of a
# trace or value line against these and, on a match, records the line as-is
# without asking the symbol module for symbol information. UNKNOWN is also
# used as the fallback symbol name when no symbol can be determined.
UNKNOWN = "<unknown>"
HEAP = "[heap]"
STACK = "[stack]"
49
50
def PrintOutput(trace_lines, value_lines, more_info):
  """Print the collected trace lines and, optionally, the value lines.

  Args:
    trace_lines: back trace entries; printed whenever non-empty.
    value_lines: stack data entries; printed only when non-empty and
      more_info is truthy.
    more_info: whether the stack data table should be shown.
  """
  if trace_lines:
    PrintTraceLines(trace_lines)
  # TODO(cjhopman): it seems that symbol.SymbolInformation always fails to
  # find information for addresses in value_lines in chrome libraries, and so
  # value_lines have little value to us and merely clutter the output.
  # Since information is sometimes contained in these lines (from system
  # libraries), don't completely disable them.
  show_values = value_lines and more_info
  if show_values:
    PrintValueLines(value_lines)
62
def PrintDivider():
  """Print a blank line, a dashed rule, and a trailing blank line."""
  rule = "-----------------------------------------------------\n"
  # Single-argument print(...) emits exactly what the print statement does;
  # the empty string yields the leading blank line.
  print("")
  print(rule)
66
def _FillMissingSource(source_symbol, source_location, area,
                       symbol_present, symbol_name):
  """Return (symbol, location) with fallbacks for fields the lookup left empty.

  An empty symbol falls back to the symbol name captured from the log line
  (passed through symbol.CallCppFilt) when one was present, otherwise to
  UNKNOWN. An empty location falls back to the area text from the log line.
  """
  if not source_symbol:
    if symbol_present:
      source_symbol = symbol.CallCppFilt(symbol_name)
    else:
      source_symbol = UNKNOWN
  if not source_location:
    source_location = area
  return source_symbol, source_location


def ConvertTrace(lines, more_info):
  """Convert strings containing native crash to a stack.

  Scans the log for header lines (process, signal, register and thread
  markers), back trace lines and stack value lines, symbolizes the addresses
  through the symbol module and prints the result to stdout. A new header, or
  a frame number that does not increase, ends the current trace and starts a
  new one.

  Args:
    lines: iterable of byte strings, one per log line. Each is decoded with
      unicode(ln, errors='ignore').
    more_info: forwarded to the symbol lookups; also controls whether the
      stack data table is printed (see PrintOutput).
  """
  process_info_line = re.compile("(pid: [0-9]+, tid: [0-9]+.*)")
  signal_line = re.compile("(signal [0-9]+ \(.*\).*)")
  register_line = re.compile("(([ ]*[0-9a-z]{2} [0-9a-f]{8}){4})")
  thread_line = re.compile("(.*)(\-\-\- ){15}\-\-\-")
  dalvik_jni_thread_line = re.compile("(\".*\" prio=[0-9]+ tid=[0-9]+ NATIVE.*)")
  dalvik_native_thread_line = re.compile("(\".*\" sysTid=[0-9]+ nice=[0-9]+.*)")
  # Header patterns in the order their matches are echoed to the output.
  header_patterns = (process_info_line, signal_line, register_line,
                     thread_line, dalvik_jni_thread_line,
                     dalvik_native_thread_line)
  # Note that both trace and value line matching allow for variable amounts of
  # whitespace (e.g. \t). This is because we want to allow for the stack
  # tool to operate on AndroidFeedback provided system logs. AndroidFeedback
  # strips out double spaces that are found in tombstone files and logcat
  # output.
  #
  # Examples of matched trace lines include lines from tombstone files like:
  #   #00  pc 001cf42e  /data/data/com.my.project/lib/libmyproject.so
  #   #00  pc 001cf42e  /data/data/com.my.project/lib/libmyproject.so (symbol)
  # Or lines from AndroidFeedback crash report system logs like:
  #   03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
  # Please note the spacing differences.
  trace_line = re.compile("(.*)\#(?P<frame>[0-9]+)[ \t]+(..)[ \t]+(0x)?(?P<address>[0-9a-f]{0,8})[ \t]+(?P<lib>[^\r\n \t]*)(?P<symbol_present> \((?P<symbol_name>.*)\))?")  # pylint: disable-msg=C6310
  # Examples of matched value lines include:
  #   bea4170c  8018e4e9  /data/data/com.my.project/lib/libmyproject.so
  #   bea4170c  8018e4e9  /data/data/com.my.project/lib/libmyproject.so (symbol)
  #   03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
  # Again, note the spacing differences.
  value_line = re.compile("(.*)([0-9a-f]{8})[ \t]+([0-9a-f]{8})[ \t]+([^\r\n \t]*)( \((.*)\))?")
  # Lines from 'code around' sections of the output will be matched before
  # value lines because otherwise the 'code around' sections will be confused
  # as value lines.
  #
  # Examples include:
  #   801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
  #   03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
  code_line = re.compile("(.*)[ \t]*[a-f0-9]{8}[ \t]*[a-f0-9]{8}[ \t]*[a-f0-9]{8}[ \t]*[a-f0-9]{8}[ \t]*[a-f0-9]{8}[ \t]*[ \r\n]")  # pylint: disable-msg=C6310

  pseudo_areas = (UNKNOWN, HEAP, STACK)

  trace_lines = []
  value_lines = []
  last_frame = -1

  # AndroidFeedback adds zero width spaces into its crash reports. These
  # should be removed or the regular expressions will fail to match. Decode
  # every line once up front so both passes below share the result (this also
  # lets `lines` be a one-shot iterator).
  decoded_lines = [unicode(ln, errors='ignore') for ln in lines]

  # It is faster to get symbol information with a single call rather than with
  # separate calls for each line. Since symbol.SymbolInformation caches results,
  # we can extract all the addresses that we will want symbol information for
  # from the log and call symbol.SymbolInformation so that the results are
  # cached in the following lookups.
  code_addresses = {}
  for line in decoded_lines:
    lib, address = None, None

    match = trace_line.match(line)
    if match:
      address, lib = match.group('address', 'lib')

    match = value_line.match(line)
    if match and not code_line.match(line):
      # For value lines the address to symbolize is the stored value (third
      # group), not the stack slot address.
      address, lib = match.group(3, 4)

    if lib:
      code_addresses.setdefault(lib, set()).add(address)

  for lib in code_addresses:
    symbol.SymbolInformationForSet(
        symbol.TranslateLibPath(lib), code_addresses[lib], more_info)

  for line in decoded_lines:
    headers = [pattern.search(line) for pattern in header_patterns]
    if any(headers):
      # A header starts a new section: flush whatever was collected so far.
      if trace_lines or value_lines:
        PrintOutput(trace_lines, value_lines, more_info)
        PrintDivider()
        trace_lines = []
        value_lines = []
        last_frame = -1
      for header in headers:
        if header:
          print(header.group(1))
      continue

    match = trace_line.match(line)
    if match:
      frame, code_addr, area, symbol_present, symbol_name = match.group(
          'frame', 'address', 'lib', 'symbol_present', 'symbol_name')
      # Compare frame numbers numerically. As strings, "10" <= "9" holds, so
      # unpadded or three-digit frame numbers would split a single trace.
      frame = int(frame)

      # A frame number that does not increase means a new trace has begun.
      if frame <= last_frame and (trace_lines or value_lines):
        PrintOutput(trace_lines, value_lines, more_info)
        PrintDivider()
        trace_lines = []
        value_lines = []
      last_frame = frame

      if area in pseudo_areas:
        trace_lines.append((code_addr, "", area))
      else:
        # If a calls b which further calls c and c is inlined to b, we want to
        # display "a -> b -> c" in the stack trace instead of just "a -> c"
        info = symbol.SymbolInformation(area, code_addr, more_info)
        last_index = len(info) - 1
        for index, entry in enumerate(info):
          (source_symbol, source_location, object_symbol_with_offset) = entry
          source_symbol, source_location = _FillMissingSource(
              source_symbol, source_location, area, symbol_present,
              symbol_name)
          if index < last_index:
            # Every entry but the last gets an arrow marker in place of the
            # address.
            trace_lines.append(("v------>", source_symbol, source_location))
          else:
            if not object_symbol_with_offset:
              object_symbol_with_offset = source_symbol
            trace_lines.append((code_addr,
                                object_symbol_with_offset,
                                source_location))
    if code_line.match(line):
      # Code lines should be ignored. If this were excluded the 'code around'
      # sections would trigger value_line matches.
      continue
    match = value_line.match(line)
    if match:
      (unused_, addr, value, area, symbol_present, symbol_name) = match.groups()
      if area in pseudo_areas or not area:
        value_lines.append((addr, value, "", area))
      else:
        info = symbol.SymbolInformation(area, value, more_info)
        # Read the last entry instead of pop()-ing it, so the returned list is
        # left intact. NOTE(review): SymbolInformation is said above to cache
        # results; if it hands back the cached list itself, pop() would
        # corrupt later lookups of the same address -- confirm in symbol.py.
        (source_symbol, source_location, object_symbol_with_offset) = info[-1]
        source_symbol, source_location = _FillMissingSource(
            source_symbol, source_location, area, symbol_present, symbol_name)
        if not object_symbol_with_offset:
          object_symbol_with_offset = source_symbol
        value_lines.append((addr,
                            value,
                            object_symbol_with_offset,
                            source_location))

  PrintOutput(trace_lines, value_lines, more_info)
225