// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// This file implements the google_breakpad::StabsReader class.
// See stabs_reader.h.
33
34#include "common/stabs_reader.h"
35
36#include <assert.h>
37#include <stab.h>
38#include <string.h>
39
40#include <string>
41
42#include "common/using_std_string.h"
43
44using std::vector;
45
46namespace google_breakpad {
47
48StabsReader::EntryIterator::EntryIterator(const ByteBuffer *buffer,
49                                          bool big_endian, size_t value_size)
50    : value_size_(value_size), cursor_(buffer, big_endian) {
51  // Actually, we could handle weird sizes just fine, but they're
52  // probably mistakes --- expressed in bits, say.
53  assert(value_size == 4 || value_size == 8);
54  entry_.index = 0;
55  Fetch();
56}
57
58void StabsReader::EntryIterator::Fetch() {
59  cursor_
60      .Read(4, false, &entry_.name_offset)
61      .Read(1, false, &entry_.type)
62      .Read(1, false, &entry_.other)
63      .Read(2, false, &entry_.descriptor)
64      .Read(value_size_, false, &entry_.value);
65  entry_.at_end = !cursor_;
66}
67
68StabsReader::StabsReader(const uint8_t *stab,    size_t stab_size,
69                         const uint8_t *stabstr, size_t stabstr_size,
70                         bool big_endian, size_t value_size, bool unitized,
71                         StabsHandler *handler)
72    : entries_(stab, stab_size),
73      strings_(stabstr, stabstr_size),
74      iterator_(&entries_, big_endian, value_size),
75      unitized_(unitized),
76      handler_(handler),
77      string_offset_(0),
78      next_cu_string_offset_(0),
79      current_source_file_(NULL) { }
80
81const char *StabsReader::SymbolString() {
82  ptrdiff_t offset = string_offset_ + iterator_->name_offset;
83  if (offset < 0 || (size_t) offset >= strings_.Size()) {
84    handler_->Warning("symbol %d: name offset outside the string section\n",
85                      iterator_->index);
86    // Return our null string, to keep our promise about all names being
87    // taken from the string section.
88    offset = 0;
89  }
90  return reinterpret_cast<const char *>(strings_.start + offset);
91}
92
93bool StabsReader::Process() {
94  while (!iterator_->at_end) {
95    if (iterator_->type == N_SO) {
96      if (! ProcessCompilationUnit())
97        return false;
98    } else if (iterator_->type == N_UNDF && unitized_) {
99      // In unitized STABS (including Linux STABS, and pretty much anything
100      // else that puts STABS data in sections), at the head of each
101      // compilation unit's entries there is an N_UNDF stab giving the
102      // number of symbols in the compilation unit, and the number of bytes
103      // that compilation unit's strings take up in the .stabstr section.
104      // Each CU's strings are separate; the n_strx values are offsets
105      // within the current CU's portion of the .stabstr section.
106      //
107      // As an optimization, the GNU linker combines all the
108      // compilation units into one, with a single N_UNDF at the
109      // beginning. However, other linkers, like Gold, do not perform
110      // this optimization.
111      string_offset_ = next_cu_string_offset_;
112      next_cu_string_offset_ = iterator_->value;
113      ++iterator_;
114    }
115#if defined(HAVE_MACH_O_NLIST_H)
116    // Export symbols in Mach-O binaries look like this.
117    // This is necessary in order to be able to dump symbols
118    // from OS X system libraries.
119    else if ((iterator_->type & N_STAB) == 0 &&
120               (iterator_->type & N_TYPE) == N_SECT) {
121      ProcessExtern();
122    }
123#endif
124    else {
125      ++iterator_;
126    }
127  }
128  return true;
129}
130
131bool StabsReader::ProcessCompilationUnit() {
132  assert(!iterator_->at_end && iterator_->type == N_SO);
133
134  // There may be an N_SO entry whose name ends with a slash,
135  // indicating the directory in which the compilation occurred.
136  // The build directory defaults to NULL.
137  const char *build_directory = NULL;
138  {
139    const char *name = SymbolString();
140    if (name[0] && name[strlen(name) - 1] == '/') {
141      build_directory = name;
142      ++iterator_;
143    }
144  }
145
146  // We expect to see an N_SO entry with a filename next, indicating
147  // the start of the compilation unit.
148  {
149    if (iterator_->at_end || iterator_->type != N_SO)
150      return true;
151    const char *name = SymbolString();
152    if (name[0] == '\0') {
153      // This seems to be a stray end-of-compilation-unit marker;
154      // consume it, but don't report the end, since we didn't see a
155      // beginning.
156      ++iterator_;
157      return true;
158    }
159    current_source_file_ = name;
160  }
161
162  if (! handler_->StartCompilationUnit(current_source_file_,
163                                       iterator_->value,
164                                       build_directory))
165    return false;
166
167  ++iterator_;
168
169  // The STABS documentation says that some compilers may emit
170  // additional N_SO entries with names immediately following the
171  // first, and that they should be ignored.  However, the original
172  // Breakpad STABS reader doesn't ignore them, so we won't either.
173
174  // Process the body of the compilation unit, up to the next N_SO.
175  while (!iterator_->at_end && iterator_->type != N_SO) {
176    if (iterator_->type == N_FUN) {
177      if (! ProcessFunction())
178        return false;
179    } else if (iterator_->type == N_SLINE) {
180      // Mac OS X STABS place SLINE records before functions.
181      Line line;
182      // The value of an N_SLINE entry that appears outside a function is
183      // the absolute address of the line.
184      line.address = iterator_->value;
185      line.filename = current_source_file_;
186      // The n_desc of a N_SLINE entry is the line number.  It's a
187      // signed 16-bit field; line numbers from 32768 to 65535 are
188      // stored as n-65536.
189      line.number = (uint16_t) iterator_->descriptor;
190      queued_lines_.push_back(line);
191      ++iterator_;
192    } else if (iterator_->type == N_SOL) {
193      current_source_file_ = SymbolString();
194      ++iterator_;
195    } else {
196      // Ignore anything else.
197      ++iterator_;
198    }
199  }
200
201  // An N_SO with an empty name indicates the end of the compilation
202  // unit.  Default to zero.
203  uint64_t ending_address = 0;
204  if (!iterator_->at_end) {
205    assert(iterator_->type == N_SO);
206    const char *name = SymbolString();
207    if (name[0] == '\0') {
208      ending_address = iterator_->value;
209      ++iterator_;
210    }
211  }
212
213  if (! handler_->EndCompilationUnit(ending_address))
214    return false;
215
216  queued_lines_.clear();
217
218  return true;
219}
220
221bool StabsReader::ProcessFunction() {
222  assert(!iterator_->at_end && iterator_->type == N_FUN);
223
224  uint64_t function_address = iterator_->value;
225  // The STABS string for an N_FUN entry is the name of the function,
226  // followed by a colon, followed by type information for the
227  // function.  We want to pass the name alone to StartFunction.
228  const char *stab_string = SymbolString();
229  const char *name_end = strchr(stab_string, ':');
230  if (! name_end)
231    name_end = stab_string + strlen(stab_string);
232  string name(stab_string, name_end - stab_string);
233  if (! handler_->StartFunction(name, function_address))
234    return false;
235  ++iterator_;
236
237  // If there were any SLINE records given before the function, report them now.
238  for (vector<Line>::const_iterator it = queued_lines_.begin();
239       it != queued_lines_.end(); it++) {
240    if (!handler_->Line(it->address, it->filename, it->number))
241      return false;
242  }
243  queued_lines_.clear();
244
245  while (!iterator_->at_end) {
246    if (iterator_->type == N_SO || iterator_->type == N_FUN)
247      break;
248    else if (iterator_->type == N_SLINE) {
249      // The value of an N_SLINE entry is the offset of the line from
250      // the function's start address.
251      uint64_t line_address = function_address + iterator_->value;
252      // The n_desc of a N_SLINE entry is the line number.  It's a
253      // signed 16-bit field; line numbers from 32768 to 65535 are
254      // stored as n-65536.
255      uint16_t line_number = iterator_->descriptor;
256      if (! handler_->Line(line_address, current_source_file_, line_number))
257        return false;
258      ++iterator_;
259    } else if (iterator_->type == N_SOL) {
260      current_source_file_ = SymbolString();
261      ++iterator_;
262    } else
263      // Ignore anything else.
264      ++iterator_;
265  }
266
267  // We've reached the end of the function. See if we can figure out its
268  // ending address.
269  uint64_t ending_address = 0;
270  if (!iterator_->at_end) {
271    assert(iterator_->type == N_SO || iterator_->type == N_FUN);
272    if (iterator_->type == N_FUN) {
273      const char *symbol_name = SymbolString();
274      if (symbol_name[0] == '\0') {
275        // An N_FUN entry with no name is a terminator for this function;
276        // its value is the function's size.
277        ending_address = function_address + iterator_->value;
278        ++iterator_;
279      } else {
280        // An N_FUN entry with a name is the next function, and we can take
281        // its value as our ending address. Don't advance the iterator, as
282        // we'll use this symbol to start the next function as well.
283        ending_address = iterator_->value;
284      }
285    } else {
286      // An N_SO entry could be an end-of-compilation-unit marker, or the
287      // start of the next compilation unit, but in either case, its value
288      // is our ending address. We don't advance the iterator;
289      // ProcessCompilationUnit will decide what to do with this symbol.
290      ending_address = iterator_->value;
291    }
292  }
293
294  if (! handler_->EndFunction(ending_address))
295    return false;
296
297  return true;
298}
299
300bool StabsReader::ProcessExtern() {
301#if defined(HAVE_MACH_O_NLIST_H)
302  assert(!iterator_->at_end &&
303         (iterator_->type & N_STAB) == 0 &&
304         (iterator_->type & N_TYPE) == N_SECT);
305#endif
306
307  // TODO(mark): only do symbols in the text section?
308  if (!handler_->Extern(SymbolString(), iterator_->value))
309    return false;
310
311  ++iterator_;
312  return true;
313}
314
315} // namespace google_breakpad
316