1// -*- mode: c++ -*-
2
3// Copyright (c) 2010 Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10//     * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12//     * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16//     * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
33
34// Add DWARF debugging information to a Breakpad symbol file. This
35// file defines the DwarfCUToModule class, which accepts parsed DWARF
36// data and populates a google_breakpad::Module with the results; the
37// Module can then write its contents as a Breakpad symbol file.
38
39#ifndef COMMON_LINUX_DWARF_CU_TO_MODULE_H__
40#define COMMON_LINUX_DWARF_CU_TO_MODULE_H__
41
42#include <string>
43
44#include "common/language.h"
45#include "common/module.h"
46#include "common/dwarf/bytereader.h"
47#include "common/dwarf/dwarf2diehandler.h"
48#include "common/dwarf/dwarf2reader.h"
49#include "common/scoped_ptr.h"
50#include "common/using_std_string.h"
51
52namespace google_breakpad {
53
54using dwarf2reader::DwarfAttribute;
55using dwarf2reader::DwarfForm;
56using dwarf2reader::DwarfLanguage;
57using dwarf2reader::DwarfTag;
58
59// Populate a google_breakpad::Module with DWARF debugging information.
60//
61// An instance of this class can be provided as a handler to a
62// dwarf2reader::DIEDispatcher, which can in turn be a handler for a
63// dwarf2reader::CompilationUnit DWARF parser. The handler uses the results
64// of parsing to populate a google_breakpad::Module with source file,
65// function, and source line information.
66class DwarfCUToModule: public dwarf2reader::RootDIEHandler {
67  struct FilePrivate;
68 public:
  // Information global to the DWARF-bearing file we are processing,
  // for use by DwarfCUToModule. Each DwarfCUToModule instance deals
  // with a single compilation unit within the file, but information
  // global to the whole file is held here. The client is responsible
  // for filling it in appropriately (except for the 'file_private_'
  // member, which the constructor and destructor take care of), and
  // then providing it to the DwarfCUToModule instance for each
  // compilation unit we process in that file. Set HANDLE_INTER_CU_REFS
  // to true to handle debugging symbols with DW_FORM_ref_addr entries.
  class FileContext {
   public:
    // Create a context for the DWARF-bearing file named FILENAME,
    // contributing definitions to MODULE. HANDLE_INTER_CU_REFS selects
    // whether references between compilation units are handled.
    // NOTE(review): ownership of MODULE is not visible in this header;
    // presumably the caller retains it -- confirm in the .cc file.
    FileContext(const string &filename,
                Module *module,
                bool handle_inter_cu_refs);
    ~FileContext();

    // Add CONTENTS of size LENGTH to the section map as NAME.
    void AddSectionToSectionMap(const string& name,
                                const char* contents,
                                uint64 length);

    // Clear the section map for testing.
    void ClearSectionMapForTest();

    // Read-only access to a section map; presumably section_map_ (the
    // definition is not in this header).
    const dwarf2reader::SectionMap& section_map() const;

   private:
    // DwarfCUToModule uses the private members declared below.
    friend class DwarfCUToModule;

    // Clears all the Specifications if handle_inter_cu_refs_ is false.
    void ClearSpecifications();

    // Given an OFFSET and a CU that starts at COMPILATION_UNIT_START, returns
    // true if this is an inter-compilation unit reference that is not being
    // handled.
    bool IsUnhandledInterCUReference(uint64 offset,
                                     uint64 compilation_unit_start) const;

    // The name of this file, for use in error messages.
    const string filename_;

    // A map of this file's sections, used for finding other DWARF
    // sections that the .debug_info section may refer to.
    dwarf2reader::SectionMap section_map_;

    // The Module to which we're contributing definitions.
    Module *module_;

    // True if we are handling references between compilation units.
    const bool handle_inter_cu_refs_;

    // Inter-compilation unit data used internally by the handlers.
    // FilePrivate is only forward-declared in this header.
    scoped_ptr<FilePrivate> file_private_;
  };
123
124  // An abstract base class for handlers that handle DWARF line data
125  // for DwarfCUToModule. DwarfCUToModule could certainly just use
126  // dwarf2reader::LineInfo itself directly, but decoupling things
127  // this way makes unit testing a little easier.
128  class LineToModuleHandler {
129   public:
130    LineToModuleHandler() { }
131    virtual ~LineToModuleHandler() { }
132
133    // Called at the beginning of a new compilation unit, prior to calling
134    // ReadProgram(). compilation_dir will indicate the path that the
135    // current compilation unit was compiled in, consistent with the
136    // DW_AT_comp_dir DIE.
137    virtual void StartCompilationUnit(const string& compilation_dir) = 0;
138
139    // Populate MODULE and LINES with source file names and code/line
140    // mappings, given a pointer to some DWARF line number data
141    // PROGRAM, and an overestimate of its size. Add no zero-length
142    // lines to LINES.
143    virtual void ReadProgram(const char *program, uint64 length,
144                             Module *module, vector<Module::Line> *lines) = 0;
145  };
146
147  // The interface DwarfCUToModule uses to report warnings. The member
148  // function definitions for this class write messages to stderr, but
149  // you can override them if you'd like to detect or report these
150  // conditions yourself.
151  class WarningReporter {
152   public:
153    // Warn about problems in the DWARF file FILENAME, in the
154    // compilation unit at OFFSET.
155    WarningReporter(const string &filename, uint64 cu_offset)
156        : filename_(filename), cu_offset_(cu_offset), printed_cu_header_(false),
157          printed_unpaired_header_(false),
158          uncovered_warnings_enabled_(false) { }
159    virtual ~WarningReporter() { }
160
161    // Set the name of the compilation unit we're processing to NAME.
162    virtual void SetCUName(const string &name) { cu_name_ = name; }
163
164    // Accessor and setter for uncovered_warnings_enabled_.
165    // UncoveredFunction and UncoveredLine only report a problem if that is
166    // true. By default, these warnings are disabled, because those
167    // conditions occur occasionally in healthy code.
168    virtual bool uncovered_warnings_enabled() const {
169      return uncovered_warnings_enabled_;
170    }
171    virtual void set_uncovered_warnings_enabled(bool value) {
172      uncovered_warnings_enabled_ = value;
173    }
174
175    // A DW_AT_specification in the DIE at OFFSET refers to a DIE we
176    // haven't processed yet, or that wasn't marked as a declaration,
177    // at TARGET.
178    virtual void UnknownSpecification(uint64 offset, uint64 target);
179
180    // A DW_AT_abstract_origin in the DIE at OFFSET refers to a DIE we
181    // haven't processed yet, or that wasn't marked as inline, at TARGET.
182    virtual void UnknownAbstractOrigin(uint64 offset, uint64 target);
183
184    // We were unable to find the DWARF section named SECTION_NAME.
185    virtual void MissingSection(const string &section_name);
186
187    // The CU's DW_AT_stmt_list offset OFFSET is bogus.
188    virtual void BadLineInfoOffset(uint64 offset);
189
190    // FUNCTION includes code covered by no line number data.
191    virtual void UncoveredFunction(const Module::Function &function);
192
193    // Line number NUMBER in LINE_FILE, of length LENGTH, includes code
194    // covered by no function.
195    virtual void UncoveredLine(const Module::Line &line);
196
197    // The DW_TAG_subprogram DIE at OFFSET has no name specified directly
198    // in the DIE, nor via a DW_AT_specification or DW_AT_abstract_origin
199    // link.
200    virtual void UnnamedFunction(uint64 offset);
201
202    // __cxa_demangle() failed to demangle INPUT.
203    virtual void DemangleError(const string &input, int error);
204
205    // The DW_FORM_ref_addr at OFFSET to TARGET was not handled because
206    // FilePrivate did not retain the inter-CU specification data.
207    virtual void UnhandledInterCUReference(uint64 offset, uint64 target);
208
209    uint64 cu_offset() const {
210      return cu_offset_;
211    }
212
213   protected:
214    const string filename_;
215    const uint64 cu_offset_;
216    string cu_name_;
217    bool printed_cu_header_;
218    bool printed_unpaired_header_;
219    bool uncovered_warnings_enabled_;
220
221   private:
222    // Print a per-CU heading, once.
223    void CUHeading();
224    // Print an unpaired function/line heading, once.
225    void UncoveredHeading();
226  };
227
  // Create a DWARF debugging info handler for a compilation unit
  // within FILE_CONTEXT. This uses information received from the
  // dwarf2reader::CompilationUnit DWARF parser to populate
  // FILE_CONTEXT's Module (FileContext::module_). Use LINE_READER to
  // handle the compilation unit's line number data. Use REPORTER to
  // report problems with the data we find.
  // NOTE(review): ownership of the three pointers is not visible in
  // this header; presumably the caller keeps them alive for the
  // lifetime of this object -- confirm in dwarf_cu_to_module.cc.
  DwarfCUToModule(FileContext *file_context,
                  LineToModuleHandler *line_reader,
                  WarningReporter *reporter);
  ~DwarfCUToModule();

  // Attribute and child-DIE callbacks for the root DIE.
  // NOTE(review): this class derives from dwarf2reader::RootDIEHandler,
  // and these look like overrides of that handler interface -- confirm
  // the exact contracts against common/dwarf/dwarf2diehandler.h.
  void ProcessAttributeSigned(enum DwarfAttribute attr,
                              enum DwarfForm form,
                              int64 data);
  void ProcessAttributeUnsigned(enum DwarfAttribute attr,
                                enum DwarfForm form,
                                uint64 data);
  void ProcessAttributeString(enum DwarfAttribute attr,
                              enum DwarfForm form,
                              const string &data);
  bool EndAttributes();
  DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag);

  // Assign all our source Lines to the Functions that cover their
  // addresses, and then add them to module_.
  // NOTE(review): this class declares no module_ member of its own;
  // presumably this means the FileContext's Module -- confirm.
  void Finish();

  // Compilation-unit and root-DIE start callbacks.
  // NOTE(review): presumably also part of the RootDIEHandler interface,
  // with the bool result telling the parser whether to continue --
  // confirm against common/dwarf/dwarf2diehandler.h.
  bool StartCompilationUnit(uint64 offset, uint8 address_size,
                            uint8 offset_size, uint64 cu_length,
                            uint8 dwarf_version);
  bool StartRootDIE(uint64 offset, enum DwarfTag tag);
259
 private:
  // Used internally by the handler. Full definitions are in
  // dwarf_cu_to_module.cc; only forward declarations appear here.
  struct CUContext;
  struct DIEContext;
  struct Specification;
  class GenericDIEHandler;
  class FuncHandler;
  class NamedScopeHandler;

  // A map from section offsets to specifications.
  typedef map<uint64, Specification> SpecificationByOffset;

  // Set this compilation unit's source language to LANGUAGE.
  void SetLanguage(DwarfLanguage language);

  // Read source line information at OFFSET in the .debug_line
  // section.  Record source files in module_, but record source lines
  // in lines_; we apportion them to functions in
  // AssignLinesToFunctions.
  // NOTE(review): module_ is not a member of this class; presumably
  // the FileContext's Module is meant -- confirm.
  void ReadSourceLines(uint64 offset);

  // Assign the lines in lines_ to the individual line lists of the
  // functions in functions_.  (DWARF line information maps an entire
  // compilation unit at a time, and gives no indication of which
  // lines belong to which functions, beyond their addresses.)
  // NOTE(review): functions_ is not declared in this header;
  // presumably it lives in CUContext -- confirm.
  void AssignLinesToFunctions();

  // The only reason cu_context_ and child_context_ are held by pointer
  // is that we want to keep their definitions private to
  // dwarf_cu_to_module.cc, instead of listing them all here. They are
  // owned by this DwarfCUToModule and are held in scoped_ptrs.

  // The handler to use to handle line number data.
  // NOTE(review): raw pointer; ownership is not visible in this header.
  // Presumably the caller retains it -- confirm.
  LineToModuleHandler *line_reader_;

  // This compilation unit's context.
  scoped_ptr<CUContext> cu_context_;

  // A context for our children.
  scoped_ptr<DIEContext> child_context_;

  // True if this compilation unit has source line information.
  bool has_source_line_info_;

  // The offset of this compilation unit's line number information in
  // the .debug_line section.
  uint64 source_line_offset_;

  // The line numbers we have seen thus far.  We accumulate these here
  // during parsing.  Then, in Finish, we call AssignLinesToFunctions
  // to dole them out to the appropriate functions.
  vector<Module::Line> lines_;
314};
315
316}  // namespace google_breakpad
317
318#endif  // COMMON_LINUX_DWARF_CU_TO_MODULE_H__
319