1// -*- mode: c++ -*-
2
3// Copyright (c) 2010 Google Inc. All Rights Reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
32
33// dwarf2reader::CompilationUnit is a simple and direct parser for
34// DWARF data, but its handler interface is not convenient to use.  In
35// particular:
36//
37// - CompilationUnit calls Dwarf2Handler's member functions to report
38//   every attribute's value, regardless of what sort of DIE it is.
39//   As a result, the ProcessAttributeX functions end up looking like
40//   this:
41//
42//     switch (parent_die_tag) {
43//       case DW_TAG_x:
44//         switch (attribute_name) {
45//           case DW_AT_y:
46//             handle attribute y of DIE type x
47//           ...
48//         } break;
49//       ...
50//     }
51//
52//   In C++ it's much nicer to use virtual function dispatch to find
53//   the right code for a given case than to switch on the DIE tag
54//   like this.
55//
56// - Processing different kinds of DIEs requires different sets of
57//   data: lexical block DIEs have start and end addresses, but struct
58//   type DIEs don't.  It would be nice to be able to have separate
59//   handler classes for separate kinds of DIEs, each with the members
60//   appropriate to its role, instead of having one handler class that
61//   needs to hold data for every DIE type.
62//
// - There should be a separate instance of the appropriate handler
//   class for each DIE, instead of a single object with tables
//   tracking all the DIEs in the compilation unit.
66//
67// - It's not convenient to take some action after all a DIE's
68//   attributes have been seen, but before visiting any of its
69//   children.  The only indication you have that a DIE's attribute
70//   list is complete is that you get either a StartDIE or an EndDIE
71//   call.
72//
73// - It's not convenient to make use of the tree structure of the
74//   DIEs.  Skipping all the children of a given die requires
75//   maintaining state and returning false from StartDIE until we get
76//   an EndDIE call with the appropriate offset.
77//
78// This interface tries to take care of all that.  (You're shocked, I'm sure.)
79//
80// Using the classes here, you provide an initial handler for the root
81// DIE of the compilation unit.  Each handler receives its DIE's
82// attributes, and provides fresh handler objects for children of
83// interest, if any.  The three classes are:
84//
85// - DIEHandler: the base class for your DIE-type-specific handler
86//   classes.
87//
88// - RootDIEHandler: derived from DIEHandler, the base class for your
89//   root DIE handler class.
90//
91// - DIEDispatcher: derived from Dwarf2Handler, an instance of this
92//   invokes your DIE-type-specific handler objects.
93//
94// In detail:
95//
96// - Define handler classes specialized for the DIE types you're
97//   interested in.  These handler classes must inherit from
98//   DIEHandler.  Thus:
99//
100//     class My_DW_TAG_X_Handler: public DIEHandler { ... };
101//     class My_DW_TAG_Y_Handler: public DIEHandler { ... };
102//
103//   DIEHandler subclasses needn't correspond exactly to single DIE
104//   types, as shown here; the point is that you can have several
105//   different classes appropriate to different kinds of DIEs.
106//
107// - In particular, define a handler class for the compilation
108//   unit's root DIE, that inherits from RootDIEHandler:
109//
110//     class My_DW_TAG_compile_unit_Handler: public RootDIEHandler { ... };
111//
112//   RootDIEHandler inherits from DIEHandler, adding a few additional
113//   member functions for examining the compilation unit as a whole,
114//   and other quirks of rootness.
115//
116// - Then, create a DIEDispatcher instance, passing it an instance of
117//   your root DIE handler class, and use that DIEDispatcher as the
118//   dwarf2reader::CompilationUnit's handler:
119//
120//     My_DW_TAG_compile_unit_Handler root_die_handler(...);
121//     DIEDispatcher die_dispatcher(&root_die_handler);
122//     CompilationUnit reader(sections, offset, bytereader, &die_dispatcher);
123//
124//   Here, 'die_dispatcher' acts as a shim between 'reader' and the
125//   various DIE-specific handlers you have defined.
126//
127// - When you call reader.Start(), die_dispatcher behaves as follows,
128//   starting with your root die handler and the compilation unit's
129//   root DIE:
130//
131//   - It calls the handler's ProcessAttributeX member functions for
132//     each of the DIE's attributes.
133//
134//   - It calls the handler's EndAttributes member function.  This
135//     should return true if any of the DIE's children should be
136//     visited, in which case:
137//
138//     - For each of the DIE's children, die_dispatcher calls the
139//       DIE's handler's FindChildHandler member function.  If that
140//       returns a pointer to a DIEHandler instance, then
141//       die_dispatcher uses that handler to process the child, using
142//       this procedure recursively.  Alternatively, if
143//       FindChildHandler returns NULL, die_dispatcher ignores that
144//       child and its descendants.
145//
146//   - When die_dispatcher has finished processing all the DIE's
147//     children, it invokes the handler's Finish() member function,
148//     and destroys the handler.  (As a special case, it doesn't
149//     destroy the root DIE handler.)
150//
// This allows the code for handling a particular kind of DIE to be
// gathered together in a single class, makes it easy to skip all the
// children or individual children of a particular DIE, and provides
// appropriate parental context for each DIE.
155
156#ifndef COMMON_DWARF_DWARF2DIEHANDLER_H__
157#define COMMON_DWARF_DWARF2DIEHANDLER_H__
158
159#include <stack>
160#include <string>
161
162#include "common/dwarf/types.h"
163#include "common/dwarf/dwarf2enums.h"
164#include "common/dwarf/dwarf2reader.h"
165#include "common/using_std_string.h"
166
167namespace dwarf2reader {
168
169// A base class for handlers for specific DIE types.  The series of
170// calls made on a DIE handler is as follows:
171//
172// - for each attribute of the DIE:
173//   - ProcessAttributeX()
174// - EndAttributes()
175// - if that returned true, then for each child:
176//   - FindChildHandler()
177//   - if that returns a non-NULL pointer to a new handler:
178//     - recurse, with the new handler and the child die
179// - Finish()
180// - destruction
181class DIEHandler {
182 public:
183  DIEHandler() { }
184  virtual ~DIEHandler() { }
185
186  // When we visit a DIE, we first use these member functions to
187  // report the DIE's attributes and their values.  These have the
188  // same restrictions as the corresponding member functions of
189  // dwarf2reader::Dwarf2Handler.
190  //
191  // Since DWARF does not specify in what order attributes must
192  // appear, avoid making decisions in these functions that would be
193  // affected by the presence of other attributes. The EndAttributes
194  // function is a more appropriate place for such work, as all the
195  // DIE's attributes have been seen at that point.
196  //
197  // The default definitions ignore the values they are passed.
198  virtual void ProcessAttributeUnsigned(enum DwarfAttribute attr,
199                                        enum DwarfForm form,
200                                        uint64 data) { }
201  virtual void ProcessAttributeSigned(enum DwarfAttribute attr,
202                                      enum DwarfForm form,
203                                      int64 data) { }
204  virtual void ProcessAttributeReference(enum DwarfAttribute attr,
205                                         enum DwarfForm form,
206                                         uint64 data) { }
207  virtual void ProcessAttributeBuffer(enum DwarfAttribute attr,
208                                      enum DwarfForm form,
209                                      const char* data,
210                                      uint64 len) { }
211  virtual void ProcessAttributeString(enum DwarfAttribute attr,
212                                      enum DwarfForm form,
213                                      const string& data) { }
214  virtual void ProcessAttributeSignature(enum DwarfAttribute attr,
215                                         enum DwarfForm form,
216                                         uint64 signture) { }
217
218  // Once we have reported all the DIE's attributes' values, we call
219  // this member function.  If it returns false, we skip all the DIE's
220  // children.  If it returns true, we call FindChildHandler on each
221  // child.  If that returns a handler object, we use that to visit
222  // the child; otherwise, we skip the child.
223  //
224  // This is a good place to make decisions that depend on more than
225  // one attribute. DWARF does not specify in what order attributes
226  // must appear, so only when the EndAttributes function is called
227  // does the handler have a complete picture of the DIE's attributes.
228  //
229  // The default definition elects to ignore the DIE's children.
230  // You'll need to override this if you override FindChildHandler,
231  // but at least the default behavior isn't to pass the children to
232  // FindChildHandler, which then ignores them all.
233  virtual bool EndAttributes() { return false; }
234
235  // If EndAttributes returns true to indicate that some of the DIE's
236  // children might be of interest, then we apply this function to
237  // each of the DIE's children.  If it returns a handler object, then
238  // we use that to visit the child DIE.  If it returns NULL, we skip
239  // that child DIE (and all its descendants).
240  //
241  // OFFSET is the offset of the child; TAG indicates what kind of DIE
242  // it is.
243  //
244  // The default definition skips all children.
245  virtual DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag) {
246    return NULL;
247  }
248
249  // When we are done processing a DIE, we call this member function.
250  // This happens after the EndAttributes call, all FindChildHandler
251  // calls (if any), and all operations on the children themselves (if
252  // any). We call Finish on every handler --- even if EndAttributes
253  // returns false.
254  virtual void Finish() { };
255};
256
257// A subclass of DIEHandler, with additional kludges for handling the
258// compilation unit's root die.
259class RootDIEHandler: public DIEHandler {
260 public:
261  RootDIEHandler() { }
262  virtual ~RootDIEHandler() { }
263
264  // We pass the values reported via Dwarf2Handler::StartCompilationUnit
265  // to this member function, and skip the entire compilation unit if it
266  // returns false.  So the root DIE handler is actually also
267  // responsible for handling the compilation unit metadata.
268  // The default definition always visits the compilation unit.
269  virtual bool StartCompilationUnit(uint64 offset, uint8 address_size,
270                                    uint8 offset_size, uint64 cu_length,
271                                    uint8 dwarf_version) { return true; }
272
273  // For the root DIE handler only, we pass the offset, tag and
274  // attributes of the compilation unit's root DIE.  This is the only
275  // way the root DIE handler can find the root DIE's tag.  If this
276  // function returns true, we will visit the root DIE using the usual
277  // DIEHandler methods; otherwise, we skip the entire compilation
278  // unit.
279  //
280  // The default definition elects to visit the root DIE.
281  virtual bool StartRootDIE(uint64 offset, enum DwarfTag tag) { return true; }
282};
283
284class DIEDispatcher: public Dwarf2Handler {
285 public:
286  // Create a Dwarf2Handler which uses ROOT_HANDLER as the handler for
287  // the compilation unit's root die, as described for the DIEHandler
288  // class.
289  DIEDispatcher(RootDIEHandler *root_handler) : root_handler_(root_handler) { }
290  // Destroying a DIEDispatcher destroys all active handler objects
291  // except the root handler.
292  ~DIEDispatcher();
293  bool StartCompilationUnit(uint64 offset, uint8 address_size,
294                            uint8 offset_size, uint64 cu_length,
295                            uint8 dwarf_version);
296  bool StartDIE(uint64 offset, enum DwarfTag tag);
297  void ProcessAttributeUnsigned(uint64 offset,
298                                enum DwarfAttribute attr,
299                                enum DwarfForm form,
300                                uint64 data);
301  void ProcessAttributeSigned(uint64 offset,
302                              enum DwarfAttribute attr,
303                              enum DwarfForm form,
304                              int64 data);
305  void ProcessAttributeReference(uint64 offset,
306                                 enum DwarfAttribute attr,
307                                 enum DwarfForm form,
308                                 uint64 data);
309  void ProcessAttributeBuffer(uint64 offset,
310                              enum DwarfAttribute attr,
311                              enum DwarfForm form,
312                              const char* data,
313                              uint64 len);
314  void ProcessAttributeString(uint64 offset,
315                              enum DwarfAttribute attr,
316                              enum DwarfForm form,
317                              const string &data);
318  void ProcessAttributeSignature(uint64 offset,
319                                 enum DwarfAttribute attr,
320                                 enum DwarfForm form,
321                                 uint64 signature);
322  void EndDIE(uint64 offset);
323
324 private:
325
326  // The type of a handler stack entry.  This includes some fields
327  // which don't really need to be on the stack --- they could just be
328  // single data members of DIEDispatcher --- but putting them here
329  // makes it easier to see that the code is correct.
330  struct HandlerStack {
331    // The offset of the DIE for this handler stack entry.
332    uint64 offset_;
333
334    // The handler object interested in this DIE's attributes and
335    // children.  If NULL, we're not interested in either.
336    DIEHandler *handler_;
337
338    // Have we reported the end of this DIE's attributes to the handler?
339    bool reported_attributes_end_;
340  };
341
342  // Stack of DIE attribute handlers.  At StartDIE(D), the top of the
343  // stack is the handler of D's parent, whom we may ask for a handler
344  // for D itself.  At EndDIE(D), the top of the stack is D's handler.
345  // Special cases:
346  //
347  // - Before we've seen the compilation unit's root DIE, the stack is
348  //   empty; we'll call root_handler_'s special member functions, and
349  //   perhaps push root_handler_ on the stack to look at the root's
350  //   immediate children.
351  //
352  // - When we decide to ignore a subtree, we only push an entry on
353  //   the stack for the root of the tree being ignored, rather than
354  //   pushing lots of stack entries with handler_ set to NULL.
355  std::stack<HandlerStack> die_handlers_;
356
357  // The root handler.  We don't push it on die_handlers_ until we
358  // actually get the StartDIE call for the root.
359  RootDIEHandler *root_handler_;
360};
361
362} // namespace dwarf2reader
363#endif  // COMMON_DWARF_DWARF2DIEHANDLER_H__
364