1// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// PDBSourceLineWriter uses a pdb file produced by Visual C++ to output
31// a line/address map for use with BasicSourceLineResolver.
32
33#ifndef COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
34#define COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
35
36#include <atlcomcli.h>
37
38#include <unordered_map>
39#include <string>
40
41#include "common/windows/omap.h"
42
// DIA interface types, forward-declared because this header only refers to
// them through pointers and CComPtr.
struct IDiaEnumLineNumbers;
struct IDiaSession;
struct IDiaSymbol;
46
47namespace google_breakpad {
48
// Make these standard library names usable without the std:: prefix inside
// namespace google_breakpad.
using std::unordered_map;
using std::wstring;
51
// A plain aggregate carrying the information that identifies a pdb file.
// All fields are default-constructed to empty strings.
struct PDBModuleInfo {
  // The basename of the pdb file from which information was loaded.
  std::wstring debug_file;

  // The pdb's identifier.  For recent pdb files, the identifier consists
  // of the pdb's guid, in uppercase hexadecimal form without any dashes
  // or separators, followed immediately by the pdb's age, also in
  // uppercase hexadecimal form.  For older pdb files which have no guid,
  // the identifier is the pdb's 32-bit signature value, in zero-padded
  // hexadecimal form, followed immediately by the pdb's age, in lowercase
  // hexadecimal form.
  std::wstring debug_identifier;

  // A string identifying the cpu that the pdb is associated with.
  // Currently, this may be "x86" or "unknown".
  std::wstring cpu;
};
71
// A plain aggregate carrying the information that identifies a PE file,
// either an EXE or a DLL.  Both fields are default-constructed to empty
// strings.
struct PEModuleInfo {
  // The basename of the PE file.
  std::wstring code_file;

  // The PE file's code identifier, which consists of its timestamp
  // and file size concatenated together into a single hex string.
  // (The fields IMAGE_FILE_HEADER::TimeDateStamp and
  // IMAGE_OPTIONAL_HEADER::SizeOfImage, as defined in the ImageHlp
  // documentation.) This is not well documented, if it's documented
  // at all, but it's what symstore does and what DbgHelp supports.
  std::wstring code_identifier;
};
86
87class PDBSourceLineWriter {
88 public:
89  enum FileFormat {
90    PDB_FILE,  // a .pdb file containing debug symbols
91    EXE_FILE,  // a .exe or .dll file
92    ANY_FILE   // try PDB_FILE and then EXE_FILE
93  };
94
95  explicit PDBSourceLineWriter();
96  ~PDBSourceLineWriter();
97
98  // Opens the given file.  For executable files, the corresponding pdb
99  // file must be available; Open will be if it is not.
100  // If there is already a pdb file open, it is automatically closed.
101  // Returns true on success.
102  bool Open(const wstring &file, FileFormat format);
103
104  // Sets the code file full path.  This is optional for 32-bit modules.  It is
105  // also optional for 64-bit modules when there is an executable file stored
106  // in the same directory as the PDB file.  It is only required for 64-bit
107  // modules when the executable file is not in the same location as the PDB
108  // file and it must be called after Open() and before WriteMap().
109  // If Open() was called for an executable file, then it is an error to call
110  // SetCodeFile() with a different file path and it will return false.
111  bool SetCodeFile(const wstring &exe_file);
112
113  // Writes a map file from the current pdb file to the given file stream.
114  // Returns true on success.
115  bool WriteMap(FILE *map_file);
116
117  // Closes the current pdb file and its associated resources.
118  void Close();
119
120  // Retrieves information about the module's debugging file.  Returns
121  // true on success and false on failure.
122  bool GetModuleInfo(PDBModuleInfo *info);
123
124  // Retrieves information about the module's PE file.  Returns
125  // true on success and false on failure.
126  bool GetPEInfo(PEModuleInfo *info);
127
128  // Sets uses_guid to true if the opened file uses a new-style CodeView
129  // record with a 128-bit GUID, or false if the opened file uses an old-style
130  // CodeView record.  When no GUID is available, a 32-bit signature should be
131  // used to identify the module instead.  If the information cannot be
132  // determined, this method returns false.
133  bool UsesGUID(bool *uses_guid);
134
135 private:
136  // Outputs the line/address pairs for each line in the enumerator.
137  // Returns true on success.
138  bool PrintLines(IDiaEnumLineNumbers *lines);
139
140  // Outputs a function address and name, followed by its source line list.
141  // block can be the same object as function, or it can be a reference
142  // to a code block that is lexically part of this function, but
143  // resides at a separate address.
144  // Returns true on success.
145  bool PrintFunction(IDiaSymbol *function, IDiaSymbol *block);
146
147  // Outputs all functions as described above.  Returns true on success.
148  bool PrintFunctions();
149
150  // Outputs all of the source files in the session's pdb file.
151  // Returns true on success.
152  bool PrintSourceFiles();
153
154  // Outputs all of the frame information necessary to construct stack
155  // backtraces in the absence of frame pointers. For x86 data stored in
156  // .pdb files. Returns true on success.
157  bool PrintFrameDataUsingPDB();
158
159  // Outputs all of the frame information necessary to construct stack
160  // backtraces in the absence of frame pointers. For x64 data stored in
161  // .exe, .dll files. Returns true on success.
162  bool PrintFrameDataUsingEXE();
163
164  // Outputs all of the frame information necessary to construct stack
165  // backtraces in the absence of frame pointers.  Returns true on success.
166  bool PrintFrameData();
167
168  // Outputs a single public symbol address and name, if the symbol corresponds
169  // to a code address.  Returns true on success.  If symbol is does not
170  // correspond to code, returns true without outputting anything.
171  bool PrintCodePublicSymbol(IDiaSymbol *symbol);
172
173  // Outputs a line identifying the PDB file that is being dumped, along with
174  // its uuid and age.
175  bool PrintPDBInfo();
176
177  // Outputs a line identifying the PE file corresponding to the PDB
178  // file that is being dumped, along with its code identifier,
179  // which consists of its timestamp and file size.
180  bool PrintPEInfo();
181
182  // Returns true if this filename has already been seen,
183  // and an ID is stored for it, or false if it has not.
184  bool FileIDIsCached(const wstring &file) {
185    return unique_files_.find(file) != unique_files_.end();
186  }
187
188  // Cache this filename and ID for later reuse.
189  void CacheFileID(const wstring &file, DWORD id) {
190    unique_files_[file] = id;
191  }
192
193  // Store this ID in the cache as a duplicate for this filename.
194  void StoreDuplicateFileID(const wstring &file, DWORD id) {
195    unordered_map<wstring, DWORD>::iterator iter = unique_files_.find(file);
196    if (iter != unique_files_.end()) {
197      // map this id to the previously seen one
198      file_ids_[id] = iter->second;
199    }
200  }
201
202  // Given a file's unique ID, return the ID that should be used to
203  // reference it. There may be multiple files with identical filenames
204  // but different unique IDs. The cache attempts to coalesce these into
205  // one ID per unique filename.
206  DWORD GetRealFileID(DWORD id) {
207    unordered_map<DWORD, DWORD>::iterator iter = file_ids_.find(id);
208    if (iter == file_ids_.end())
209      return id;
210    return iter->second;
211  }
212
213  // Find the PE file corresponding to the loaded PDB file, and
214  // set the code_file_ member. Returns false on failure.
215  bool FindPEFile();
216
217  // Returns the function name for a symbol.  If possible, the name is
218  // undecorated.  If the symbol's decorated form indicates the size of
219  // parameters on the stack, this information is returned in stack_param_size.
220  // Returns true on success.  If the symbol doesn't encode parameter size
221  // information, stack_param_size is set to -1.
222  static bool GetSymbolFunctionName(IDiaSymbol *function, BSTR *name,
223                                    int *stack_param_size);
224
225  // Returns the number of bytes of stack space used for a function's
226  // parameters.  function must have the tag SymTagFunction.  In the event of
227  // a failure, returns 0, which is also a valid number of bytes.
228  static int GetFunctionStackParamSize(IDiaSymbol *function);
229
230  // The filename of the PE file corresponding to the currently-open
231  // pdb file.
232  wstring code_file_;
233
234  // The session for the currently-open pdb file.
235  CComPtr<IDiaSession> session_;
236
237  // The current output file for this WriteMap invocation.
238  FILE *output_;
239
240  // There may be many duplicate filenames with different IDs.
241  // This maps from the DIA "unique ID" to a single ID per unique
242  // filename.
243  unordered_map<DWORD, DWORD> file_ids_;
244  // This maps unique filenames to file IDs.
245  unordered_map<wstring, DWORD> unique_files_;
246
247  // This is used for calculating post-transform symbol addresses and lengths.
248  ImageMap image_map_;
249
250  // Disallow copy ctor and operator=
251  PDBSourceLineWriter(const PDBSourceLineWriter&);
252  void operator=(const PDBSourceLineWriter&);
253};
254
255}  // namespace google_breakpad
256
257#endif  // COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
258