pdb_source_line_writer.cc revision 29401d2457120b6d581affdb440017433ca93e77
1// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
#include <atlbase.h>
#include <DbgHelp.h>
#include <dia2.h>
#include <limits.h>
#include <stdio.h>
#include <wchar.h>

#include "common/windows/pdb_source_line_writer.h"
#include "common/windows/guid_string.h"
37
38// This constant may be missing from DbgHelp.h.  See the documentation for
39// IDiaSymbol::get_undecoratedNameEx.
40#ifndef UNDNAME_NO_ECSU
41#define UNDNAME_NO_ECSU 0x8000  // Suppresses enum/class/struct/union.
42#endif  // UNDNAME_NO_ECSU
43
44namespace google_airbag {
45
46PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) {
47}
48
49PDBSourceLineWriter::~PDBSourceLineWriter() {
50}
51
52bool PDBSourceLineWriter::Open(const wstring &file, FileFormat format) {
53  Close();
54
55  if (FAILED(CoInitialize(NULL))) {
56    fprintf(stderr, "CoInitialize failed\n");
57    return false;
58  }
59
60  CComPtr<IDiaDataSource> data_source;
61  if (FAILED(data_source.CoCreateInstance(CLSID_DiaSource))) {
62    fprintf(stderr, "CoCreateInstance CLSID_DiaSource failed "
63            "(msdia80.dll unregistered?)\n");
64    return false;
65  }
66
67  switch (format) {
68    case PDB_FILE:
69      if (FAILED(data_source->loadDataFromPdb(file.c_str()))) {
70        fprintf(stderr, "loadDataFromPdb failed\n");
71        return false;
72      }
73      break;
74    case EXE_FILE:
75      if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) {
76        fprintf(stderr, "loadDataForExe failed\n");
77        return false;
78      }
79      break;
80    default:
81      fprintf(stderr, "Unknown file format\n");
82      return false;
83  }
84
85  if (FAILED(data_source->openSession(&session_))) {
86    fprintf(stderr, "openSession failed\n");
87  }
88
89  return true;
90}
91
92bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers *lines) {
93  // The line number format is:
94  // <rva> <line number> <source file id>
95  CComPtr<IDiaLineNumber> line;
96  ULONG count;
97
98  while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) {
99    DWORD rva;
100    if (FAILED(line->get_relativeVirtualAddress(&rva))) {
101      fprintf(stderr, "failed to get line rva\n");
102      return false;
103    }
104
105    DWORD length;
106    if (FAILED(line->get_length(&length))) {
107      fprintf(stderr, "failed to get line code length\n");
108      return false;
109    }
110
111    DWORD source_id;
112    if (FAILED(line->get_sourceFileId(&source_id))) {
113      fprintf(stderr, "failed to get line source file id\n");
114      return false;
115    }
116
117    DWORD line_num;
118    if (FAILED(line->get_lineNumber(&line_num))) {
119      fprintf(stderr, "failed to get line number\n");
120      return false;
121    }
122
123    fprintf(output_, "%x %x %d %d\n", rva, length, line_num, source_id);
124    line.Release();
125  }
126  return true;
127}
128
129bool PDBSourceLineWriter::PrintFunction(IDiaSymbol *function) {
130  // The function format is:
131  // FUNC <address> <length> <param_stack_size> <function>
132  DWORD rva;
133  if (FAILED(function->get_relativeVirtualAddress(&rva))) {
134    fprintf(stderr, "couldn't get rva\n");
135    return false;
136  }
137
138  ULONGLONG length;
139  if (FAILED(function->get_length(&length))) {
140    fprintf(stderr, "failed to get function length\n");
141    return false;
142  }
143
144  CComBSTR name;
145  int stack_param_size;
146  if (!GetSymbolFunctionName(function, &name, &stack_param_size)) {
147    return false;
148  }
149
150  // If the decorated name didn't give the parameter size, try to
151  // calculate it.
152  if (stack_param_size < 0) {
153    stack_param_size = GetFunctionStackParamSize(function);
154  }
155
156  fprintf(output_, "FUNC %x %llx %x %ws\n",
157          rva, length, stack_param_size, name);
158
159  CComPtr<IDiaEnumLineNumbers> lines;
160  if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) {
161    return false;
162  }
163
164  if (!PrintLines(lines)) {
165    return false;
166  }
167  return true;
168}
169
170bool PDBSourceLineWriter::PrintSourceFiles() {
171  CComPtr<IDiaSymbol> global;
172  if (FAILED(session_->get_globalScope(&global))) {
173    fprintf(stderr, "get_globalScope failed\n");
174    return false;
175  }
176
177  CComPtr<IDiaEnumSymbols> compilands;
178  if (FAILED(global->findChildren(SymTagCompiland, NULL,
179                                  nsNone, &compilands))) {
180    fprintf(stderr, "findChildren failed\n");
181    return false;
182  }
183
184  CComPtr<IDiaSymbol> compiland;
185  ULONG count;
186  while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
187    CComPtr<IDiaEnumSourceFiles> source_files;
188    if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) {
189      return false;
190    }
191    CComPtr<IDiaSourceFile> file;
192    while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) {
193      DWORD file_id;
194      if (FAILED(file->get_uniqueId(&file_id))) {
195        return false;
196      }
197
198      CComBSTR file_name;
199      if (FAILED(file->get_fileName(&file_name))) {
200        return false;
201      }
202
203      fwprintf(output_, L"FILE %d %s\n", file_id, file_name);
204      file.Release();
205    }
206    compiland.Release();
207  }
208  return true;
209}
210
211bool PDBSourceLineWriter::PrintFunctions() {
212  CComPtr<IDiaEnumSymbolsByAddr> symbols;
213  if (FAILED(session_->getSymbolsByAddr(&symbols))) {
214    fprintf(stderr, "failed to get symbol enumerator\n");
215    return false;
216  }
217
218  CComPtr<IDiaSymbol> symbol;
219  if (FAILED(symbols->symbolByAddr(1, 0, &symbol))) {
220    fprintf(stderr, "failed to enumerate symbols\n");
221    return false;
222  }
223
224  DWORD rva_last = 0;
225  if (FAILED(symbol->get_relativeVirtualAddress(&rva_last))) {
226    fprintf(stderr, "failed to get symbol rva\n");
227    return false;
228  }
229
230  ULONG count;
231  do {
232    DWORD tag;
233    if (FAILED(symbol->get_symTag(&tag))) {
234      fprintf(stderr, "failed to get symbol tag\n");
235      return false;
236    }
237
238    // For a given function, DIA seems to give either a symbol with
239    // SymTagFunction or SymTagPublicSymbol, but not both.  This means
240    // that PDBSourceLineWriter will output either a FUNC or PUBLIC line,
241    // but not both.
242    if (tag == SymTagFunction) {
243      if (!PrintFunction(symbol)) {
244        return false;
245      }
246    } else if (tag == SymTagPublicSymbol) {
247      if (!PrintCodePublicSymbol(symbol)) {
248        return false;
249      }
250    }
251    symbol.Release();
252  } while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1);
253
254  return true;
255}
256
257bool PDBSourceLineWriter::PrintFrameData() {
258  // It would be nice if it were possible to output frame data alongside the
259  // associated function, as is done with line numbers, but the DIA API
260  // doesn't make it possible to get the frame data in that way.
261
262  CComPtr<IDiaEnumTables> tables;
263  if (FAILED(session_->getEnumTables(&tables)))
264    return false;
265
266  // Pick up the first table that supports IDiaEnumFrameData.
267  CComPtr<IDiaEnumFrameData> frame_data_enum;
268  CComPtr<IDiaTable> table;
269  ULONG count;
270  while (!frame_data_enum &&
271         SUCCEEDED(tables->Next(1, &table, &count)) &&
272         count == 1) {
273    table->QueryInterface(_uuidof(IDiaEnumFrameData),
274                          reinterpret_cast<void**>(&frame_data_enum));
275    table.Release();
276  }
277  if (!frame_data_enum)
278    return false;
279
280  CComPtr<IDiaFrameData> frame_data;
281  while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) &&
282         count == 1) {
283    DWORD type;
284    if (FAILED(frame_data->get_type(&type)))
285      return false;
286
287    DWORD rva;
288    if (FAILED(frame_data->get_relativeVirtualAddress(&rva)))
289      return false;
290
291    DWORD code_size;
292    if (FAILED(frame_data->get_lengthBlock(&code_size)))
293      return false;
294
295    DWORD prolog_size;
296    if (FAILED(frame_data->get_lengthProlog(&prolog_size)))
297      return false;
298
299    // epliog_size is always 0.
300    DWORD epilog_size = 0;
301
302    // parameter_size is the size of parameters passed on the stack.  If any
303    // parameters are not passed on the stack (such as in registers), their
304    // sizes will not be included in parameter_size.
305    DWORD parameter_size;
306    if (FAILED(frame_data->get_lengthParams(&parameter_size)))
307      return false;
308
309    DWORD saved_register_size;
310    if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size)))
311      return false;
312
313    DWORD local_size;
314    if (FAILED(frame_data->get_lengthLocals(&local_size)))
315      return false;
316
317    // get_maxStack can return S_FALSE, just use 0 in that case.
318    DWORD max_stack_size = 0;
319    if (FAILED(frame_data->get_maxStack(&max_stack_size)))
320      return false;
321
322    // get_programString can return S_FALSE, indicating that there is no
323    // program string.  In that case, check whether %ebp is used.
324    HRESULT program_string_result;
325    CComBSTR program_string;
326    if (FAILED(program_string_result = frame_data->get_program(
327        &program_string))) {
328      return false;
329    }
330
331    // get_allocatesBasePointer can return S_FALSE, treat that as though
332    // %ebp is not used.
333    BOOL allocates_base_pointer = FALSE;
334    if (program_string_result != S_OK) {
335      if (FAILED(frame_data->get_allocatesBasePointer(
336          &allocates_base_pointer))) {
337        return false;
338      }
339    }
340
341    fprintf(output_, "STACK WIN %x %x %x %x %x %x %x %x %x %d ",
342            type, rva, code_size, prolog_size, epilog_size,
343            parameter_size, saved_register_size, local_size, max_stack_size,
344            program_string_result == S_OK);
345    if (program_string_result == S_OK) {
346      fprintf(output_, "%ws\n", program_string);
347    } else {
348      fprintf(output_, "%d\n", allocates_base_pointer);
349    }
350
351    frame_data.Release();
352  }
353
354  return true;
355}
356
357bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol) {
358  BOOL is_code;
359  if (FAILED(symbol->get_code(&is_code))) {
360    return false;
361  }
362  if (!is_code) {
363    return true;
364  }
365
366  DWORD rva;
367  if (FAILED(symbol->get_relativeVirtualAddress(&rva))) {
368    return false;
369  }
370
371  CComBSTR name;
372  int stack_param_size;
373  if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) {
374    return false;
375  }
376
377  fprintf(output_, "PUBLIC %x %x %ws\n", rva,
378          stack_param_size > 0 ? stack_param_size : 0, name);
379  return true;
380}
381
// wcstol_positive_strict is sort of like wcstol, but much stricter.  string
// should be a buffer pointing to a null-terminated string containing only
// decimal digits.  If the entire string can be converted to an integer
// without overflowing, and there are no non-digit characters and no leading
// zeroes (other than the string "0" itself), *result is set to the value and
// this function returns true.  Otherwise, this function returns false and
// *result is left untouched.  An empty string converts to 0.  This is an
// alternative to the strtol, atoi, and scanf families, which are not as
// strict about input and in some cases don't provide a good way for the
// caller to determine if a conversion was successful.
static bool wcstol_positive_strict(const wchar_t *string, int *result) {
  int value = 0;
  for (const wchar_t *c = string; *c != L'\0'; ++c) {
    if (*c < L'0' || *c > L'9') {
      return false;
    }
    const int digit = static_cast<int>(*c - L'0');

    // Detect overflow before it happens: signed overflow is undefined, so
    // it can't reliably be detected by inspecting the result afterwards.
    // value * 10 + digit fits in an int iff value <= (INT_MAX - digit) / 10.
    if (value > (INT_MAX - digit) / 10) {
      return false;
    }
    value = value * 10 + digit;

    // Forbid leading zeroes unless the string is just "0".
    if (value == 0 && *(c + 1) != L'\0') {
      return false;
    }
  }
  *result = value;
  return true;
}
418
419// static
420bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol *function,
421                                                BSTR *name,
422                                                int *stack_param_size) {
423  *stack_param_size = -1;
424  const DWORD undecorate_options = UNDNAME_NO_MS_KEYWORDS |
425                                   UNDNAME_NO_FUNCTION_RETURNS |
426                                   UNDNAME_NO_ALLOCATION_MODEL |
427                                   UNDNAME_NO_ALLOCATION_LANGUAGE |
428                                   UNDNAME_NO_THISTYPE |
429                                   UNDNAME_NO_ACCESS_SPECIFIERS |
430                                   UNDNAME_NO_THROW_SIGNATURES |
431                                   UNDNAME_NO_MEMBER_TYPE |
432                                   UNDNAME_NO_RETURN_UDT_MODEL |
433                                   UNDNAME_NO_ECSU;
434
435  // Use get_undecoratedNameEx to get readable C++ names with arguments.
436  if (function->get_undecoratedNameEx(undecorate_options, name) != S_OK) {
437    if (function->get_name(name) != S_OK) {
438      fprintf(stderr, "failed to get function name\n");
439      return false;
440    }
441    // If a name comes from get_name because no undecorated form existed,
442    // it's already formatted properly to be used as output.  Don't do any
443    // additional processing.
444  } else {
445    // C++ uses a bogus "void" argument for functions and methods that don't
446    // take any parameters.  Take it out of the undecorated name because it's
447    // ugly and unnecessary.
448    const wchar_t *replace_string = L"(void)";
449    const size_t replace_length = wcslen(replace_string);
450    const wchar_t *replacement_string = L"()";
451    size_t length = wcslen(*name);
452    if (length >= replace_length) {
453      wchar_t *name_end = *name + length - replace_length;
454      if (wcscmp(name_end, replace_string) == 0) {
455        wcscpy_s(name_end, replace_length, replacement_string);
456        length = wcslen(*name);
457      }
458    }
459
460    // Undecorate names used for stdcall and fastcall.  These names prefix
461    // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it
462    // with '@' followed by the number of bytes of parameters, in decimal.
463    // If such a name is found, take note of the size and undecorate it.
464    // Only do this for names that aren't C++, which is determined based on
465    // whether the undecorated name contains any ':' or '(' characters.
466    if (!wcschr(*name, ':') && !wcschr(*name, '(') &&
467        (*name[0] == '_' || *name[0] == '@')) {
468      wchar_t *last_at = wcsrchr(*name + 1, '@');
469      if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) {
470        // If this function adheres to the fastcall convention, it accepts up
471        // to the first 8 bytes of parameters in registers (%ecx and %edx).
472        // We're only interested in the stack space used for parameters, so
473        // so subtract 8 and don't let the size go below 0.
474        if (*name[0] == '@') {
475          if (*stack_param_size > 8) {
476            *stack_param_size -= 8;
477          } else {
478            *stack_param_size = 0;
479          }
480        }
481
482        // Undecorate the name by moving it one character to the left in its
483        // buffer, and terminating it where the last '@' had been.
484        wcsncpy_s(*name, length, *name + 1, last_at - *name - 1);
485      } else if (*name[0] == '_') {
486        // This symbol's name is encoded according to the cdecl rules.  The
487        // name doesn't end in a '@' character followed by a decimal positive
488        // integer, so it's not a stdcall name.  Strip off the leading
489        // underscore.
490        wcsncpy_s(*name, length, *name + 1, length - 1);
491      }
492    }
493  }
494
495  return true;
496}
497
498// static
499int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol *function) {
500  // This implementation is highly x86-specific.
501
502  // Gather the symbols corresponding to data.
503  CComPtr<IDiaEnumSymbols> data_children;
504  if (FAILED(function->findChildren(SymTagData, NULL, nsNone,
505                                    &data_children))) {
506    return 0;
507  }
508
509  // lowest_base is the lowest %ebp-relative byte offset used for a parameter.
510  // highest_end is one greater than the highest offset (i.e. base + length).
511  // Stack parameters are assumed to be contiguous, because in reality, they
512  // are.
513  int lowest_base = INT_MAX;
514  int highest_end = INT_MIN;
515
516  CComPtr<IDiaSymbol> child;
517  DWORD count;
518  while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) {
519    // If any operation fails at this point, just proceed to the next child.
520    // Use the next_child label instead of continue because child needs to
521    // be released before it's reused.  Declare constructable/destructable
522    // types early to avoid gotos that cross initializations.
523    CComPtr<IDiaSymbol> child_type;
524
525    // DataIsObjectPtr is only used for |this|.  Because |this| can be passed
526    // as a stack parameter, look for it in addition to traditional
527    // parameters.
528    DWORD child_kind;
529    if (FAILED(child->get_dataKind(&child_kind)) ||
530        (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) {
531      goto next_child;
532    }
533
534    // Only concentrate on register-relative parameters.  Parameters may also
535    // be enregistered (passed directly in a register), but those don't
536    // consume any stack space, so they're not of interest.
537    DWORD child_location_type;
538    if (FAILED(child->get_locationType(&child_location_type)) ||
539        child_location_type != LocIsRegRel) {
540      goto next_child;
541    }
542
543    // Of register-relative parameters, the only ones that make any sense are
544    // %ebp- or %esp-relative.  Note that MSVC's debugging information always
545    // gives parameters as %ebp-relative even when a function doesn't use a
546    // traditional frame pointer and stack parameters are accessed relative to
547    // %esp, so just look for %ebp-relative parameters.  If you wanted to
548    // access parameters, you'd probably want to treat these %ebp-relative
549    // offsets as if they were relative to %esp before a function's prolog
550    // executed.
551    DWORD child_register;
552    if (FAILED(child->get_registerId(&child_register)) ||
553        child_register != CV_REG_EBP) {
554      goto next_child;
555    }
556
557    LONG child_register_offset;
558    if (FAILED(child->get_offset(&child_register_offset))) {
559      goto next_child;
560    }
561
562    if (FAILED(child->get_type(&child_type))) {
563      goto next_child;
564    }
565
566    ULONGLONG child_length;
567    if (FAILED(child_type->get_length(&child_length))) {
568      goto next_child;
569    }
570
571    int child_end = child_register_offset + static_cast<ULONG>(child_length);
572    if (child_register_offset < lowest_base) {
573      lowest_base = child_register_offset;
574    }
575    if (child_end > highest_end) {
576      highest_end = child_end;
577    }
578
579next_child:
580    child.Release();
581  }
582
583  int param_size = 0;
584  // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest
585  // possible address to find a stack parameter before executing a function's
586  // prolog (see above).  Some optimizations cause parameter offsets to be
587  // lower than 4, but we're not concerned with those because we're only
588  // looking for parameters contained in addresses higher than where the
589  // return address is stored.
590  if (lowest_base < 4) {
591    lowest_base = 4;
592  }
593  if (highest_end > lowest_base) {
594    // All stack parameters are pushed as at least 4-byte quantities.  If the
595    // last type was narrower than 4 bytes, promote it.  This assumes that all
596    // parameters' offsets are 4-byte-aligned, which is always the case.  Only
597    // worry about the last type, because we're not summing the type sizes,
598    // just looking at the lowest and highest offsets.
599    int remainder = highest_end % 4;
600    if (remainder) {
601      highest_end += 4 - remainder;
602    }
603
604    param_size = highest_end - lowest_base;
605  }
606
607  return param_size;
608}
609
610bool PDBSourceLineWriter::WriteMap(FILE *map_file) {
611  bool ret = false;
612  output_ = map_file;
613  if (PrintSourceFiles() && PrintFunctions() && PrintFrameData()) {
614    ret = true;
615  }
616
617  output_ = NULL;
618  return ret;
619}
620
621void PDBSourceLineWriter::Close() {
622  session_.Release();
623}
624
625wstring PDBSourceLineWriter::GetModuleGUID() {
626  CComPtr<IDiaSymbol> global;
627  if (FAILED(session_->get_globalScope(&global))) {
628    return L"";
629  }
630
631  GUID guid;
632  if (FAILED(global->get_guid(&guid))) {
633    return L"";
634  }
635
636  return GUIDString::GUIDToWString(&guid);
637}
638
639}  // namespace google_airbag
640