pdb_source_line_writer.cc revision 29401d2457120b6d581affdb440017433ca93e77
1// Copyright (c) 2006, Google Inc. 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are 6// met: 7// 8// * Redistributions of source code must retain the above copyright 9// notice, this list of conditions and the following disclaimer. 10// * Redistributions in binary form must reproduce the above 11// copyright notice, this list of conditions and the following disclaimer 12// in the documentation and/or other materials provided with the 13// distribution. 14// * Neither the name of Google Inc. nor the names of its 15// contributors may be used to endorse or promote products derived from 16// this software without specific prior written permission. 17// 18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30#include <atlbase.h> 31#include <DbgHelp.h> 32#include <dia2.h> 33#include <stdio.h> 34 35#include "common/windows/pdb_source_line_writer.h" 36#include "common/windows/guid_string.h" 37 38// This constant may be missing from DbgHelp.h. See the documentation for 39// IDiaSymbol::get_undecoratedNameEx. 40#ifndef UNDNAME_NO_ECSU 41#define UNDNAME_NO_ECSU 0x8000 // Suppresses enum/class/struct/union. 42#endif // UNDNAME_NO_ECSU 43 44namespace google_airbag { 45 46PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) { 47} 48 49PDBSourceLineWriter::~PDBSourceLineWriter() { 50} 51 52bool PDBSourceLineWriter::Open(const wstring &file, FileFormat format) { 53 Close(); 54 55 if (FAILED(CoInitialize(NULL))) { 56 fprintf(stderr, "CoInitialize failed\n"); 57 return false; 58 } 59 60 CComPtr<IDiaDataSource> data_source; 61 if (FAILED(data_source.CoCreateInstance(CLSID_DiaSource))) { 62 fprintf(stderr, "CoCreateInstance CLSID_DiaSource failed " 63 "(msdia80.dll unregistered?)\n"); 64 return false; 65 } 66 67 switch (format) { 68 case PDB_FILE: 69 if (FAILED(data_source->loadDataFromPdb(file.c_str()))) { 70 fprintf(stderr, "loadDataFromPdb failed\n"); 71 return false; 72 } 73 break; 74 case EXE_FILE: 75 if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) { 76 fprintf(stderr, "loadDataForExe failed\n"); 77 return false; 78 } 79 break; 80 default: 81 fprintf(stderr, "Unknown file format\n"); 82 return false; 83 } 84 85 if (FAILED(data_source->openSession(&session_))) { 86 fprintf(stderr, "openSession failed\n"); 87 } 88 89 return true; 90} 91 92bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers *lines) { 93 // The line number format is: 94 // <rva> <line number> <source file id> 95 CComPtr<IDiaLineNumber> line; 96 ULONG count; 97 98 while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) { 99 DWORD rva; 100 if (FAILED(line->get_relativeVirtualAddress(&rva))) { 101 fprintf(stderr, "failed to get line rva\n"); 102 return false; 103 } 104 105 DWORD length; 106 if (FAILED(line->get_length(&length))) { 107 fprintf(stderr, "failed to get line code length\n"); 108 return false; 109 } 110 111 DWORD source_id; 112 if (FAILED(line->get_sourceFileId(&source_id))) { 113 fprintf(stderr, "failed to get line source file id\n"); 114 return false; 115 } 116 117 DWORD line_num; 118 if (FAILED(line->get_lineNumber(&line_num))) { 119 fprintf(stderr, "failed to get line number\n"); 120 return false; 121 } 122 123 fprintf(output_, "%x %x %d %d\n", rva, length, line_num, source_id); 124 line.Release(); 125 } 126 return true; 127} 128 129bool PDBSourceLineWriter::PrintFunction(IDiaSymbol *function) { 130 // The function format is: 131 // FUNC <address> <length> <param_stack_size> <function> 132 DWORD rva; 133 if (FAILED(function->get_relativeVirtualAddress(&rva))) { 134 fprintf(stderr, "couldn't get rva\n"); 135 return false; 136 } 137 138 ULONGLONG length; 139 if (FAILED(function->get_length(&length))) { 140 fprintf(stderr, "failed to get function length\n"); 141 return false; 142 } 143 144 CComBSTR name; 145 int stack_param_size; 146 if (!GetSymbolFunctionName(function, &name, &stack_param_size)) { 147 return false; 148 } 149 150 // If the decorated name didn't give the parameter size, try to 151 // calculate it. 152 if (stack_param_size < 0) { 153 stack_param_size = GetFunctionStackParamSize(function); 154 } 155 156 fprintf(output_, "FUNC %x %llx %x %ws\n", 157 rva, length, stack_param_size, name); 158 159 CComPtr<IDiaEnumLineNumbers> lines; 160 if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) { 161 return false; 162 } 163 164 if (!PrintLines(lines)) { 165 return false; 166 } 167 return true; 168} 169 170bool PDBSourceLineWriter::PrintSourceFiles() { 171 CComPtr<IDiaSymbol> global; 172 if (FAILED(session_->get_globalScope(&global))) { 173 fprintf(stderr, "get_globalScope failed\n"); 174 return false; 175 } 176 177 CComPtr<IDiaEnumSymbols> compilands; 178 if (FAILED(global->findChildren(SymTagCompiland, NULL, 179 nsNone, &compilands))) { 180 fprintf(stderr, "findChildren failed\n"); 181 return false; 182 } 183 184 CComPtr<IDiaSymbol> compiland; 185 ULONG count; 186 while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) { 187 CComPtr<IDiaEnumSourceFiles> source_files; 188 if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) { 189 return false; 190 } 191 CComPtr<IDiaSourceFile> file; 192 while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) { 193 DWORD file_id; 194 if (FAILED(file->get_uniqueId(&file_id))) { 195 return false; 196 } 197 198 CComBSTR file_name; 199 if (FAILED(file->get_fileName(&file_name))) { 200 return false; 201 } 202 203 fwprintf(output_, L"FILE %d %s\n", file_id, file_name); 204 file.Release(); 205 } 206 compiland.Release(); 207 } 208 return true; 209} 210 211bool PDBSourceLineWriter::PrintFunctions() { 212 CComPtr<IDiaEnumSymbolsByAddr> symbols; 213 if (FAILED(session_->getSymbolsByAddr(&symbols))) { 214 fprintf(stderr, "failed to get symbol enumerator\n"); 215 return false; 216 } 217 218 CComPtr<IDiaSymbol> symbol; 219 if (FAILED(symbols->symbolByAddr(1, 0, &symbol))) { 220 fprintf(stderr, "failed to enumerate symbols\n"); 221 return false; 222 } 223 224 DWORD rva_last = 0; 225 if (FAILED(symbol->get_relativeVirtualAddress(&rva_last))) { 226 fprintf(stderr, "failed to get symbol rva\n"); 227 return false; 228 } 229 230 ULONG count; 231 do { 232 DWORD tag; 233 if (FAILED(symbol->get_symTag(&tag))) { 234 fprintf(stderr, "failed to get symbol tag\n"); 235 return false; 236 } 237 238 // For a given function, DIA seems to give either a symbol with 239 // SymTagFunction or SymTagPublicSymbol, but not both. This means 240 // that PDBSourceLineWriter will output either a FUNC or PUBLIC line, 241 // but not both. 242 if (tag == SymTagFunction) { 243 if (!PrintFunction(symbol)) { 244 return false; 245 } 246 } else if (tag == SymTagPublicSymbol) { 247 if (!PrintCodePublicSymbol(symbol)) { 248 return false; 249 } 250 } 251 symbol.Release(); 252 } while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1); 253 254 return true; 255} 256 257bool PDBSourceLineWriter::PrintFrameData() { 258 // It would be nice if it were possible to output frame data alongside the 259 // associated function, as is done with line numbers, but the DIA API 260 // doesn't make it possible to get the frame data in that way. 261 262 CComPtr<IDiaEnumTables> tables; 263 if (FAILED(session_->getEnumTables(&tables))) 264 return false; 265 266 // Pick up the first table that supports IDiaEnumFrameData. 267 CComPtr<IDiaEnumFrameData> frame_data_enum; 268 CComPtr<IDiaTable> table; 269 ULONG count; 270 while (!frame_data_enum && 271 SUCCEEDED(tables->Next(1, &table, &count)) && 272 count == 1) { 273 table->QueryInterface(_uuidof(IDiaEnumFrameData), 274 reinterpret_cast<void**>(&frame_data_enum)); 275 table.Release(); 276 } 277 if (!frame_data_enum) 278 return false; 279 280 CComPtr<IDiaFrameData> frame_data; 281 while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) && 282 count == 1) { 283 DWORD type; 284 if (FAILED(frame_data->get_type(&type))) 285 return false; 286 287 DWORD rva; 288 if (FAILED(frame_data->get_relativeVirtualAddress(&rva))) 289 return false; 290 291 DWORD code_size; 292 if (FAILED(frame_data->get_lengthBlock(&code_size))) 293 return false; 294 295 DWORD prolog_size; 296 if (FAILED(frame_data->get_lengthProlog(&prolog_size))) 297 return false; 298 299 // epliog_size is always 0. 300 DWORD epilog_size = 0; 301 302 // parameter_size is the size of parameters passed on the stack. If any 303 // parameters are not passed on the stack (such as in registers), their 304 // sizes will not be included in parameter_size. 305 DWORD parameter_size; 306 if (FAILED(frame_data->get_lengthParams(¶meter_size))) 307 return false; 308 309 DWORD saved_register_size; 310 if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size))) 311 return false; 312 313 DWORD local_size; 314 if (FAILED(frame_data->get_lengthLocals(&local_size))) 315 return false; 316 317 // get_maxStack can return S_FALSE, just use 0 in that case. 318 DWORD max_stack_size = 0; 319 if (FAILED(frame_data->get_maxStack(&max_stack_size))) 320 return false; 321 322 // get_programString can return S_FALSE, indicating that there is no 323 // program string. In that case, check whether %ebp is used. 324 HRESULT program_string_result; 325 CComBSTR program_string; 326 if (FAILED(program_string_result = frame_data->get_program( 327 &program_string))) { 328 return false; 329 } 330 331 // get_allocatesBasePointer can return S_FALSE, treat that as though 332 // %ebp is not used. 333 BOOL allocates_base_pointer = FALSE; 334 if (program_string_result != S_OK) { 335 if (FAILED(frame_data->get_allocatesBasePointer( 336 &allocates_base_pointer))) { 337 return false; 338 } 339 } 340 341 fprintf(output_, "STACK WIN %x %x %x %x %x %x %x %x %x %d ", 342 type, rva, code_size, prolog_size, epilog_size, 343 parameter_size, saved_register_size, local_size, max_stack_size, 344 program_string_result == S_OK); 345 if (program_string_result == S_OK) { 346 fprintf(output_, "%ws\n", program_string); 347 } else { 348 fprintf(output_, "%d\n", allocates_base_pointer); 349 } 350 351 frame_data.Release(); 352 } 353 354 return true; 355} 356 357bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol) { 358 BOOL is_code; 359 if (FAILED(symbol->get_code(&is_code))) { 360 return false; 361 } 362 if (!is_code) { 363 return true; 364 } 365 366 DWORD rva; 367 if (FAILED(symbol->get_relativeVirtualAddress(&rva))) { 368 return false; 369 } 370 371 CComBSTR name; 372 int stack_param_size; 373 if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) { 374 return false; 375 } 376 377 fprintf(output_, "PUBLIC %x %x %ws\n", rva, 378 stack_param_size > 0 ? stack_param_size : 0, name); 379 return true; 380} 381 382// wcstol_positive_strict is sort of like wcstol, but much stricter. string 383// should be a buffer pointing to a null-terminated string containing only 384// decimal digits. If the entire string can be converted to an integer 385// without overflowing, and there are no non-digit characters before the 386// result is set to the value and this function returns true. Otherwise, 387// this function returns false. This is an alternative to the strtol, atoi, 388// and scanf families, which are not as strict about input and in some cases 389// don't provide a good way for the caller to determine if a conversion was 390// successful. 391static bool wcstol_positive_strict(wchar_t *string, int *result) { 392 int value = 0; 393 for (wchar_t *c = string; *c != '\0'; ++c) { 394 int last_value = value; 395 value *= 10; 396 // Detect overflow. 397 if (value / 10 != last_value || value < 0) { 398 return false; 399 } 400 if (*c < '0' || *c > '9') { 401 return false; 402 } 403 unsigned int c_value = *c - '0'; 404 last_value = value; 405 value += c_value; 406 // Detect overflow. 407 if (value < last_value) { 408 return false; 409 } 410 // Forbid leading zeroes unless the string is just "0". 411 if (value == 0 && *(c+1) != '\0') { 412 return false; 413 } 414 } 415 *result = value; 416 return true; 417} 418 419// static 420bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol *function, 421 BSTR *name, 422 int *stack_param_size) { 423 *stack_param_size = -1; 424 const DWORD undecorate_options = UNDNAME_NO_MS_KEYWORDS | 425 UNDNAME_NO_FUNCTION_RETURNS | 426 UNDNAME_NO_ALLOCATION_MODEL | 427 UNDNAME_NO_ALLOCATION_LANGUAGE | 428 UNDNAME_NO_THISTYPE | 429 UNDNAME_NO_ACCESS_SPECIFIERS | 430 UNDNAME_NO_THROW_SIGNATURES | 431 UNDNAME_NO_MEMBER_TYPE | 432 UNDNAME_NO_RETURN_UDT_MODEL | 433 UNDNAME_NO_ECSU; 434 435 // Use get_undecoratedNameEx to get readable C++ names with arguments. 436 if (function->get_undecoratedNameEx(undecorate_options, name) != S_OK) { 437 if (function->get_name(name) != S_OK) { 438 fprintf(stderr, "failed to get function name\n"); 439 return false; 440 } 441 // If a name comes from get_name because no undecorated form existed, 442 // it's already formatted properly to be used as output. Don't do any 443 // additional processing. 444 } else { 445 // C++ uses a bogus "void" argument for functions and methods that don't 446 // take any parameters. Take it out of the undecorated name because it's 447 // ugly and unnecessary. 448 const wchar_t *replace_string = L"(void)"; 449 const size_t replace_length = wcslen(replace_string); 450 const wchar_t *replacement_string = L"()"; 451 size_t length = wcslen(*name); 452 if (length >= replace_length) { 453 wchar_t *name_end = *name + length - replace_length; 454 if (wcscmp(name_end, replace_string) == 0) { 455 wcscpy_s(name_end, replace_length, replacement_string); 456 length = wcslen(*name); 457 } 458 } 459 460 // Undecorate names used for stdcall and fastcall. These names prefix 461 // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it 462 // with '@' followed by the number of bytes of parameters, in decimal. 463 // If such a name is found, take note of the size and undecorate it. 464 // Only do this for names that aren't C++, which is determined based on 465 // whether the undecorated name contains any ':' or '(' characters. 466 if (!wcschr(*name, ':') && !wcschr(*name, '(') && 467 (*name[0] == '_' || *name[0] == '@')) { 468 wchar_t *last_at = wcsrchr(*name + 1, '@'); 469 if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) { 470 // If this function adheres to the fastcall convention, it accepts up 471 // to the first 8 bytes of parameters in registers (%ecx and %edx). 472 // We're only interested in the stack space used for parameters, so 473 // so subtract 8 and don't let the size go below 0. 474 if (*name[0] == '@') { 475 if (*stack_param_size > 8) { 476 *stack_param_size -= 8; 477 } else { 478 *stack_param_size = 0; 479 } 480 } 481 482 // Undecorate the name by moving it one character to the left in its 483 // buffer, and terminating it where the last '@' had been. 484 wcsncpy_s(*name, length, *name + 1, last_at - *name - 1); 485 } else if (*name[0] == '_') { 486 // This symbol's name is encoded according to the cdecl rules. The 487 // name doesn't end in a '@' character followed by a decimal positive 488 // integer, so it's not a stdcall name. Strip off the leading 489 // underscore. 490 wcsncpy_s(*name, length, *name + 1, length - 1); 491 } 492 } 493 } 494 495 return true; 496} 497 498// static 499int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol *function) { 500 // This implementation is highly x86-specific. 501 502 // Gather the symbols corresponding to data. 503 CComPtr<IDiaEnumSymbols> data_children; 504 if (FAILED(function->findChildren(SymTagData, NULL, nsNone, 505 &data_children))) { 506 return 0; 507 } 508 509 // lowest_base is the lowest %ebp-relative byte offset used for a parameter. 510 // highest_end is one greater than the highest offset (i.e. base + length). 511 // Stack parameters are assumed to be contiguous, because in reality, they 512 // are. 513 int lowest_base = INT_MAX; 514 int highest_end = INT_MIN; 515 516 CComPtr<IDiaSymbol> child; 517 DWORD count; 518 while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) { 519 // If any operation fails at this point, just proceed to the next child. 520 // Use the next_child label instead of continue because child needs to 521 // be released before it's reused. Declare constructable/destructable 522 // types early to avoid gotos that cross initializations. 523 CComPtr<IDiaSymbol> child_type; 524 525 // DataIsObjectPtr is only used for |this|. Because |this| can be passed 526 // as a stack parameter, look for it in addition to traditional 527 // parameters. 528 DWORD child_kind; 529 if (FAILED(child->get_dataKind(&child_kind)) || 530 (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) { 531 goto next_child; 532 } 533 534 // Only concentrate on register-relative parameters. Parameters may also 535 // be enregistered (passed directly in a register), but those don't 536 // consume any stack space, so they're not of interest. 537 DWORD child_location_type; 538 if (FAILED(child->get_locationType(&child_location_type)) || 539 child_location_type != LocIsRegRel) { 540 goto next_child; 541 } 542 543 // Of register-relative parameters, the only ones that make any sense are 544 // %ebp- or %esp-relative. Note that MSVC's debugging information always 545 // gives parameters as %ebp-relative even when a function doesn't use a 546 // traditional frame pointer and stack parameters are accessed relative to 547 // %esp, so just look for %ebp-relative parameters. If you wanted to 548 // access parameters, you'd probably want to treat these %ebp-relative 549 // offsets as if they were relative to %esp before a function's prolog 550 // executed. 551 DWORD child_register; 552 if (FAILED(child->get_registerId(&child_register)) || 553 child_register != CV_REG_EBP) { 554 goto next_child; 555 } 556 557 LONG child_register_offset; 558 if (FAILED(child->get_offset(&child_register_offset))) { 559 goto next_child; 560 } 561 562 if (FAILED(child->get_type(&child_type))) { 563 goto next_child; 564 } 565 566 ULONGLONG child_length; 567 if (FAILED(child_type->get_length(&child_length))) { 568 goto next_child; 569 } 570 571 int child_end = child_register_offset + static_cast<ULONG>(child_length); 572 if (child_register_offset < lowest_base) { 573 lowest_base = child_register_offset; 574 } 575 if (child_end > highest_end) { 576 highest_end = child_end; 577 } 578 579next_child: 580 child.Release(); 581 } 582 583 int param_size = 0; 584 // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest 585 // possible address to find a stack parameter before executing a function's 586 // prolog (see above). Some optimizations cause parameter offsets to be 587 // lower than 4, but we're not concerned with those because we're only 588 // looking for parameters contained in addresses higher than where the 589 // return address is stored. 590 if (lowest_base < 4) { 591 lowest_base = 4; 592 } 593 if (highest_end > lowest_base) { 594 // All stack parameters are pushed as at least 4-byte quantities. If the 595 // last type was narrower than 4 bytes, promote it. This assumes that all 596 // parameters' offsets are 4-byte-aligned, which is always the case. Only 597 // worry about the last type, because we're not summing the type sizes, 598 // just looking at the lowest and highest offsets. 599 int remainder = highest_end % 4; 600 if (remainder) { 601 highest_end += 4 - remainder; 602 } 603 604 param_size = highest_end - lowest_base; 605 } 606 607 return param_size; 608} 609 610bool PDBSourceLineWriter::WriteMap(FILE *map_file) { 611 bool ret = false; 612 output_ = map_file; 613 if (PrintSourceFiles() && PrintFunctions() && PrintFrameData()) { 614 ret = true; 615 } 616 617 output_ = NULL; 618 return ret; 619} 620 621void PDBSourceLineWriter::Close() { 622 session_.Release(); 623} 624 625wstring PDBSourceLineWriter::GetModuleGUID() { 626 CComPtr<IDiaSymbol> global; 627 if (FAILED(session_->get_globalScope(&global))) { 628 return L""; 629 } 630 631 GUID guid; 632 if (FAILED(global->get_guid(&guid))) { 633 return L""; 634 } 635 636 return GUIDString::GUIDToWString(&guid); 637} 638 639} // namespace google_airbag 640