// dump_symbols.cc revision dd2ff4a21c57672170eb14ccc5142efd7d92f3f1
// Copyright (c) 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 30// Author: Alfred Peng 31 32#include <demangle.h> 33#include <fcntl.h> 34#include <gelf.h> 35#include <link.h> 36#include <sys/mman.h> 37#include <stab.h> 38#include <sys/stat.h> 39#include <sys/types.h> 40#include <unistd.h> 41 42#include <functional> 43#include <map> 44#include <vector> 45 46#include "common/solaris/dump_symbols.h" 47#include "common/solaris/file_id.h" 48#include "common/solaris/guid_creator.h" 49#include "processor/scoped_ptr.h" 50 51// This namespace contains helper functions. 52namespace { 53 54using std::make_pair; 55 56#if defined(_LP64) 57typedef Elf64_Sym Elf_Sym; 58#else 59typedef Elf32_Sym Elf_Sym; 60#endif 61 62// Symbol table entry from stabs. Sun CC specific. 63struct slist { 64 // String table index. 65 unsigned int n_strx; 66 // Stab type. 67 unsigned char n_type; 68 char n_other; 69 short n_desc; 70 unsigned long n_value; 71}; 72 73// Symbol table entry 74struct SymbolEntry { 75 // Offset from the start of the file. 76 GElf_Addr offset; 77 // Function size. 78 GElf_Word size; 79}; 80 81// Infomation of a line. 82struct LineInfo { 83 // Offset from start of the function. 84 // Load from stab symbol. 85 GElf_Off rva_to_func; 86 // Offset from base of the loading binary. 87 GElf_Off rva_to_base; 88 // Size of the line. 89 // The first line: equals to rva_to_func. 90 // The other lines: the difference of rva_to_func of the line and 91 // rva_to_func of the previous N_SLINE. 92 uint32_t size; 93 // Line number. 94 uint32_t line_num; 95}; 96 97// Information of a function. 98struct FuncInfo { 99 // Name of the function. 100 const char *name; 101 // Offset from the base of the loading address. 102 GElf_Off rva_to_base; 103 // Virtual address of the function. 104 // Load from stab symbol. 105 GElf_Addr addr; 106 // Size of the function. 107 // Equal to rva_to_func of the last function line. 108 uint32_t size; 109 // Total size of stack parameters. 110 uint32_t stack_param_size; 111 // Line information array. 
112 std::vector<struct LineInfo> line_info; 113}; 114 115// Information of a source file. 116struct SourceFileInfo { 117 // Name of the source file. 118 const char *name; 119 // Starting address of the source file. 120 GElf_Addr addr; 121 // Id of the source file. 122 int source_id; 123 // Functions information. 124 std::vector<struct FuncInfo> func_info; 125}; 126 127struct CompareString { 128 bool operator()(const char *s1, const char *s2) const { 129 return strcmp(s1, s2) < 0; 130 } 131}; 132 133typedef std::map<const char *, struct SymbolEntry *, CompareString> SymbolMap; 134 135// Information of a symbol table. 136// This is the root of all types of symbol. 137struct SymbolInfo { 138 std::vector<struct SourceFileInfo> source_file_info; 139 // Symbols information. 140 SymbolMap symbol_entries; 141}; 142 143// Stab section name. 144const char *kStabName = ".stab"; 145 146// Stab str section name. 147const char *kStabStrName = ".stabstr"; 148 149// Symtab section name. 150const char *kSymtabName = ".symtab"; 151 152// Strtab section name. 153const char *kStrtabName = ".strtab"; 154 155// Default buffer lenght for demangle. 156const int demangleLen = 20000; 157 158// Offset to the string table. 159u_int64_t stringOffset = 0; 160 161// Update the offset to the start of the string index of the next 162// object module for every N_ENDM stabs. 163inline void RecalculateOffset(struct slist* cur_list, char *stabstr) { 164 while ((--cur_list)->n_strx == 0) ; 165 stringOffset += cur_list->n_strx; 166 167 char *temp = stabstr + stringOffset; 168 while (*temp != '\0') { 169 ++stringOffset; 170 ++temp; 171 } 172 // Skip the extra '\0' 173 ++stringOffset; 174} 175 176// Demangle using demangle library on Solaris. 
177std::string Demangle(const char *mangled) { 178 int status = 0; 179 char *demangled = (char *)malloc(demangleLen); 180 if (!demangled) { 181 fprintf(stderr, "no enough memory.\n"); 182 goto out; 183 } 184 185 if ((status = cplus_demangle(mangled, demangled, demangleLen)) == 186 DEMANGLE_ESPACE) { 187 fprintf(stderr, "incorrect demangle.\n"); 188 goto out; 189 } 190 191 std::string str(demangled); 192 free(demangled); 193 return str; 194 195out: 196 return std::string(mangled); 197} 198 199bool WriteFormat(int fd, const char *fmt, ...) { 200 va_list list; 201 char buffer[4096]; 202 ssize_t expected, written; 203 va_start(list, fmt); 204 vsnprintf(buffer, sizeof(buffer), fmt, list); 205 expected = strlen(buffer); 206 written = write(fd, buffer, strlen(buffer)); 207 va_end(list); 208 return expected == written; 209} 210 211bool IsValidElf(const GElf_Ehdr *elf_header) { 212 return memcmp(elf_header, ELFMAG, SELFMAG) == 0; 213} 214 215static bool FindSectionByName(Elf *elf, const char *name, 216 int shstrndx, 217 GElf_Shdr *shdr) { 218 assert(name != NULL); 219 220 if (strlen(name) == 0) 221 return false; 222 223 Elf_Scn *scn = NULL; 224 225 while ((scn = elf_nextscn(elf, scn)) != NULL) { 226 if (gelf_getshdr(scn, shdr) == (GElf_Shdr *)0) { 227 fprintf(stderr, "failed to read section header: %s\n", elf_errmsg(0)); 228 return false; 229 } 230 231 const char *section_name = elf_strptr(elf, shstrndx, shdr->sh_name); 232 if (!section_name) { 233 fprintf(stderr, "Section name error: %s\n", elf_errmsg(-1)); 234 continue; 235 } 236 237 if (strcmp(section_name, name) == 0) 238 return true; 239 } 240 241 return false; 242} 243 244// The parameter size is used for FPO-optimized code, and 245// this is all tied up with the debugging data for Windows x86. 246// Set it to 0 on Solaris. 
247int LoadStackParamSize(struct slist *list, 248 struct slist *list_end, 249 struct FuncInfo *func_info) { 250 struct slist *cur_list = list; 251 int step = 1; 252 while (cur_list < list_end && cur_list->n_type == N_PSYM) { 253 ++cur_list; 254 ++step; 255 } 256 257 func_info->stack_param_size = 0; 258 return step; 259} 260 261int LoadLineInfo(struct slist *list, 262 struct slist *list_end, 263 struct FuncInfo *func_info) { 264 struct slist *cur_list = list; 265 do { 266 // Skip non line information. 267 while (cur_list < list_end && cur_list->n_type != N_SLINE) { 268 // Only exit when got another function, or source file, or end stab. 269 if (cur_list->n_type == N_FUN || cur_list->n_type == N_SO || 270 cur_list->n_type == N_ENDM) { 271 return cur_list - list; 272 } 273 ++cur_list; 274 } 275 struct LineInfo line; 276 while (cur_list < list_end && cur_list->n_type == N_SLINE) { 277 line.rva_to_func = cur_list->n_value; 278 // n_desc is a signed short 279 line.line_num = (unsigned short)cur_list->n_desc; 280 func_info->line_info.push_back(line); 281 ++cur_list; 282 } 283 if (cur_list == list_end && cur_list->n_type == N_ENDM) 284 break; 285 } while (list < list_end); 286 287 return cur_list - list; 288} 289 290int LoadFuncSymbols(struct slist *list, 291 struct slist *list_end, 292 char *stabstr, 293 GElf_Word base, 294 struct SourceFileInfo *source_file_info) { 295 struct slist *cur_list = list; 296 assert(cur_list->n_type == N_SO); 297 ++cur_list; 298 299 source_file_info->func_info.clear(); 300 while (cur_list < list_end) { 301 // Go until the function symbol. 
302 while (cur_list < list_end && cur_list->n_type != N_FUN) { 303 if (cur_list->n_type == N_SO) { 304 return cur_list - list; 305 } 306 ++cur_list; 307 if (cur_list->n_type == N_ENDM) 308 RecalculateOffset(cur_list, stabstr); 309 continue; 310 } 311 while (cur_list->n_type == N_FUN) { 312 struct FuncInfo func_info; 313 memset(&func_info, 0, sizeof(func_info)); 314 func_info.name = stabstr + cur_list->n_strx + stringOffset; 315 // The n_value field is always 0 from stab generated by Sun CC. 316 // TODO(Alfred): Find the correct value. 317 func_info.addr = cur_list->n_value; 318 ++cur_list; 319 if (cur_list->n_type == N_ENDM) 320 RecalculateOffset(cur_list, stabstr); 321 if (cur_list->n_type != N_ESYM && cur_list->n_type != N_ISYM && 322 cur_list->n_type != N_FUN) { 323 // Stack parameter size. 324 cur_list += LoadStackParamSize(cur_list, list_end, &func_info); 325 // Line info. 326 cur_list += LoadLineInfo(cur_list, list_end, &func_info); 327 } 328 if (cur_list < list_end && cur_list->n_type == N_ENDM) 329 RecalculateOffset(cur_list, stabstr); 330 // Functions in this module should have address bigger than the module 331 // starting address. 332 // 333 // These two values are always 0 with Sun CC. 334 // TODO(Alfred): Get the correct value or remove the condition statement. 335 if (func_info.addr >= source_file_info->addr) { 336 source_file_info->func_info.push_back(func_info); 337 } 338 } 339 } 340 return cur_list - list; 341} 342 343// Compute size and rva information based on symbols loaded from stab section. 
344bool ComputeSizeAndRVA(struct SymbolInfo *symbols) { 345 std::vector<struct SourceFileInfo> *sorted_files = 346 &(symbols->source_file_info); 347 SymbolMap *symbol_entries = &(symbols->symbol_entries); 348 for (size_t i = 0; i < sorted_files->size(); ++i) { 349 struct SourceFileInfo &source_file = (*sorted_files)[i]; 350 std::vector<struct FuncInfo> *sorted_functions = &(source_file.func_info); 351 int func_size = sorted_functions->size(); 352 353 for (size_t j = 0; j < func_size; ++j) { 354 struct FuncInfo &func_info = (*sorted_functions)[j]; 355 int line_count = func_info.line_info.size(); 356 357 // Discard the ending part of the name. 358 std::string func_name(func_info.name); 359 std::string::size_type last_colon = func_name.find_first_of(':'); 360 if (last_colon != std::string::npos) 361 func_name = func_name.substr(0, last_colon); 362 363 // Fine the symbol offset from the loading address and size by name. 364 SymbolMap::const_iterator it = symbol_entries->find(func_name.c_str()); 365 if (it->second) { 366 func_info.rva_to_base = it->second->offset; 367 func_info.size = (line_count == 0) ? 0 : it->second->size; 368 } else { 369 func_info.rva_to_base = 0; 370 func_info.size = 0; 371 } 372 373 // Compute function and line size. 374 for (size_t k = 0; k < line_count; ++k) { 375 struct LineInfo &line_info = func_info.line_info[k]; 376 377 line_info.rva_to_base = line_info.rva_to_func + func_info.rva_to_base; 378 if (k == line_count - 1) { 379 line_info.size = func_info.size - line_info.rva_to_func; 380 } else { 381 struct LineInfo &next_line = func_info.line_info[k + 1]; 382 line_info.size = next_line.rva_to_func - line_info.rva_to_func; 383 } 384 } // for each line. 385 } // for each function. 386 } // for each source file. 
387 for (SymbolMap::iterator it = symbol_entries->begin(); 388 it != symbol_entries->end(); ++it) { 389 free(it->second); 390 } 391 return true; 392} 393 394bool LoadAllSymbols(const GElf_Shdr *stab_section, 395 const GElf_Shdr *stabstr_section, 396 GElf_Word base, 397 struct SymbolInfo *symbols) { 398 if (stab_section == NULL || stabstr_section == NULL) 399 return false; 400 401 char *stabstr = 402 reinterpret_cast<char *>(stabstr_section->sh_offset + base); 403 struct slist *lists = 404 reinterpret_cast<struct slist *>(stab_section->sh_offset + base); 405 int nstab = stab_section->sh_size / sizeof(struct slist); 406 int source_id = 0; 407 408 // First pass, load all symbols from the object file. 409 for (int i = 0; i < nstab; ) { 410 int step = 1; 411 struct slist *cur_list = lists + i; 412 if (cur_list->n_type == N_SO) { 413 // FUNC <address> <size> <param_stack_size> <function> 414 struct SourceFileInfo source_file_info; 415 source_file_info.name = stabstr + cur_list->n_strx + stringOffset; 416 // The n_value field is always 0 from stab generated by Sun CC. 417 // TODO(Alfred): Find the correct value. 418 source_file_info.addr = cur_list->n_value; 419 if (strchr(source_file_info.name, '.')) 420 source_file_info.source_id = source_id++; 421 else 422 source_file_info.source_id = -1; 423 step = LoadFuncSymbols(cur_list, lists + nstab - 1, stabstr, 424 base, &source_file_info); 425 symbols->source_file_info.push_back(source_file_info); 426 } 427 i += step; 428 } 429 // Second pass, compute the size of functions and lines. 
430 return ComputeSizeAndRVA(symbols); 431} 432 433bool LoadSymbols(Elf *elf, GElf_Ehdr *elf_header, struct SymbolInfo *symbols, 434 void *obj_base) { 435 GElf_Word base = reinterpret_cast<GElf_Word>(obj_base); 436 437 const GElf_Shdr *sections = 438 reinterpret_cast<GElf_Shdr *>(elf_header->e_shoff + base); 439 GElf_Shdr stab_section; 440 if (!FindSectionByName(elf, kStabName, elf_header->e_shstrndx, 441 &stab_section)) { 442 fprintf(stderr, "Stab section not found.\n"); 443 return false; 444 } 445 GElf_Shdr stabstr_section; 446 if (!FindSectionByName(elf, kStabStrName, elf_header->e_shstrndx, 447 &stabstr_section)) { 448 fprintf(stderr, "Stabstr section not found.\n"); 449 return false; 450 } 451 GElf_Shdr symtab_section; 452 if (!FindSectionByName(elf, kSymtabName, elf_header->e_shstrndx, 453 &symtab_section)) { 454 fprintf(stderr, "Symtab section not found.\n"); 455 return false; 456 } 457 GElf_Shdr strtab_section; 458 if (!FindSectionByName(elf, kStrtabName, elf_header->e_shstrndx, 459 &strtab_section)) { 460 fprintf(stderr, "Strtab section not found.\n"); 461 return false; 462 } 463 464 Elf_Sym *symbol = (Elf_Sym *)((char *)base + symtab_section.sh_offset); 465 for (int i = 0; i < symtab_section.sh_size/symtab_section.sh_entsize; ++i) { 466 struct SymbolEntry *symbol_entry = 467 (struct SymbolEntry *)malloc(sizeof(struct SymbolEntry)); 468 const char *name = reinterpret_cast<char *>( 469 strtab_section.sh_offset + (GElf_Word)base + symbol->st_name); 470 symbol_entry->offset = symbol->st_value; 471 symbol_entry->size = symbol->st_size; 472 symbols->symbol_entries.insert(make_pair(name, symbol_entry)); 473 ++symbol; 474 } 475 476 477 // Load symbols. 
478 return LoadAllSymbols(&stab_section, &stabstr_section, base, symbols); 479} 480 481bool WriteModuleInfo(int fd, GElf_Half arch, const std::string &obj_file) { 482 const char *arch_name = NULL; 483 if (arch == EM_386) 484 arch_name = "x86"; 485 else if (arch == EM_X86_64) 486 arch_name = "x86_64"; 487 else if (arch == EM_SPARC32PLUS) 488 arch_name = "SPARC_32+"; 489 else { 490 printf("Please add more ARCH support\n"); 491 return false; 492 } 493 494 unsigned char identifier[16]; 495 google_breakpad::FileID file_id(obj_file.c_str()); 496 if (file_id.ElfFileIdentifier(identifier)) { 497 char identifier_str[40]; 498 file_id.ConvertIdentifierToString(identifier, 499 identifier_str, sizeof(identifier_str)); 500 std::string filename = obj_file; 501 size_t slash_pos = obj_file.find_last_of("/"); 502 if (slash_pos != std::string::npos) 503 filename = obj_file.substr(slash_pos + 1); 504 return WriteFormat(fd, "MODULE solaris %s %s %s\n", arch_name, 505 identifier_str, filename.c_str()); 506 } 507 return false; 508} 509 510bool WriteSourceFileInfo(int fd, const struct SymbolInfo &symbols) { 511 for (size_t i = 0; i < symbols.source_file_info.size(); ++i) { 512 if (symbols.source_file_info[i].source_id != -1) { 513 const char *name = symbols.source_file_info[i].name; 514 if (!WriteFormat(fd, "FILE %d %s\n", 515 symbols.source_file_info[i].source_id, name)) 516 return false; 517 } 518 } 519 return true; 520} 521 522bool WriteOneFunction(int fd, int source_id, 523 const struct FuncInfo &func_info){ 524 // Discard the ending part of the name. 525 std::string func_name(func_info.name); 526 std::string::size_type last_colon = func_name.find_last_of(':'); 527 if (last_colon != std::string::npos) 528 func_name = func_name.substr(0, last_colon); 529 func_name = Demangle(func_name.c_str()); 530 531 if (func_info.size <= 0) 532 return true; 533 534 // rva_to_base could be unsigned long(32 bit) or unsigned long long(64 bit). 
535 if (WriteFormat(fd, "FUNC %llx %x %d %s\n", 536 (long long)func_info.rva_to_base, 537 func_info.size, 538 func_info.stack_param_size, 539 func_name.c_str())) { 540 for (size_t i = 0; i < func_info.line_info.size(); ++i) { 541 const struct LineInfo &line_info = func_info.line_info[i]; 542 if (line_info.line_num == 0) 543 return true; 544 if (!WriteFormat(fd, "%llx %x %d %d\n", 545 (long long)line_info.rva_to_base, 546 line_info.size, 547 line_info.line_num, 548 source_id)) 549 return false; 550 } 551 return true; 552 } 553 return false; 554} 555 556bool WriteFunctionInfo(int fd, const struct SymbolInfo &symbols) { 557 for (size_t i = 0; i < symbols.source_file_info.size(); ++i) { 558 const struct SourceFileInfo &file_info = symbols.source_file_info[i]; 559 for (size_t j = 0; j < file_info.func_info.size(); ++j) { 560 const struct FuncInfo &func_info = file_info.func_info[j]; 561 if (!WriteOneFunction(fd, file_info.source_id, func_info)) 562 return false; 563 } 564 } 565 return true; 566} 567 568bool DumpStabSymbols(int fd, const struct SymbolInfo &symbols) { 569 return WriteSourceFileInfo(fd, symbols) && 570 WriteFunctionInfo(fd, symbols); 571} 572 573// 574// FDWrapper 575// 576// Wrapper class to make sure opened file is closed. 577// 578class FDWrapper { 579 public: 580 explicit FDWrapper(int fd) : 581 fd_(fd) { 582 } 583 ~FDWrapper() { 584 if (fd_ != -1) 585 close(fd_); 586 } 587 int get() { 588 return fd_; 589 } 590 int release() { 591 int fd = fd_; 592 fd_ = -1; 593 return fd; 594 } 595 private: 596 int fd_; 597}; 598 599// 600// MmapWrapper 601// 602// Wrapper class to make sure mapped regions are unmapped. 
603// 604class MmapWrapper { 605 public: 606 MmapWrapper(void *mapped_address, size_t mapped_size) : 607 base_(mapped_address), size_(mapped_size) { 608 } 609 ~MmapWrapper() { 610 if (base_ != NULL) { 611 assert(size_ > 0); 612 munmap((char *)base_, size_); 613 } 614 } 615 void release() { 616 base_ = NULL; 617 size_ = 0; 618 } 619 620 private: 621 void *base_; 622 size_t size_; 623}; 624 625} // namespace 626 627namespace google_breakpad { 628 629class AutoElfEnder { 630 public: 631 AutoElfEnder(Elf *elf) : elf_(elf) {} 632 ~AutoElfEnder() { if (elf_) elf_end(elf_); } 633 private: 634 Elf *elf_; 635}; 636 637 638bool DumpSymbols::WriteSymbolFile(const std::string &obj_file, int sym_fd) { 639 if (elf_version(EV_CURRENT) == EV_NONE) { 640 fprintf(stderr, "elf_version() failed: %s\n", elf_errmsg(0)); 641 return false; 642 } 643 644 int obj_fd = open(obj_file.c_str(), O_RDONLY); 645 if (obj_fd < 0) 646 return false; 647 FDWrapper obj_fd_wrapper(obj_fd); 648 struct stat st; 649 if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) 650 return false; 651 void *obj_base = mmap(NULL, st.st_size, 652 PROT_READ, MAP_PRIVATE, obj_fd, 0); 653 if (!obj_base) 654 return false; 655 MmapWrapper map_wrapper(obj_base, st.st_size); 656 GElf_Ehdr elf_header; 657 Elf *elf = elf_begin(obj_fd, ELF_C_READ, NULL); 658 AutoElfEnder elfEnder(elf); 659 660 if (gelf_getehdr(elf, &elf_header) == (GElf_Ehdr *)NULL) { 661 fprintf(stderr, "failed to read elf header: %s\n", elf_errmsg(-1)); 662 return false; 663 } 664 665 if (!IsValidElf(&elf_header)) { 666 fprintf(stderr, "header magic doesn't match\n"); 667 return false; 668 } 669 struct SymbolInfo symbols; 670 if (!LoadSymbols(elf, &elf_header, &symbols, obj_base)) 671 return false; 672 // Write to symbol file. 673 if (WriteModuleInfo(sym_fd, elf_header.e_machine, obj_file) && 674 DumpStabSymbols(sym_fd, symbols)) 675 return true; 676 677 return false; 678} 679 680} // namespace google_breakpad 681