// Copyright (c) 2010 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 30// Author: Alfred Peng 31 32#include <demangle.h> 33#include <fcntl.h> 34#include <gelf.h> 35#include <link.h> 36#include <sys/mman.h> 37#include <stab.h> 38#include <sys/stat.h> 39#include <sys/types.h> 40#include <unistd.h> 41 42#include <functional> 43#include <map> 44#include <vector> 45 46#include "common/scoped_ptr.h" 47#include "common/solaris/dump_symbols.h" 48#include "common/solaris/file_id.h" 49#include "common/solaris/guid_creator.h" 50 51// This namespace contains helper functions. 52namespace { 53 54using std::make_pair; 55 56#if defined(_LP64) 57typedef Elf64_Sym Elf_Sym; 58#else 59typedef Elf32_Sym Elf_Sym; 60#endif 61 62// Symbol table entry from stabs. Sun CC specific. 63struct slist { 64 // String table index. 65 unsigned int n_strx; 66 // Stab type. 67 unsigned char n_type; 68 char n_other; 69 short n_desc; 70 unsigned long n_value; 71}; 72 73// Symbol table entry 74struct SymbolEntry { 75 // Offset from the start of the file. 76 GElf_Addr offset; 77 // Function size. 78 GElf_Word size; 79}; 80 81// Infomation of a line. 82struct LineInfo { 83 // Offset from start of the function. 84 // Load from stab symbol. 85 GElf_Off rva_to_func; 86 // Offset from base of the loading binary. 87 GElf_Off rva_to_base; 88 // Size of the line. 89 // The first line: equals to rva_to_func. 90 // The other lines: the difference of rva_to_func of the line and 91 // rva_to_func of the previous N_SLINE. 92 uint32_t size; 93 // Line number. 94 uint32_t line_num; 95}; 96 97// Information of a function. 98struct FuncInfo { 99 // Name of the function. 100 const char *name; 101 // Offset from the base of the loading address. 102 GElf_Off rva_to_base; 103 // Virtual address of the function. 104 // Load from stab symbol. 105 GElf_Addr addr; 106 // Size of the function. 107 // Equal to rva_to_func of the last function line. 108 uint32_t size; 109 // Total size of stack parameters. 110 uint32_t stack_param_size; 111 // Line information array. 
112 std::vector<struct LineInfo> line_info; 113}; 114 115// Information of a source file. 116struct SourceFileInfo { 117 // Name of the source file. 118 const char *name; 119 // Starting address of the source file. 120 GElf_Addr addr; 121 // Id of the source file. 122 int source_id; 123 // Functions information. 124 std::vector<struct FuncInfo> func_info; 125}; 126 127struct CompareString { 128 bool operator()(const char *s1, const char *s2) const { 129 return strcmp(s1, s2) < 0; 130 } 131}; 132 133typedef std::map<const char *, struct SymbolEntry *, CompareString> SymbolMap; 134 135// Information of a symbol table. 136// This is the root of all types of symbol. 137struct SymbolInfo { 138 std::vector<struct SourceFileInfo> source_file_info; 139 // Symbols information. 140 SymbolMap symbol_entries; 141}; 142 143// Stab section name. 144const char *kStabName = ".stab"; 145 146// Stab str section name. 147const char *kStabStrName = ".stabstr"; 148 149// Symtab section name. 150const char *kSymtabName = ".symtab"; 151 152// Strtab section name. 153const char *kStrtabName = ".strtab"; 154 155// Default buffer lenght for demangle. 156const int demangleLen = 20000; 157 158// Offset to the string table. 159uint64_t stringOffset = 0; 160 161// Update the offset to the start of the string index of the next 162// object module for every N_ENDM stabs. 163inline void RecalculateOffset(struct slist* cur_list, char *stabstr) { 164 while ((--cur_list)->n_strx == 0) ; 165 stringOffset += cur_list->n_strx; 166 167 char *temp = stabstr + stringOffset; 168 while (*temp != '\0') { 169 ++stringOffset; 170 ++temp; 171 } 172 // Skip the extra '\0' 173 ++stringOffset; 174} 175 176// Demangle using demangle library on Solaris. 
177std::string Demangle(const char *mangled) { 178 int status = 0; 179 std::string str(mangled); 180 char *demangled = (char *)malloc(demangleLen); 181 182 if (!demangled) { 183 fprintf(stderr, "no enough memory.\n"); 184 goto out; 185 } 186 187 if ((status = cplus_demangle(mangled, demangled, demangleLen)) == 188 DEMANGLE_ESPACE) { 189 fprintf(stderr, "incorrect demangle.\n"); 190 goto out; 191 } 192 193 str = demangled; 194 free(demangled); 195 196out: 197 return str; 198} 199 200bool WriteFormat(int fd, const char *fmt, ...) { 201 va_list list; 202 char buffer[4096]; 203 ssize_t expected, written; 204 va_start(list, fmt); 205 vsnprintf(buffer, sizeof(buffer), fmt, list); 206 expected = strlen(buffer); 207 written = write(fd, buffer, strlen(buffer)); 208 va_end(list); 209 return expected == written; 210} 211 212bool IsValidElf(const GElf_Ehdr *elf_header) { 213 return memcmp(elf_header, ELFMAG, SELFMAG) == 0; 214} 215 216static bool FindSectionByName(Elf *elf, const char *name, 217 int shstrndx, 218 GElf_Shdr *shdr) { 219 assert(name != NULL); 220 221 if (strlen(name) == 0) 222 return false; 223 224 Elf_Scn *scn = NULL; 225 226 while ((scn = elf_nextscn(elf, scn)) != NULL) { 227 if (gelf_getshdr(scn, shdr) == (GElf_Shdr *)0) { 228 fprintf(stderr, "failed to read section header: %s\n", elf_errmsg(0)); 229 return false; 230 } 231 232 const char *section_name = elf_strptr(elf, shstrndx, shdr->sh_name); 233 if (!section_name) { 234 fprintf(stderr, "Section name error: %s\n", elf_errmsg(-1)); 235 continue; 236 } 237 238 if (strcmp(section_name, name) == 0) 239 return true; 240 } 241 242 return false; 243} 244 245// The parameter size is used for FPO-optimized code, and 246// this is all tied up with the debugging data for Windows x86. 247// Set it to 0 on Solaris. 
248int LoadStackParamSize(struct slist *list, 249 struct slist *list_end, 250 struct FuncInfo *func_info) { 251 struct slist *cur_list = list; 252 int step = 1; 253 while (cur_list < list_end && cur_list->n_type == N_PSYM) { 254 ++cur_list; 255 ++step; 256 } 257 258 func_info->stack_param_size = 0; 259 return step; 260} 261 262int LoadLineInfo(struct slist *list, 263 struct slist *list_end, 264 struct FuncInfo *func_info) { 265 struct slist *cur_list = list; 266 do { 267 // Skip non line information. 268 while (cur_list < list_end && cur_list->n_type != N_SLINE) { 269 // Only exit when got another function, or source file, or end stab. 270 if (cur_list->n_type == N_FUN || cur_list->n_type == N_SO || 271 cur_list->n_type == N_ENDM) { 272 return cur_list - list; 273 } 274 ++cur_list; 275 } 276 struct LineInfo line; 277 while (cur_list < list_end && cur_list->n_type == N_SLINE) { 278 line.rva_to_func = cur_list->n_value; 279 // n_desc is a signed short 280 line.line_num = (unsigned short)cur_list->n_desc; 281 func_info->line_info.push_back(line); 282 ++cur_list; 283 } 284 if (cur_list == list_end && cur_list->n_type == N_ENDM) 285 break; 286 } while (list < list_end); 287 288 return cur_list - list; 289} 290 291int LoadFuncSymbols(struct slist *list, 292 struct slist *list_end, 293 char *stabstr, 294 GElf_Word base, 295 struct SourceFileInfo *source_file_info) { 296 struct slist *cur_list = list; 297 assert(cur_list->n_type == N_SO); 298 ++cur_list; 299 300 source_file_info->func_info.clear(); 301 while (cur_list < list_end) { 302 // Go until the function symbol. 
303 while (cur_list < list_end && cur_list->n_type != N_FUN) { 304 if (cur_list->n_type == N_SO) { 305 return cur_list - list; 306 } 307 ++cur_list; 308 if (cur_list->n_type == N_ENDM) 309 RecalculateOffset(cur_list, stabstr); 310 continue; 311 } 312 while (cur_list->n_type == N_FUN) { 313 struct FuncInfo func_info; 314 memset(&func_info, 0, sizeof(func_info)); 315 func_info.name = stabstr + cur_list->n_strx + stringOffset; 316 // The n_value field is always 0 from stab generated by Sun CC. 317 // TODO(Alfred): Find the correct value. 318 func_info.addr = cur_list->n_value; 319 ++cur_list; 320 if (cur_list->n_type == N_ENDM) 321 RecalculateOffset(cur_list, stabstr); 322 if (cur_list->n_type != N_ESYM && cur_list->n_type != N_ISYM && 323 cur_list->n_type != N_FUN) { 324 // Stack parameter size. 325 cur_list += LoadStackParamSize(cur_list, list_end, &func_info); 326 // Line info. 327 cur_list += LoadLineInfo(cur_list, list_end, &func_info); 328 } 329 if (cur_list < list_end && cur_list->n_type == N_ENDM) 330 RecalculateOffset(cur_list, stabstr); 331 // Functions in this module should have address bigger than the module 332 // starting address. 333 // 334 // These two values are always 0 with Sun CC. 335 // TODO(Alfred): Get the correct value or remove the condition statement. 336 if (func_info.addr >= source_file_info->addr) { 337 source_file_info->func_info.push_back(func_info); 338 } 339 } 340 } 341 return cur_list - list; 342} 343 344// Compute size and rva information based on symbols loaded from stab section. 
345bool ComputeSizeAndRVA(struct SymbolInfo *symbols) { 346 std::vector<struct SourceFileInfo> *sorted_files = 347 &(symbols->source_file_info); 348 SymbolMap *symbol_entries = &(symbols->symbol_entries); 349 for (size_t i = 0; i < sorted_files->size(); ++i) { 350 struct SourceFileInfo &source_file = (*sorted_files)[i]; 351 std::vector<struct FuncInfo> *sorted_functions = &(source_file.func_info); 352 int func_size = sorted_functions->size(); 353 354 for (size_t j = 0; j < func_size; ++j) { 355 struct FuncInfo &func_info = (*sorted_functions)[j]; 356 int line_count = func_info.line_info.size(); 357 358 // Discard the ending part of the name. 359 std::string func_name(func_info.name); 360 std::string::size_type last_colon = func_name.find_first_of(':'); 361 if (last_colon != std::string::npos) 362 func_name = func_name.substr(0, last_colon); 363 364 // Fine the symbol offset from the loading address and size by name. 365 SymbolMap::const_iterator it = symbol_entries->find(func_name.c_str()); 366 if (it->second) { 367 func_info.rva_to_base = it->second->offset; 368 func_info.size = (line_count == 0) ? 0 : it->second->size; 369 } else { 370 func_info.rva_to_base = 0; 371 func_info.size = 0; 372 } 373 374 // Compute function and line size. 375 for (size_t k = 0; k < line_count; ++k) { 376 struct LineInfo &line_info = func_info.line_info[k]; 377 378 line_info.rva_to_base = line_info.rva_to_func + func_info.rva_to_base; 379 if (k == line_count - 1) { 380 line_info.size = func_info.size - line_info.rva_to_func; 381 } else { 382 struct LineInfo &next_line = func_info.line_info[k + 1]; 383 line_info.size = next_line.rva_to_func - line_info.rva_to_func; 384 } 385 } // for each line. 386 } // for each function. 387 } // for each source file. 
388 for (SymbolMap::iterator it = symbol_entries->begin(); 389 it != symbol_entries->end(); ++it) { 390 free(it->second); 391 } 392 return true; 393} 394 395bool LoadAllSymbols(const GElf_Shdr *stab_section, 396 const GElf_Shdr *stabstr_section, 397 GElf_Word base, 398 struct SymbolInfo *symbols) { 399 if (stab_section == NULL || stabstr_section == NULL) 400 return false; 401 402 char *stabstr = 403 reinterpret_cast<char *>(stabstr_section->sh_offset + base); 404 struct slist *lists = 405 reinterpret_cast<struct slist *>(stab_section->sh_offset + base); 406 int nstab = stab_section->sh_size / sizeof(struct slist); 407 int source_id = 0; 408 409 // First pass, load all symbols from the object file. 410 for (int i = 0; i < nstab; ) { 411 int step = 1; 412 struct slist *cur_list = lists + i; 413 if (cur_list->n_type == N_SO) { 414 // FUNC <address> <size> <param_stack_size> <function> 415 struct SourceFileInfo source_file_info; 416 source_file_info.name = stabstr + cur_list->n_strx + stringOffset; 417 // The n_value field is always 0 from stab generated by Sun CC. 418 // TODO(Alfred): Find the correct value. 419 source_file_info.addr = cur_list->n_value; 420 if (strchr(source_file_info.name, '.')) 421 source_file_info.source_id = source_id++; 422 else 423 source_file_info.source_id = -1; 424 step = LoadFuncSymbols(cur_list, lists + nstab - 1, stabstr, 425 base, &source_file_info); 426 symbols->source_file_info.push_back(source_file_info); 427 } 428 i += step; 429 } 430 // Second pass, compute the size of functions and lines. 
431 return ComputeSizeAndRVA(symbols); 432} 433 434bool LoadSymbols(Elf *elf, GElf_Ehdr *elf_header, struct SymbolInfo *symbols, 435 void *obj_base) { 436 GElf_Word base = reinterpret_cast<GElf_Word>(obj_base); 437 438 const GElf_Shdr *sections = 439 reinterpret_cast<GElf_Shdr *>(elf_header->e_shoff + base); 440 GElf_Shdr stab_section; 441 if (!FindSectionByName(elf, kStabName, elf_header->e_shstrndx, 442 &stab_section)) { 443 fprintf(stderr, "Stab section not found.\n"); 444 return false; 445 } 446 GElf_Shdr stabstr_section; 447 if (!FindSectionByName(elf, kStabStrName, elf_header->e_shstrndx, 448 &stabstr_section)) { 449 fprintf(stderr, "Stabstr section not found.\n"); 450 return false; 451 } 452 GElf_Shdr symtab_section; 453 if (!FindSectionByName(elf, kSymtabName, elf_header->e_shstrndx, 454 &symtab_section)) { 455 fprintf(stderr, "Symtab section not found.\n"); 456 return false; 457 } 458 GElf_Shdr strtab_section; 459 if (!FindSectionByName(elf, kStrtabName, elf_header->e_shstrndx, 460 &strtab_section)) { 461 fprintf(stderr, "Strtab section not found.\n"); 462 return false; 463 } 464 465 Elf_Sym *symbol = (Elf_Sym *)((char *)base + symtab_section.sh_offset); 466 for (int i = 0; i < symtab_section.sh_size/symtab_section.sh_entsize; ++i) { 467 struct SymbolEntry *symbol_entry = 468 (struct SymbolEntry *)malloc(sizeof(struct SymbolEntry)); 469 const char *name = reinterpret_cast<char *>( 470 strtab_section.sh_offset + (GElf_Word)base + symbol->st_name); 471 symbol_entry->offset = symbol->st_value; 472 symbol_entry->size = symbol->st_size; 473 symbols->symbol_entries.insert(make_pair(name, symbol_entry)); 474 ++symbol; 475 } 476 477 478 // Load symbols. 
479 return LoadAllSymbols(&stab_section, &stabstr_section, base, symbols); 480} 481 482bool WriteModuleInfo(int fd, GElf_Half arch, const std::string &obj_file) { 483 const char *arch_name = NULL; 484 if (arch == EM_386) 485 arch_name = "x86"; 486 else if (arch == EM_X86_64) 487 arch_name = "x86_64"; 488 else if (arch == EM_SPARC32PLUS) 489 arch_name = "SPARC_32+"; 490 else { 491 printf("Please add more ARCH support\n"); 492 return false; 493 } 494 495 unsigned char identifier[16]; 496 google_breakpad::FileID file_id(obj_file.c_str()); 497 if (file_id.ElfFileIdentifier(identifier)) { 498 char identifier_str[40]; 499 file_id.ConvertIdentifierToString(identifier, 500 identifier_str, sizeof(identifier_str)); 501 std::string filename = obj_file; 502 size_t slash_pos = obj_file.find_last_of("/"); 503 if (slash_pos != std::string::npos) 504 filename = obj_file.substr(slash_pos + 1); 505 return WriteFormat(fd, "MODULE solaris %s %s %s\n", arch_name, 506 identifier_str, filename.c_str()); 507 } 508 return false; 509} 510 511bool WriteSourceFileInfo(int fd, const struct SymbolInfo &symbols) { 512 for (size_t i = 0; i < symbols.source_file_info.size(); ++i) { 513 if (symbols.source_file_info[i].source_id != -1) { 514 const char *name = symbols.source_file_info[i].name; 515 if (!WriteFormat(fd, "FILE %d %s\n", 516 symbols.source_file_info[i].source_id, name)) 517 return false; 518 } 519 } 520 return true; 521} 522 523bool WriteOneFunction(int fd, int source_id, 524 const struct FuncInfo &func_info){ 525 // Discard the ending part of the name. 526 std::string func_name(func_info.name); 527 std::string::size_type last_colon = func_name.find_last_of(':'); 528 if (last_colon != std::string::npos) 529 func_name = func_name.substr(0, last_colon); 530 func_name = Demangle(func_name.c_str()); 531 532 if (func_info.size <= 0) 533 return true; 534 535 // rva_to_base could be unsigned long(32 bit) or unsigned long long(64 bit). 
536 if (WriteFormat(fd, "FUNC %llx %x %d %s\n", 537 (long long)func_info.rva_to_base, 538 func_info.size, 539 func_info.stack_param_size, 540 func_name.c_str())) { 541 for (size_t i = 0; i < func_info.line_info.size(); ++i) { 542 const struct LineInfo &line_info = func_info.line_info[i]; 543 if (line_info.line_num == 0) 544 return true; 545 if (!WriteFormat(fd, "%llx %x %d %d\n", 546 (long long)line_info.rva_to_base, 547 line_info.size, 548 line_info.line_num, 549 source_id)) 550 return false; 551 } 552 return true; 553 } 554 return false; 555} 556 557bool WriteFunctionInfo(int fd, const struct SymbolInfo &symbols) { 558 for (size_t i = 0; i < symbols.source_file_info.size(); ++i) { 559 const struct SourceFileInfo &file_info = symbols.source_file_info[i]; 560 for (size_t j = 0; j < file_info.func_info.size(); ++j) { 561 const struct FuncInfo &func_info = file_info.func_info[j]; 562 if (!WriteOneFunction(fd, file_info.source_id, func_info)) 563 return false; 564 } 565 } 566 return true; 567} 568 569bool DumpStabSymbols(int fd, const struct SymbolInfo &symbols) { 570 return WriteSourceFileInfo(fd, symbols) && 571 WriteFunctionInfo(fd, symbols); 572} 573 574// 575// FDWrapper 576// 577// Wrapper class to make sure opened file is closed. 578// 579class FDWrapper { 580 public: 581 explicit FDWrapper(int fd) : 582 fd_(fd) { 583 } 584 ~FDWrapper() { 585 if (fd_ != -1) 586 close(fd_); 587 } 588 int get() { 589 return fd_; 590 } 591 int release() { 592 int fd = fd_; 593 fd_ = -1; 594 return fd; 595 } 596 private: 597 int fd_; 598}; 599 600// 601// MmapWrapper 602// 603// Wrapper class to make sure mapped regions are unmapped. 
604// 605class MmapWrapper { 606 public: 607 MmapWrapper(void *mapped_address, size_t mapped_size) : 608 base_(mapped_address), size_(mapped_size) { 609 } 610 ~MmapWrapper() { 611 if (base_ != NULL) { 612 assert(size_ > 0); 613 munmap((char *)base_, size_); 614 } 615 } 616 void release() { 617 base_ = NULL; 618 size_ = 0; 619 } 620 621 private: 622 void *base_; 623 size_t size_; 624}; 625 626} // namespace 627 628namespace google_breakpad { 629 630class AutoElfEnder { 631 public: 632 AutoElfEnder(Elf *elf) : elf_(elf) {} 633 ~AutoElfEnder() { if (elf_) elf_end(elf_); } 634 private: 635 Elf *elf_; 636}; 637 638 639bool DumpSymbols::WriteSymbolFile(const std::string &obj_file, int sym_fd) { 640 if (elf_version(EV_CURRENT) == EV_NONE) { 641 fprintf(stderr, "elf_version() failed: %s\n", elf_errmsg(0)); 642 return false; 643 } 644 645 int obj_fd = open(obj_file.c_str(), O_RDONLY); 646 if (obj_fd < 0) 647 return false; 648 FDWrapper obj_fd_wrapper(obj_fd); 649 struct stat st; 650 if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) 651 return false; 652 void *obj_base = mmap(NULL, st.st_size, 653 PROT_READ, MAP_PRIVATE, obj_fd, 0); 654 if (obj_base == MAP_FAILED) 655 return false; 656 MmapWrapper map_wrapper(obj_base, st.st_size); 657 GElf_Ehdr elf_header; 658 Elf *elf = elf_begin(obj_fd, ELF_C_READ, NULL); 659 AutoElfEnder elfEnder(elf); 660 661 if (gelf_getehdr(elf, &elf_header) == (GElf_Ehdr *)NULL) { 662 fprintf(stderr, "failed to read elf header: %s\n", elf_errmsg(-1)); 663 return false; 664 } 665 666 if (!IsValidElf(&elf_header)) { 667 fprintf(stderr, "header magic doesn't match\n"); 668 return false; 669 } 670 struct SymbolInfo symbols; 671 if (!LoadSymbols(elf, &elf_header, &symbols, obj_base)) 672 return false; 673 // Write to symbol file. 674 if (WriteModuleInfo(sym_fd, elf_header.e_machine, obj_file) && 675 DumpStabSymbols(sym_fd, symbols)) 676 return true; 677 678 return false; 679} 680 681} // namespace google_breakpad 682