dump_symbols.cc revision 4af5fe0b593f5d79aee8c54f6bc1542b1deed87e
1// Copyright (c) 2007, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// Author: Alfred Peng
31
#include <assert.h>
#include <demangle.h>
#include <fcntl.h>
#include <gelf.h>
#include <link.h>
#include <stab.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <functional>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "common/solaris/dump_symbols.h"
#include "common/solaris/file_id.h"
#include "common/solaris/guid_creator.h"
#include "processor/scoped_ptr.h"
50
51// This namespace contains helper functions.
52namespace {
53
54using std::make_pair;
55
56#if defined(_LP64)
57typedef Elf64_Sym   Elf_Sym;
58#else
59typedef Elf32_Sym   Elf_Sym;
60#endif
61
// One raw record of the .stab section, laid out the way Sun CC emits it.
struct slist {
  unsigned int n_strx;    // Index of this record's string in .stabstr.
  unsigned char n_type;   // Stab type (N_SO, N_FUN, N_SLINE, N_ENDM, ...).
  char n_other;
  short n_desc;           // For N_SLINE records: the source line number.
  unsigned long n_value;  // For N_SLINE records: offset inside the function.
};
72
73// Symbol table entry
74struct SymbolEntry {
75  // Offset from the start of the file.
76  GElf_Addr offset;
77  // Function size.
78  GElf_Word size;
79};
80
81// Infomation of a line.
82struct LineInfo {
83  // Offset from start of the function.
84  // Load from stab symbol.
85  GElf_Off rva_to_func;
86  // Offset from base of the loading binary.
87  GElf_Off rva_to_base;
88  // Size of the line.
89  // The first line: equals to rva_to_func.
90  // The other lines: the difference of rva_to_func of the line and
91  // rva_to_func of the previous N_SLINE.
92  uint32_t size;
93  // Line number.
94  uint32_t line_num;
95};
96
97// Information of a function.
98struct FuncInfo {
99  // Name of the function.
100  const char *name;
101  // Offset from the base of the loading address.
102  GElf_Off rva_to_base;
103  // Virtual address of the function.
104  // Load from stab symbol.
105  GElf_Addr addr;
106  // Size of the function.
107  // Equal to rva_to_func of the last function line.
108  uint32_t size;
109  // Total size of stack parameters.
110  uint32_t stack_param_size;
111  // Line information array.
112  std::vector<struct LineInfo> line_info;
113};
114
115// Information of a source file.
116struct SourceFileInfo {
117  // Name of the source file.
118  const char *name;
119  // Starting address of the source file.
120  GElf_Addr addr;
121  // Id of the source file.
122  int source_id;
123  // Functions information.
124  std::vector<struct FuncInfo> func_info;
125};
126
// Orders C strings by their contents rather than by pointer value, so a
// map keyed on const char * finds entries by spelling.
struct CompareString {
  bool operator()(const char *s1, const char *s2) const {
    const int order = strcmp(s1, s2);
    return order < 0;
  }
};
132
133typedef std::map<const char *, struct SymbolEntry *, CompareString> SymbolMap;
134
135// Information of a symbol table.
136// This is the root of all types of symbol.
137struct SymbolInfo {
138  std::vector<struct SourceFileInfo> source_file_info;
139  // Symbols information.
140  SymbolMap symbol_entries;
141};
142
// Names of the ELF sections this tool reads.
const char *kStabName = ".stab";        // Stab records.
const char *kStabStrName = ".stabstr";  // Strings used by the stab records.
const char *kSymtabName = ".symtab";    // ELF symbol table.
const char *kStrtabName = ".strtab";    // Strings used by the symbol table.

// Size in bytes of the output buffer handed to the demangler.
const int demangleLen = 20000;

// Running offset into .stabstr that is added to every n_strx; advanced each
// time an N_ENDM stab is processed.
u_int64_t stringOffset = 0;
160
161// Update the offset to the start of the string index of the next
162// object module for every N_ENDM stabs.
163inline void RecalculateOffset(struct slist* cur_list, char *stabstr) {
164  while ((--cur_list)->n_strx == 0) ;
165  stringOffset += cur_list->n_strx;
166
167  char *temp = stabstr + stringOffset;
168  while (*temp != '\0') {
169    ++stringOffset;
170    ++temp;
171  }
172  // Skip the extra '\0'
173  ++stringOffset;
174}
175
176// Demangle using demangle library on Solaris.
177std::string Demangle(const char *mangled) {
178  int status = 0;
179  std::string str(mangled);
180  char *demangled = (char *)malloc(demangleLen);
181
182  if (!demangled) {
183    fprintf(stderr, "no enough memory.\n");
184    goto out;
185  }
186
187  if ((status = cplus_demangle(mangled, demangled, demangleLen)) ==
188      DEMANGLE_ESPACE) {
189    fprintf(stderr, "incorrect demangle.\n");
190    goto out;
191  }
192
193  str = demangled;
194  free(demangled);
195
196out:
197  return str;
198}
199
// printf-style formatting straight to a file descriptor.
//
// fd:  descriptor to write to.
// fmt: printf format string, followed by its arguments.
// Returns true only when the complete formatted text has been written.
// Text that does not fit the stack buffer is formatted again into a heap
// buffer of the exact size instead of being truncated, and short writes are
// continued until everything is out or write() fails.
bool WriteFormat(int fd, const char *fmt, ...) {
  char stack_buffer[4096];

  va_list args;
  va_start(args, fmt);
  // A va_list may be consumed only once; keep a copy for a possible retry.
  va_list retry_args;
  va_copy(retry_args, args);
  const int needed = vsnprintf(stack_buffer, sizeof(stack_buffer), fmt, args);
  va_end(args);

  if (needed < 0) {
    va_end(retry_args);
    return false;
  }

  const char *data = stack_buffer;
  std::vector<char> heap_buffer;
  if (static_cast<size_t>(needed) >= sizeof(stack_buffer)) {
    heap_buffer.resize(static_cast<size_t>(needed) + 1);
    const int second =
        vsnprintf(&heap_buffer[0], heap_buffer.size(), fmt, retry_args);
    if (second != needed) {
      va_end(retry_args);
      return false;
    }
    data = &heap_buffer[0];
  }
  va_end(retry_args);

  size_t remaining = static_cast<size_t>(needed);
  while (remaining > 0) {
    const ssize_t written = write(fd, data, remaining);
    if (written <= 0)
      return false;
    data += written;
    remaining -= static_cast<size_t>(written);
  }
  return true;
}
211
212bool IsValidElf(const GElf_Ehdr *elf_header) {
213  return memcmp(elf_header, ELFMAG, SELFMAG) == 0;
214}
215
216static bool FindSectionByName(Elf *elf, const char *name,
217                              int shstrndx,
218                              GElf_Shdr *shdr) {
219  assert(name != NULL);
220
221  if (strlen(name) == 0)
222    return false;
223
224  Elf_Scn *scn = NULL;
225
226  while ((scn = elf_nextscn(elf, scn)) != NULL) {
227    if (gelf_getshdr(scn, shdr) == (GElf_Shdr *)0) {
228      fprintf(stderr, "failed to read section header: %s\n", elf_errmsg(0));
229      return false;
230    }
231
232    const char *section_name = elf_strptr(elf, shstrndx, shdr->sh_name);
233    if (!section_name) {
234      fprintf(stderr, "Section name error: %s\n", elf_errmsg(-1));
235      continue;
236    }
237
238    if (strcmp(section_name, name) == 0)
239      return true;
240  }
241
242  return false;
243}
244
245// The parameter size is used for FPO-optimized code, and
246// this is all tied up with the debugging data for Windows x86.
247// Set it to 0 on Solaris.
248int LoadStackParamSize(struct slist *list,
249                       struct slist *list_end,
250                       struct FuncInfo *func_info) {
251  struct slist *cur_list = list;
252  int step = 1;
253  while (cur_list < list_end && cur_list->n_type == N_PSYM) {
254    ++cur_list;
255    ++step;
256  }
257
258  func_info->stack_param_size = 0;
259  return step;
260}
261
262int LoadLineInfo(struct slist *list,
263                 struct slist *list_end,
264                 struct FuncInfo *func_info) {
265  struct slist *cur_list = list;
266  do {
267    // Skip non line information.
268    while (cur_list < list_end && cur_list->n_type != N_SLINE) {
269      // Only exit when got another function, or source file, or end stab.
270      if (cur_list->n_type == N_FUN || cur_list->n_type == N_SO ||
271          cur_list->n_type == N_ENDM) {
272        return cur_list - list;
273      }
274      ++cur_list;
275    }
276    struct LineInfo line;
277    while (cur_list < list_end && cur_list->n_type == N_SLINE) {
278      line.rva_to_func = cur_list->n_value;
279      // n_desc is a signed short
280      line.line_num = (unsigned short)cur_list->n_desc;
281      func_info->line_info.push_back(line);
282      ++cur_list;
283    }
284    if (cur_list == list_end && cur_list->n_type == N_ENDM)
285      break;
286  } while (list < list_end);
287
288  return cur_list - list;
289}
290
291int LoadFuncSymbols(struct slist *list,
292                    struct slist *list_end,
293                    char *stabstr,
294                    GElf_Word base,
295                    struct SourceFileInfo *source_file_info) {
296  struct slist *cur_list = list;
297  assert(cur_list->n_type == N_SO);
298  ++cur_list;
299
300  source_file_info->func_info.clear();
301  while (cur_list < list_end) {
302    // Go until the function symbol.
303    while (cur_list < list_end && cur_list->n_type != N_FUN) {
304      if (cur_list->n_type == N_SO) {
305        return cur_list - list;
306      }
307      ++cur_list;
308      if (cur_list->n_type == N_ENDM)
309        RecalculateOffset(cur_list, stabstr);
310      continue;
311    }
312    while (cur_list->n_type == N_FUN) {
313      struct FuncInfo func_info;
314      memset(&func_info, 0, sizeof(func_info));
315      func_info.name = stabstr + cur_list->n_strx + stringOffset;
316      // The n_value field is always 0 from stab generated by Sun CC.
317      // TODO(Alfred): Find the correct value.
318      func_info.addr = cur_list->n_value;
319      ++cur_list;
320      if (cur_list->n_type == N_ENDM)
321        RecalculateOffset(cur_list, stabstr);
322      if (cur_list->n_type != N_ESYM && cur_list->n_type != N_ISYM &&
323          cur_list->n_type != N_FUN) {
324        // Stack parameter size.
325        cur_list += LoadStackParamSize(cur_list, list_end, &func_info);
326        // Line info.
327        cur_list += LoadLineInfo(cur_list, list_end, &func_info);
328      }
329      if (cur_list < list_end && cur_list->n_type == N_ENDM)
330        RecalculateOffset(cur_list, stabstr);
331      // Functions in this module should have address bigger than the module
332      // starting address.
333      //
334      // These two values are always 0 with Sun CC.
335      // TODO(Alfred): Get the correct value or remove the condition statement.
336      if (func_info.addr >= source_file_info->addr) {
337        source_file_info->func_info.push_back(func_info);
338      }
339    }
340  }
341  return cur_list - list;
342}
343
344// Compute size and rva information based on symbols loaded from stab section.
345bool ComputeSizeAndRVA(struct SymbolInfo *symbols) {
346  std::vector<struct SourceFileInfo> *sorted_files =
347    &(symbols->source_file_info);
348  SymbolMap *symbol_entries = &(symbols->symbol_entries);
349  for (size_t i = 0; i < sorted_files->size(); ++i) {
350    struct SourceFileInfo &source_file = (*sorted_files)[i];
351    std::vector<struct FuncInfo> *sorted_functions = &(source_file.func_info);
352    int func_size = sorted_functions->size();
353
354    for (size_t j = 0; j < func_size; ++j) {
355      struct FuncInfo &func_info = (*sorted_functions)[j];
356      int line_count = func_info.line_info.size();
357
358      // Discard the ending part of the name.
359      std::string func_name(func_info.name);
360      std::string::size_type last_colon = func_name.find_first_of(':');
361      if (last_colon != std::string::npos)
362        func_name = func_name.substr(0, last_colon);
363
364      // Fine the symbol offset from the loading address and size by name.
365      SymbolMap::const_iterator it = symbol_entries->find(func_name.c_str());
366      if (it->second) {
367        func_info.rva_to_base = it->second->offset;
368        func_info.size = (line_count == 0) ? 0 : it->second->size;
369      } else {
370        func_info.rva_to_base = 0;
371        func_info.size = 0;
372      }
373
374      // Compute function and line size.
375      for (size_t k = 0; k < line_count; ++k) {
376        struct LineInfo &line_info = func_info.line_info[k];
377
378        line_info.rva_to_base = line_info.rva_to_func + func_info.rva_to_base;
379        if (k == line_count - 1) {
380          line_info.size = func_info.size - line_info.rva_to_func;
381        } else {
382          struct LineInfo &next_line = func_info.line_info[k + 1];
383          line_info.size = next_line.rva_to_func - line_info.rva_to_func;
384        }
385      }  // for each line.
386    }  // for each function.
387  }  // for each source file.
388  for (SymbolMap::iterator it = symbol_entries->begin();
389       it != symbol_entries->end(); ++it) {
390    free(it->second);
391  }
392  return true;
393}
394
395bool LoadAllSymbols(const GElf_Shdr *stab_section,
396                    const GElf_Shdr *stabstr_section,
397                    GElf_Word base,
398                    struct SymbolInfo *symbols) {
399  if (stab_section == NULL || stabstr_section == NULL)
400    return false;
401
402  char *stabstr =
403    reinterpret_cast<char *>(stabstr_section->sh_offset + base);
404  struct slist *lists =
405    reinterpret_cast<struct slist *>(stab_section->sh_offset + base);
406  int nstab = stab_section->sh_size / sizeof(struct slist);
407  int source_id = 0;
408
409  // First pass, load all symbols from the object file.
410  for (int i = 0; i < nstab; ) {
411    int step = 1;
412    struct slist *cur_list = lists + i;
413    if (cur_list->n_type == N_SO) {
414      // FUNC <address> <size> <param_stack_size> <function>
415      struct SourceFileInfo source_file_info;
416      source_file_info.name = stabstr + cur_list->n_strx + stringOffset;
417      // The n_value field is always 0 from stab generated by Sun CC.
418      // TODO(Alfred): Find the correct value.
419      source_file_info.addr = cur_list->n_value;
420      if (strchr(source_file_info.name, '.'))
421        source_file_info.source_id = source_id++;
422      else
423        source_file_info.source_id = -1;
424      step = LoadFuncSymbols(cur_list, lists + nstab - 1, stabstr,
425                             base, &source_file_info);
426      symbols->source_file_info.push_back(source_file_info);
427    }
428    i += step;
429  }
430  // Second pass, compute the size of functions and lines.
431  return ComputeSizeAndRVA(symbols);
432}
433
434bool LoadSymbols(Elf *elf, GElf_Ehdr *elf_header, struct SymbolInfo *symbols,
435                 void *obj_base) {
436  GElf_Word base = reinterpret_cast<GElf_Word>(obj_base);
437
438  const GElf_Shdr *sections =
439    reinterpret_cast<GElf_Shdr *>(elf_header->e_shoff + base);
440  GElf_Shdr stab_section;
441  if (!FindSectionByName(elf, kStabName, elf_header->e_shstrndx,
442                         &stab_section)) {
443    fprintf(stderr, "Stab section not found.\n");
444    return false;
445  }
446  GElf_Shdr stabstr_section;
447  if (!FindSectionByName(elf, kStabStrName, elf_header->e_shstrndx,
448                         &stabstr_section)) {
449    fprintf(stderr, "Stabstr section not found.\n");
450    return false;
451  }
452  GElf_Shdr symtab_section;
453  if (!FindSectionByName(elf, kSymtabName, elf_header->e_shstrndx,
454                         &symtab_section)) {
455    fprintf(stderr, "Symtab section not found.\n");
456    return false;
457  }
458  GElf_Shdr strtab_section;
459  if (!FindSectionByName(elf, kStrtabName, elf_header->e_shstrndx,
460                         &strtab_section)) {
461    fprintf(stderr, "Strtab section not found.\n");
462    return false;
463  }
464
465  Elf_Sym *symbol = (Elf_Sym *)((char *)base + symtab_section.sh_offset);
466  for (int i = 0; i < symtab_section.sh_size/symtab_section.sh_entsize; ++i) {
467    struct SymbolEntry *symbol_entry =
468        (struct SymbolEntry *)malloc(sizeof(struct SymbolEntry));
469    const char *name = reinterpret_cast<char *>(
470        strtab_section.sh_offset + (GElf_Word)base + symbol->st_name);
471    symbol_entry->offset = symbol->st_value;
472    symbol_entry->size = symbol->st_size;
473    symbols->symbol_entries.insert(make_pair(name, symbol_entry));
474    ++symbol;
475  }
476
477
478  // Load symbols.
479  return LoadAllSymbols(&stab_section, &stabstr_section, base, symbols);
480}
481
482bool WriteModuleInfo(int fd, GElf_Half arch, const std::string &obj_file) {
483  const char *arch_name = NULL;
484  if (arch == EM_386)
485    arch_name = "x86";
486  else if (arch == EM_X86_64)
487    arch_name = "x86_64";
488  else if (arch == EM_SPARC32PLUS)
489    arch_name = "SPARC_32+";
490  else {
491    printf("Please add more ARCH support\n");
492    return false;
493  }
494
495  unsigned char identifier[16];
496  google_breakpad::FileID file_id(obj_file.c_str());
497  if (file_id.ElfFileIdentifier(identifier)) {
498    char identifier_str[40];
499    file_id.ConvertIdentifierToString(identifier,
500                                      identifier_str, sizeof(identifier_str));
501    std::string filename = obj_file;
502    size_t slash_pos = obj_file.find_last_of("/");
503    if (slash_pos != std::string::npos)
504      filename = obj_file.substr(slash_pos + 1);
505    return WriteFormat(fd, "MODULE solaris %s %s %s\n", arch_name,
506                       identifier_str, filename.c_str());
507  }
508  return false;
509}
510
511bool WriteSourceFileInfo(int fd, const struct SymbolInfo &symbols) {
512  for (size_t i = 0; i < symbols.source_file_info.size(); ++i) {
513    if (symbols.source_file_info[i].source_id != -1) {
514      const char *name = symbols.source_file_info[i].name;
515      if (!WriteFormat(fd, "FILE %d %s\n",
516                       symbols.source_file_info[i].source_id, name))
517        return false;
518    }
519  }
520  return true;
521}
522
523bool WriteOneFunction(int fd, int source_id,
524                      const struct FuncInfo &func_info){
525  // Discard the ending part of the name.
526  std::string func_name(func_info.name);
527  std::string::size_type last_colon = func_name.find_last_of(':');
528  if (last_colon != std::string::npos)
529    func_name = func_name.substr(0, last_colon);
530  func_name = Demangle(func_name.c_str());
531
532  if (func_info.size <= 0)
533    return true;
534
535  // rva_to_base could be unsigned long(32 bit) or unsigned long long(64 bit).
536  if (WriteFormat(fd, "FUNC %llx %x %d %s\n",
537                  (long long)func_info.rva_to_base,
538                  func_info.size,
539                  func_info.stack_param_size,
540                  func_name.c_str())) {
541    for (size_t i = 0; i < func_info.line_info.size(); ++i) {
542      const struct LineInfo &line_info = func_info.line_info[i];
543      if (line_info.line_num == 0)
544        return true;
545      if (!WriteFormat(fd, "%llx %x %d %d\n",
546                       (long long)line_info.rva_to_base,
547                       line_info.size,
548                       line_info.line_num,
549                       source_id))
550        return false;
551    }
552    return true;
553  }
554  return false;
555}
556
557bool WriteFunctionInfo(int fd, const struct SymbolInfo &symbols) {
558  for (size_t i = 0; i < symbols.source_file_info.size(); ++i) {
559    const struct SourceFileInfo &file_info = symbols.source_file_info[i];
560    for (size_t j = 0; j < file_info.func_info.size(); ++j) {
561      const struct FuncInfo &func_info = file_info.func_info[j];
562      if (!WriteOneFunction(fd, file_info.source_id, func_info))
563        return false;
564    }
565  }
566  return true;
567}
568
569bool DumpStabSymbols(int fd, const struct SymbolInfo &symbols) {
570  return WriteSourceFileInfo(fd, symbols) &&
571    WriteFunctionInfo(fd, symbols);
572}
573
//
// FDWrapper
//
// Owns a file descriptor and closes it on destruction unless release() has
// been called. Copying is disabled: two wrappers for the same descriptor
// would close it twice.
//
class FDWrapper {
 public:
  explicit FDWrapper(int fd) :
    fd_(fd) {
    }
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }
  // Returns the wrapped descriptor, or -1 after release().
  int get() const {
    return fd_;
  }
  // Gives up ownership: returns the descriptor and leaves the wrapper empty.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }
 private:
  // Declared but not defined, to forbid copying.
  FDWrapper(const FDWrapper &);
  FDWrapper &operator=(const FDWrapper &);

  int fd_;
};
599
//
// MmapWrapper
//
// Owns a mapped region and unmaps it on destruction unless release() has
// been called. Copying is disabled: two wrappers for the same region would
// unmap it twice.
//
class MmapWrapper {
 public:
  MmapWrapper(void *mapped_address, size_t mapped_size) :
    base_(mapped_address), size_(mapped_size) {
  }
  ~MmapWrapper() {
    if (base_ != NULL) {
      assert(size_ > 0);
      munmap(static_cast<char *>(base_), size_);
    }
  }
  // Gives up ownership; the destructor will then leave the region mapped.
  void release() {
    base_ = NULL;
    size_ = 0;
  }

 private:
  // Declared but not defined, to forbid copying.
  MmapWrapper(const MmapWrapper &);
  MmapWrapper &operator=(const MmapWrapper &);

  void *base_;
  size_t size_;
};
625
626}  // namespace
627
628namespace google_breakpad {
629
630class AutoElfEnder {
631 public:
632  AutoElfEnder(Elf *elf) : elf_(elf) {}
633  ~AutoElfEnder() { if (elf_) elf_end(elf_); }
634 private:
635  Elf *elf_;
636};
637
638
639bool DumpSymbols::WriteSymbolFile(const std::string &obj_file, int sym_fd) {
640  if (elf_version(EV_CURRENT) == EV_NONE) {
641    fprintf(stderr, "elf_version() failed: %s\n", elf_errmsg(0));
642    return false;
643  }
644
645  int obj_fd = open(obj_file.c_str(), O_RDONLY);
646  if (obj_fd < 0)
647    return false;
648  FDWrapper obj_fd_wrapper(obj_fd);
649  struct stat st;
650  if (fstat(obj_fd, &st) != 0 && st.st_size <= 0)
651    return false;
652  void *obj_base = mmap(NULL, st.st_size,
653                        PROT_READ, MAP_PRIVATE, obj_fd, 0);
654  if (obj_base == MAP_FAILED))
655    return false;
656  MmapWrapper map_wrapper(obj_base, st.st_size);
657  GElf_Ehdr elf_header;
658  Elf *elf = elf_begin(obj_fd, ELF_C_READ, NULL);
659  AutoElfEnder elfEnder(elf);
660
661  if (gelf_getehdr(elf, &elf_header) == (GElf_Ehdr *)NULL) {
662    fprintf(stderr, "failed to read elf header: %s\n", elf_errmsg(-1));
663    return false;
664  }
665
666  if (!IsValidElf(&elf_header)) {
667    fprintf(stderr, "header magic doesn't match\n");
668    return false;
669  }
670  struct SymbolInfo symbols;
671  if (!LoadSymbols(elf, &elf_header, &symbols, obj_base))
672    return false;
673  // Write to symbol file.
674  if (WriteModuleInfo(sym_fd, elf_header.e_machine, obj_file) &&
675      DumpStabSymbols(sym_fd, symbols))
676    return true;
677
678  return false;
679}
680
681}  // namespace google_breakpad
682