// dump_symbols.cc revision dd2ff4a21c57672170eb14ccc5142efd7d92f3f1
1// Copyright (c) 2007, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// Author: Alfred Peng
31
#include <assert.h>
#include <demangle.h>
#include <fcntl.h>
#include <gelf.h>
#include <link.h>
#include <stab.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <functional>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "common/solaris/dump_symbols.h"
#include "common/solaris/file_id.h"
#include "common/solaris/guid_creator.h"
#include "processor/scoped_ptr.h"
50
51// This namespace contains helper functions.
52namespace {
53
54using std::make_pair;
55
56#if defined(_LP64)
57typedef Elf64_Sym   Elf_Sym;
58#else
59typedef Elf32_Sym   Elf_Sym;
60#endif
61
// One entry of the stab symbol table. Sun CC specific.
struct slist {
  unsigned int n_strx;    // String table index.
  unsigned char n_type;   // Stab type.
  char n_other;
  short n_desc;           // Read as the line number for N_SLINE entries.
  unsigned long n_value;  // Read as an address or offset by the loaders.
};
72
73// Symbol table entry
74struct SymbolEntry {
75  // Offset from the start of the file.
76  GElf_Addr offset;
77  // Function size.
78  GElf_Word size;
79};
80
81// Infomation of a line.
82struct LineInfo {
83  // Offset from start of the function.
84  // Load from stab symbol.
85  GElf_Off rva_to_func;
86  // Offset from base of the loading binary.
87  GElf_Off rva_to_base;
88  // Size of the line.
89  // The first line: equals to rva_to_func.
90  // The other lines: the difference of rva_to_func of the line and
91  // rva_to_func of the previous N_SLINE.
92  uint32_t size;
93  // Line number.
94  uint32_t line_num;
95};
96
97// Information of a function.
98struct FuncInfo {
99  // Name of the function.
100  const char *name;
101  // Offset from the base of the loading address.
102  GElf_Off rva_to_base;
103  // Virtual address of the function.
104  // Load from stab symbol.
105  GElf_Addr addr;
106  // Size of the function.
107  // Equal to rva_to_func of the last function line.
108  uint32_t size;
109  // Total size of stack parameters.
110  uint32_t stack_param_size;
111  // Line information array.
112  std::vector<struct LineInfo> line_info;
113};
114
115// Information of a source file.
116struct SourceFileInfo {
117  // Name of the source file.
118  const char *name;
119  // Starting address of the source file.
120  GElf_Addr addr;
121  // Id of the source file.
122  int source_id;
123  // Functions information.
124  std::vector<struct FuncInfo> func_info;
125};
126
// Strict-weak ordering on C strings by content (strcmp), so that a map
// keyed by const char * compares the text rather than the pointer values.
struct CompareString {
  bool operator()(const char *lhs, const char *rhs) const {
    const int order = strcmp(lhs, rhs);
    return order < 0;
  }
};
132
133typedef std::map<const char *, struct SymbolEntry *, CompareString> SymbolMap;
134
135// Information of a symbol table.
136// This is the root of all types of symbol.
137struct SymbolInfo {
138  std::vector<struct SourceFileInfo> source_file_info;
139  // Symbols information.
140  SymbolMap symbol_entries;
141};
142
// Stab section name.
const char *kStabName = ".stab";

// Stab str section name.
const char *kStabStrName = ".stabstr";

// Symtab section name.
const char *kSymtabName = ".symtab";

// Strtab section name.
const char *kStrtabName = ".strtab";

// Default buffer length for demangle: the size, in bytes, of the output
// buffer that Demangle() passes to cplus_demangle().
const int demangleLen = 20000;

// Offset to the string table. It is added to every n_strx when a name is
// looked up in the stab string section, and RecalculateOffset() advances it
// for each N_ENDM stab. This is mutable file-level state.
u_int64_t stringOffset = 0;
160
161// Update the offset to the start of the string index of the next
162// object module for every N_ENDM stabs.
163inline void RecalculateOffset(struct slist* cur_list, char *stabstr) {
164  while ((--cur_list)->n_strx == 0) ;
165  stringOffset += cur_list->n_strx;
166
167  char *temp = stabstr + stringOffset;
168  while (*temp != '\0') {
169    ++stringOffset;
170    ++temp;
171  }
172  // Skip the extra '\0'
173  ++stringOffset;
174}
175
176// Demangle using demangle library on Solaris.
177std::string Demangle(const char *mangled) {
178  int status = 0;
179  char *demangled = (char *)malloc(demangleLen);
180  if (!demangled) {
181    fprintf(stderr, "no enough memory.\n");
182    goto out;
183  }
184
185  if ((status = cplus_demangle(mangled, demangled, demangleLen)) ==
186      DEMANGLE_ESPACE) {
187    fprintf(stderr, "incorrect demangle.\n");
188    goto out;
189  }
190
191  std::string str(demangled);
192  free(demangled);
193  return str;
194
195out:
196  return std::string(mangled);
197}
198
// Formats |fmt| and its arguments printf-style and writes the result to
// |fd|.
//
// Output that does not fit the 4096-byte stack buffer is formatted again
// into a heap buffer of the exact size, so long lines are never truncated.
// Short writes are continued until every byte has been written.
//
// Returns true only if the whole formatted text was written.
bool WriteFormat(int fd, const char *fmt, ...) {
  char stack_buffer[4096];
  va_list list;

  va_start(list, fmt);
  const int needed = vsnprintf(stack_buffer, sizeof(stack_buffer), fmt, list);
  va_end(list);
  if (needed < 0)
    return false;

  const char *data = stack_buffer;
  std::vector<char> heap_buffer;
  if (static_cast<size_t>(needed) >= sizeof(stack_buffer)) {
    // The stack buffer was too small; vsnprintf reported the real length.
    heap_buffer.resize(static_cast<size_t>(needed) + 1);
    va_start(list, fmt);
    const int second = vsnprintf(&heap_buffer[0], heap_buffer.size(),
                                 fmt, list);
    va_end(list);
    if (second != needed)
      return false;
    data = &heap_buffer[0];
  }

  size_t remaining = static_cast<size_t>(needed);
  while (remaining > 0) {
    const ssize_t written = write(fd, data, remaining);
    // A zero-byte write makes no progress; treat it like an error rather
    // than looping forever.
    if (written <= 0)
      return false;
    data += written;
    remaining -= static_cast<size_t>(written);
  }
  return true;
}
210
211bool IsValidElf(const GElf_Ehdr *elf_header) {
212  return memcmp(elf_header, ELFMAG, SELFMAG) == 0;
213}
214
215static bool FindSectionByName(Elf *elf, const char *name,
216                              int shstrndx,
217                              GElf_Shdr *shdr) {
218  assert(name != NULL);
219
220  if (strlen(name) == 0)
221    return false;
222
223  Elf_Scn *scn = NULL;
224
225  while ((scn = elf_nextscn(elf, scn)) != NULL) {
226    if (gelf_getshdr(scn, shdr) == (GElf_Shdr *)0) {
227      fprintf(stderr, "failed to read section header: %s\n", elf_errmsg(0));
228      return false;
229    }
230
231    const char *section_name = elf_strptr(elf, shstrndx, shdr->sh_name);
232    if (!section_name) {
233      fprintf(stderr, "Section name error: %s\n", elf_errmsg(-1));
234      continue;
235    }
236
237    if (strcmp(section_name, name) == 0)
238      return true;
239  }
240
241  return false;
242}
243
244// The parameter size is used for FPO-optimized code, and
245// this is all tied up with the debugging data for Windows x86.
246// Set it to 0 on Solaris.
247int LoadStackParamSize(struct slist *list,
248                       struct slist *list_end,
249                       struct FuncInfo *func_info) {
250  struct slist *cur_list = list;
251  int step = 1;
252  while (cur_list < list_end && cur_list->n_type == N_PSYM) {
253    ++cur_list;
254    ++step;
255  }
256
257  func_info->stack_param_size = 0;
258  return step;
259}
260
261int LoadLineInfo(struct slist *list,
262                 struct slist *list_end,
263                 struct FuncInfo *func_info) {
264  struct slist *cur_list = list;
265  do {
266    // Skip non line information.
267    while (cur_list < list_end && cur_list->n_type != N_SLINE) {
268      // Only exit when got another function, or source file, or end stab.
269      if (cur_list->n_type == N_FUN || cur_list->n_type == N_SO ||
270          cur_list->n_type == N_ENDM) {
271        return cur_list - list;
272      }
273      ++cur_list;
274    }
275    struct LineInfo line;
276    while (cur_list < list_end && cur_list->n_type == N_SLINE) {
277      line.rva_to_func = cur_list->n_value;
278      // n_desc is a signed short
279      line.line_num = (unsigned short)cur_list->n_desc;
280      func_info->line_info.push_back(line);
281      ++cur_list;
282    }
283    if (cur_list == list_end && cur_list->n_type == N_ENDM)
284      break;
285  } while (list < list_end);
286
287  return cur_list - list;
288}
289
290int LoadFuncSymbols(struct slist *list,
291                    struct slist *list_end,
292                    char *stabstr,
293                    GElf_Word base,
294                    struct SourceFileInfo *source_file_info) {
295  struct slist *cur_list = list;
296  assert(cur_list->n_type == N_SO);
297  ++cur_list;
298
299  source_file_info->func_info.clear();
300  while (cur_list < list_end) {
301    // Go until the function symbol.
302    while (cur_list < list_end && cur_list->n_type != N_FUN) {
303      if (cur_list->n_type == N_SO) {
304        return cur_list - list;
305      }
306      ++cur_list;
307      if (cur_list->n_type == N_ENDM)
308        RecalculateOffset(cur_list, stabstr);
309      continue;
310    }
311    while (cur_list->n_type == N_FUN) {
312      struct FuncInfo func_info;
313      memset(&func_info, 0, sizeof(func_info));
314      func_info.name = stabstr + cur_list->n_strx + stringOffset;
315      // The n_value field is always 0 from stab generated by Sun CC.
316      // TODO(Alfred): Find the correct value.
317      func_info.addr = cur_list->n_value;
318      ++cur_list;
319      if (cur_list->n_type == N_ENDM)
320        RecalculateOffset(cur_list, stabstr);
321      if (cur_list->n_type != N_ESYM && cur_list->n_type != N_ISYM &&
322          cur_list->n_type != N_FUN) {
323        // Stack parameter size.
324        cur_list += LoadStackParamSize(cur_list, list_end, &func_info);
325        // Line info.
326        cur_list += LoadLineInfo(cur_list, list_end, &func_info);
327      }
328      if (cur_list < list_end && cur_list->n_type == N_ENDM)
329        RecalculateOffset(cur_list, stabstr);
330      // Functions in this module should have address bigger than the module
331      // starting address.
332      //
333      // These two values are always 0 with Sun CC.
334      // TODO(Alfred): Get the correct value or remove the condition statement.
335      if (func_info.addr >= source_file_info->addr) {
336        source_file_info->func_info.push_back(func_info);
337      }
338    }
339  }
340  return cur_list - list;
341}
342
343// Compute size and rva information based on symbols loaded from stab section.
344bool ComputeSizeAndRVA(struct SymbolInfo *symbols) {
345  std::vector<struct SourceFileInfo> *sorted_files =
346    &(symbols->source_file_info);
347  SymbolMap *symbol_entries = &(symbols->symbol_entries);
348  for (size_t i = 0; i < sorted_files->size(); ++i) {
349    struct SourceFileInfo &source_file = (*sorted_files)[i];
350    std::vector<struct FuncInfo> *sorted_functions = &(source_file.func_info);
351    int func_size = sorted_functions->size();
352
353    for (size_t j = 0; j < func_size; ++j) {
354      struct FuncInfo &func_info = (*sorted_functions)[j];
355      int line_count = func_info.line_info.size();
356
357      // Discard the ending part of the name.
358      std::string func_name(func_info.name);
359      std::string::size_type last_colon = func_name.find_first_of(':');
360      if (last_colon != std::string::npos)
361        func_name = func_name.substr(0, last_colon);
362
363      // Fine the symbol offset from the loading address and size by name.
364      SymbolMap::const_iterator it = symbol_entries->find(func_name.c_str());
365      if (it->second) {
366        func_info.rva_to_base = it->second->offset;
367        func_info.size = (line_count == 0) ? 0 : it->second->size;
368      } else {
369        func_info.rva_to_base = 0;
370        func_info.size = 0;
371      }
372
373      // Compute function and line size.
374      for (size_t k = 0; k < line_count; ++k) {
375        struct LineInfo &line_info = func_info.line_info[k];
376
377        line_info.rva_to_base = line_info.rva_to_func + func_info.rva_to_base;
378        if (k == line_count - 1) {
379          line_info.size = func_info.size - line_info.rva_to_func;
380        } else {
381          struct LineInfo &next_line = func_info.line_info[k + 1];
382          line_info.size = next_line.rva_to_func - line_info.rva_to_func;
383        }
384      }  // for each line.
385    }  // for each function.
386  }  // for each source file.
387  for (SymbolMap::iterator it = symbol_entries->begin();
388       it != symbol_entries->end(); ++it) {
389    free(it->second);
390  }
391  return true;
392}
393
394bool LoadAllSymbols(const GElf_Shdr *stab_section,
395                    const GElf_Shdr *stabstr_section,
396                    GElf_Word base,
397                    struct SymbolInfo *symbols) {
398  if (stab_section == NULL || stabstr_section == NULL)
399    return false;
400
401  char *stabstr =
402    reinterpret_cast<char *>(stabstr_section->sh_offset + base);
403  struct slist *lists =
404    reinterpret_cast<struct slist *>(stab_section->sh_offset + base);
405  int nstab = stab_section->sh_size / sizeof(struct slist);
406  int source_id = 0;
407
408  // First pass, load all symbols from the object file.
409  for (int i = 0; i < nstab; ) {
410    int step = 1;
411    struct slist *cur_list = lists + i;
412    if (cur_list->n_type == N_SO) {
413      // FUNC <address> <size> <param_stack_size> <function>
414      struct SourceFileInfo source_file_info;
415      source_file_info.name = stabstr + cur_list->n_strx + stringOffset;
416      // The n_value field is always 0 from stab generated by Sun CC.
417      // TODO(Alfred): Find the correct value.
418      source_file_info.addr = cur_list->n_value;
419      if (strchr(source_file_info.name, '.'))
420        source_file_info.source_id = source_id++;
421      else
422        source_file_info.source_id = -1;
423      step = LoadFuncSymbols(cur_list, lists + nstab - 1, stabstr,
424                             base, &source_file_info);
425      symbols->source_file_info.push_back(source_file_info);
426    }
427    i += step;
428  }
429  // Second pass, compute the size of functions and lines.
430  return ComputeSizeAndRVA(symbols);
431}
432
433bool LoadSymbols(Elf *elf, GElf_Ehdr *elf_header, struct SymbolInfo *symbols,
434                 void *obj_base) {
435  GElf_Word base = reinterpret_cast<GElf_Word>(obj_base);
436
437  const GElf_Shdr *sections =
438    reinterpret_cast<GElf_Shdr *>(elf_header->e_shoff + base);
439  GElf_Shdr stab_section;
440  if (!FindSectionByName(elf, kStabName, elf_header->e_shstrndx,
441                         &stab_section)) {
442    fprintf(stderr, "Stab section not found.\n");
443    return false;
444  }
445  GElf_Shdr stabstr_section;
446  if (!FindSectionByName(elf, kStabStrName, elf_header->e_shstrndx,
447                         &stabstr_section)) {
448    fprintf(stderr, "Stabstr section not found.\n");
449    return false;
450  }
451  GElf_Shdr symtab_section;
452  if (!FindSectionByName(elf, kSymtabName, elf_header->e_shstrndx,
453                         &symtab_section)) {
454    fprintf(stderr, "Symtab section not found.\n");
455    return false;
456  }
457  GElf_Shdr strtab_section;
458  if (!FindSectionByName(elf, kStrtabName, elf_header->e_shstrndx,
459                         &strtab_section)) {
460    fprintf(stderr, "Strtab section not found.\n");
461    return false;
462  }
463
464  Elf_Sym *symbol = (Elf_Sym *)((char *)base + symtab_section.sh_offset);
465  for (int i = 0; i < symtab_section.sh_size/symtab_section.sh_entsize; ++i) {
466    struct SymbolEntry *symbol_entry =
467        (struct SymbolEntry *)malloc(sizeof(struct SymbolEntry));
468    const char *name = reinterpret_cast<char *>(
469        strtab_section.sh_offset + (GElf_Word)base + symbol->st_name);
470    symbol_entry->offset = symbol->st_value;
471    symbol_entry->size = symbol->st_size;
472    symbols->symbol_entries.insert(make_pair(name, symbol_entry));
473    ++symbol;
474  }
475
476
477  // Load symbols.
478  return LoadAllSymbols(&stab_section, &stabstr_section, base, symbols);
479}
480
481bool WriteModuleInfo(int fd, GElf_Half arch, const std::string &obj_file) {
482  const char *arch_name = NULL;
483  if (arch == EM_386)
484    arch_name = "x86";
485  else if (arch == EM_X86_64)
486    arch_name = "x86_64";
487  else if (arch == EM_SPARC32PLUS)
488    arch_name = "SPARC_32+";
489  else {
490    printf("Please add more ARCH support\n");
491    return false;
492  }
493
494  unsigned char identifier[16];
495  google_breakpad::FileID file_id(obj_file.c_str());
496  if (file_id.ElfFileIdentifier(identifier)) {
497    char identifier_str[40];
498    file_id.ConvertIdentifierToString(identifier,
499                                      identifier_str, sizeof(identifier_str));
500    std::string filename = obj_file;
501    size_t slash_pos = obj_file.find_last_of("/");
502    if (slash_pos != std::string::npos)
503      filename = obj_file.substr(slash_pos + 1);
504    return WriteFormat(fd, "MODULE solaris %s %s %s\n", arch_name,
505                       identifier_str, filename.c_str());
506  }
507  return false;
508}
509
510bool WriteSourceFileInfo(int fd, const struct SymbolInfo &symbols) {
511  for (size_t i = 0; i < symbols.source_file_info.size(); ++i) {
512    if (symbols.source_file_info[i].source_id != -1) {
513      const char *name = symbols.source_file_info[i].name;
514      if (!WriteFormat(fd, "FILE %d %s\n",
515                       symbols.source_file_info[i].source_id, name))
516        return false;
517    }
518  }
519  return true;
520}
521
522bool WriteOneFunction(int fd, int source_id,
523                      const struct FuncInfo &func_info){
524  // Discard the ending part of the name.
525  std::string func_name(func_info.name);
526  std::string::size_type last_colon = func_name.find_last_of(':');
527  if (last_colon != std::string::npos)
528    func_name = func_name.substr(0, last_colon);
529  func_name = Demangle(func_name.c_str());
530
531  if (func_info.size <= 0)
532    return true;
533
534  // rva_to_base could be unsigned long(32 bit) or unsigned long long(64 bit).
535  if (WriteFormat(fd, "FUNC %llx %x %d %s\n",
536                  (long long)func_info.rva_to_base,
537                  func_info.size,
538                  func_info.stack_param_size,
539                  func_name.c_str())) {
540    for (size_t i = 0; i < func_info.line_info.size(); ++i) {
541      const struct LineInfo &line_info = func_info.line_info[i];
542      if (line_info.line_num == 0)
543        return true;
544      if (!WriteFormat(fd, "%llx %x %d %d\n",
545                       (long long)line_info.rva_to_base,
546                       line_info.size,
547                       line_info.line_num,
548                       source_id))
549        return false;
550    }
551    return true;
552  }
553  return false;
554}
555
556bool WriteFunctionInfo(int fd, const struct SymbolInfo &symbols) {
557  for (size_t i = 0; i < symbols.source_file_info.size(); ++i) {
558    const struct SourceFileInfo &file_info = symbols.source_file_info[i];
559    for (size_t j = 0; j < file_info.func_info.size(); ++j) {
560      const struct FuncInfo &func_info = file_info.func_info[j];
561      if (!WriteOneFunction(fd, file_info.source_id, func_info))
562        return false;
563    }
564  }
565  return true;
566}
567
568bool DumpStabSymbols(int fd, const struct SymbolInfo &symbols) {
569  return WriteSourceFileInfo(fd, symbols) &&
570    WriteFunctionInfo(fd, symbols);
571}
572
//
// FDWrapper
//
// Wrapper class to make sure opened file is closed.
//
// The wrapped descriptor is closed by the destructor unless release() has
// been called. A value of -1 means "nothing to close". Objects cannot be
// copied, so a descriptor is never closed twice.
//
class FDWrapper {
 public:
  explicit FDWrapper(int fd) :
    fd_(fd) {
    }
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }
  // Returns the wrapped descriptor, or -1 after release().
  int get() const {
    return fd_;
  }
  // Gives up ownership: returns the descriptor and stops the destructor
  // from closing it.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }
 private:
  // Declared but not defined: copying is disallowed.
  FDWrapper(const FDWrapper &);
  FDWrapper &operator=(const FDWrapper &);

  int fd_;
};
598
//
// MmapWrapper
//
// Wrapper class to make sure mapped regions are unmapped.
//
// The region is unmapped by the destructor unless release() has been
// called. Both NULL and MAP_FAILED (the value mmap() returns on failure)
// are treated as "no mapping". Objects cannot be copied, so a region is
// never unmapped twice.
//
class MmapWrapper {
 public:
  MmapWrapper(void *mapped_address, size_t mapped_size) :
    base_(mapped_address), size_(mapped_size) {
  }
  ~MmapWrapper() {
    if (base_ != NULL && base_ != MAP_FAILED) {
      assert(size_ > 0);
      munmap(static_cast<char *>(base_), size_);
    }
  }
  // Gives up ownership: the destructor will no longer unmap the region.
  void release() {
    base_ = NULL;
    size_ = 0;
  }

 private:
  // Declared but not defined: copying is disallowed.
  MmapWrapper(const MmapWrapper &);
  MmapWrapper &operator=(const MmapWrapper &);

  void *base_;
  size_t size_;
};
624
625}  // namespace
626
627namespace google_breakpad {
628
629class AutoElfEnder {
630 public:
631  AutoElfEnder(Elf *elf) : elf_(elf) {}
632  ~AutoElfEnder() { if (elf_) elf_end(elf_); }
633 private:
634  Elf *elf_;
635};
636
637
638bool DumpSymbols::WriteSymbolFile(const std::string &obj_file, int sym_fd) {
639  if (elf_version(EV_CURRENT) == EV_NONE) {
640    fprintf(stderr, "elf_version() failed: %s\n", elf_errmsg(0));
641    return false;
642  }
643
644  int obj_fd = open(obj_file.c_str(), O_RDONLY);
645  if (obj_fd < 0)
646    return false;
647  FDWrapper obj_fd_wrapper(obj_fd);
648  struct stat st;
649  if (fstat(obj_fd, &st) != 0 && st.st_size <= 0)
650    return false;
651  void *obj_base = mmap(NULL, st.st_size,
652                        PROT_READ, MAP_PRIVATE, obj_fd, 0);
653  if (!obj_base)
654    return false;
655  MmapWrapper map_wrapper(obj_base, st.st_size);
656  GElf_Ehdr elf_header;
657  Elf *elf = elf_begin(obj_fd, ELF_C_READ, NULL);
658  AutoElfEnder elfEnder(elf);
659
660  if (gelf_getehdr(elf, &elf_header) == (GElf_Ehdr *)NULL) {
661    fprintf(stderr, "failed to read elf header: %s\n", elf_errmsg(-1));
662    return false;
663  }
664
665  if (!IsValidElf(&elf_header)) {
666    fprintf(stderr, "header magic doesn't match\n");
667    return false;
668  }
669  struct SymbolInfo symbols;
670  if (!LoadSymbols(elf, &elf_header, &symbols, obj_base))
671    return false;
672  // Write to symbol file.
673  if (WriteModuleInfo(sym_fd, elf_header.e_machine, obj_file) &&
674      DumpStabSymbols(sym_fd, symbols))
675    return true;
676
677  return false;
678}
679
680}  // namespace google_breakpad
681