1// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29//
30// Author: Satoru Takabayashi
31// Stack-footprint reduction work done by Raksit Ashok
32//
33// Implementation note:
34//
35// We don't use heaps but only use stacks.  We want to reduce the
36// stack consumption so that the symbolizer can run on small stacks.
37//
38// Here are some numbers collected with GCC 4.1.0 on x86:
39// - sizeof(Elf32_Sym)  = 16
40// - sizeof(Elf32_Shdr) = 40
41// - sizeof(Elf64_Sym)  = 24
42// - sizeof(Elf64_Shdr) = 64
43//
44// This implementation is intended to be async-signal-safe but uses
45// some functions which are not guaranteed to be so, such as memchr()
46// and memmove().  We assume they are async-signal-safe.
47//
48// Additional header can be specified by the GLOG_BUILD_CONFIG_INCLUDE
49// macro to add platform specific defines (e.g. OS_OPENBSD).
50
51#ifdef GLOG_BUILD_CONFIG_INCLUDE
52#include GLOG_BUILD_CONFIG_INCLUDE
53#endif  // GLOG_BUILD_CONFIG_INCLUDE
54
55#include "utilities.h"
56
57#if defined(HAVE_SYMBOLIZE)
58
59#include <limits>
60
61#include "symbolize.h"
62#include "demangle.h"
63
64_START_GOOGLE_NAMESPACE_
65
// assert() is avoided here because it is not guaranteed to be
// async-signal-safe.  A minimal assertion macro is defined instead;
// pretty printing of __FILE__, etc. has not been needed so far.

// Gives abort() an int-typed wrapper so that it can be used as an
// operand of the conditional operator (? :).
static int AssertFail() {
  abort();
  return 0;  // Never reached; present only to satisfy the return type.
}

// Evaluates to 0 when "expr" is true and aborts the process otherwise.
#define SAFE_ASSERT(expr) ((expr) ? 0 : AssertFail())
77
78static SymbolizeCallback g_symbolize_callback = NULL;
79void InstallSymbolizeCallback(SymbolizeCallback callback) {
80  g_symbolize_callback = callback;
81}
82
83static SymbolizeOpenObjectFileCallback g_symbolize_open_object_file_callback =
84    NULL;
85void InstallSymbolizeOpenObjectFileCallback(
86    SymbolizeOpenObjectFileCallback callback) {
87  g_symbolize_open_object_file_callback = callback;
88}
89
90// This function wraps the Demangle function to provide an interface
91// where the input symbol is demangled in-place.
92// To keep stack consumption low, we would like this function to not
93// get inlined.
94static ATTRIBUTE_NOINLINE void DemangleInplace(char *out, int out_size) {
95  char demangled[256];  // Big enough for sane demangled symbols.
96  if (Demangle(out, demangled, sizeof(demangled))) {
97    // Demangling succeeded. Copy to out if the space allows.
98    size_t len = strlen(demangled);
99    if (len + 1 <= (size_t)out_size) {  // +1 for '\0'.
100      SAFE_ASSERT(len < sizeof(demangled));
101      memmove(out, demangled, len + 1);
102    }
103  }
104}
105
106_END_GOOGLE_NAMESPACE_
107
108#if defined(__ELF__)
109
110#include <dlfcn.h>
111#if defined(OS_OPENBSD)
112#include <sys/exec_elf.h>
113#else
114#include <elf.h>
115#endif
116#include <errno.h>
117#include <fcntl.h>
118#include <limits.h>
119#include <stdint.h>
120#include <stdio.h>
121#include <stdlib.h>
122#include <stddef.h>
123#include <string.h>
124#include <sys/stat.h>
125#include <sys/types.h>
126#include <unistd.h>
127
128#include "symbolize.h"
129#include "config.h"
130#include "glog/raw_logging.h"
131
// Evaluates "fn" again and again for as long as it yields a negative
// value with errno set to EINTR.
#define NO_INTR(fn)   do {} while ((fn) < 0 && errno == EINTR)
134
135_START_GOOGLE_NAMESPACE_
136
137// Read up to "count" bytes from file descriptor "fd" into the buffer
138// starting at "buf" while handling short reads and EINTR.  On
139// success, return the number of bytes read.  Otherwise, return -1.
140static ssize_t ReadPersistent(const int fd, void *buf, const size_t count) {
141  SAFE_ASSERT(fd >= 0);
142  SAFE_ASSERT(count <= std::numeric_limits<ssize_t>::max());
143  char *buf0 = reinterpret_cast<char *>(buf);
144  ssize_t num_bytes = 0;
145  while (num_bytes < count) {
146    ssize_t len;
147    NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes));
148    if (len < 0) {  // There was an error other than EINTR.
149      return -1;
150    }
151    if (len == 0) {  // Reached EOF.
152      break;
153    }
154    num_bytes += len;
155  }
156  SAFE_ASSERT(num_bytes <= count);
157  return num_bytes;
158}
159
160// Read up to "count" bytes from "offset" in the file pointed by file
161// descriptor "fd" into the buffer starting at "buf".  On success,
162// return the number of bytes read.  Otherwise, return -1.
163static ssize_t ReadFromOffset(const int fd, void *buf,
164                              const size_t count, const off_t offset) {
165  off_t off = lseek(fd, offset, SEEK_SET);
166  if (off == (off_t)-1) {
167    return -1;
168  }
169  return ReadPersistent(fd, buf, count);
170}
171
172// Try reading exactly "count" bytes from "offset" bytes in a file
173// pointed by "fd" into the buffer starting at "buf" while handling
174// short reads and EINTR.  On success, return true. Otherwise, return
175// false.
176static bool ReadFromOffsetExact(const int fd, void *buf,
177                                const size_t count, const off_t offset) {
178  ssize_t len = ReadFromOffset(fd, buf, count, offset);
179  return len == count;
180}
181
182// Returns elf_header.e_type if the file pointed by fd is an ELF binary.
183static int FileGetElfType(const int fd) {
184  ElfW(Ehdr) elf_header;
185  if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
186    return -1;
187  }
188  if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) {
189    return -1;
190  }
191  return elf_header.e_type;
192}
193
194// Read the section headers in the given ELF binary, and if a section
195// of the specified type is found, set the output to this section header
196// and return true.  Otherwise, return false.
197// To keep stack consumption low, we would like this function to not get
198// inlined.
199static ATTRIBUTE_NOINLINE bool
200GetSectionHeaderByType(const int fd, ElfW(Half) sh_num, const off_t sh_offset,
201                       ElfW(Word) type, ElfW(Shdr) *out) {
202  // Read at most 16 section headers at a time to save read calls.
203  ElfW(Shdr) buf[16];
204  for (int i = 0; i < sh_num;) {
205    const ssize_t num_bytes_left = (sh_num - i) * sizeof(buf[0]);
206    const ssize_t num_bytes_to_read =
207        (sizeof(buf) > num_bytes_left) ? num_bytes_left : sizeof(buf);
208    const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read,
209                                       sh_offset + i * sizeof(buf[0]));
210    SAFE_ASSERT(len % sizeof(buf[0]) == 0);
211    const ssize_t num_headers_in_buf = len / sizeof(buf[0]);
212    SAFE_ASSERT(num_headers_in_buf <= sizeof(buf) / sizeof(buf[0]));
213    for (int j = 0; j < num_headers_in_buf; ++j) {
214      if (buf[j].sh_type == type) {
215        *out = buf[j];
216        return true;
217      }
218    }
219    i += num_headers_in_buf;
220  }
221  return false;
222}
223
224// There is no particular reason to limit section name to 63 characters,
225// but there has (as yet) been no need for anything longer either.
226const int kMaxSectionNameLen = 64;
227
228// name_len should include terminating '\0'.
229bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
230                            ElfW(Shdr) *out) {
231  ElfW(Ehdr) elf_header;
232  if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
233    return false;
234  }
235
236  ElfW(Shdr) shstrtab;
237  off_t shstrtab_offset = (elf_header.e_shoff +
238                           elf_header.e_shentsize * elf_header.e_shstrndx);
239  if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) {
240    return false;
241  }
242
243  for (int i = 0; i < elf_header.e_shnum; ++i) {
244    off_t section_header_offset = (elf_header.e_shoff +
245                                   elf_header.e_shentsize * i);
246    if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) {
247      return false;
248    }
249    char header_name[kMaxSectionNameLen];
250    if (sizeof(header_name) < name_len) {
251      RAW_LOG(WARNING, "Section name '%s' is too long (%" PRIuS "); "
252              "section will not be found (even if present).", name, name_len);
253      // No point in even trying.
254      return false;
255    }
256    off_t name_offset = shstrtab.sh_offset + out->sh_name;
257    ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset);
258    if (n_read == -1) {
259      return false;
260    } else if (n_read != name_len) {
261      // Short read -- name could be at end of file.
262      continue;
263    }
264    if (memcmp(header_name, name, name_len) == 0) {
265      return true;
266    }
267  }
268  return false;
269}
270
271// Read a symbol table and look for the symbol containing the
272// pc. Iterate over symbols in a symbol table and look for the symbol
273// containing "pc".  On success, return true and write the symbol name
274// to out.  Otherwise, return false.
275// To keep stack consumption low, we would like this function to not get
276// inlined.
277static ATTRIBUTE_NOINLINE bool
278FindSymbol(uint64_t pc, const int fd, char *out, int out_size,
279           uint64_t symbol_offset, const ElfW(Shdr) *strtab,
280           const ElfW(Shdr) *symtab) {
281  if (symtab == NULL) {
282    return false;
283  }
284  const int num_symbols = symtab->sh_size / symtab->sh_entsize;
285  for (int i = 0; i < num_symbols;) {
286    off_t offset = symtab->sh_offset + i * symtab->sh_entsize;
287
288    // If we are reading Elf64_Sym's, we want to limit this array to
289    // 32 elements (to keep stack consumption low), otherwise we can
290    // have a 64 element Elf32_Sym array.
291#if __WORDSIZE == 64
292#define NUM_SYMBOLS 32
293#else
294#define NUM_SYMBOLS 64
295#endif
296
297    // Read at most NUM_SYMBOLS symbols at once to save read() calls.
298    ElfW(Sym) buf[NUM_SYMBOLS];
299    const ssize_t len = ReadFromOffset(fd, &buf, sizeof(buf), offset);
300    SAFE_ASSERT(len % sizeof(buf[0]) == 0);
301    const ssize_t num_symbols_in_buf = len / sizeof(buf[0]);
302    SAFE_ASSERT(num_symbols_in_buf <= sizeof(buf)/sizeof(buf[0]));
303    for (int j = 0; j < num_symbols_in_buf; ++j) {
304      const ElfW(Sym)& symbol = buf[j];
305      uint64_t start_address = symbol.st_value;
306      start_address += symbol_offset;
307      uint64_t end_address = start_address + symbol.st_size;
308      if (symbol.st_value != 0 &&  // Skip null value symbols.
309          symbol.st_shndx != 0 &&  // Skip undefined symbols.
310          start_address <= pc && pc < end_address) {
311        ssize_t len1 = ReadFromOffset(fd, out, out_size,
312                                      strtab->sh_offset + symbol.st_name);
313        if (len1 <= 0 || memchr(out, '\0', out_size) == NULL) {
314          return false;
315        }
316        return true;  // Obtained the symbol name.
317      }
318    }
319    i += num_symbols_in_buf;
320  }
321  return false;
322}
323
324// Get the symbol name of "pc" from the file pointed by "fd".  Process
325// both regular and dynamic symbol tables if necessary.  On success,
326// write the symbol name to "out" and return true.  Otherwise, return
327// false.
328static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
329                                    char *out, int out_size,
330                                    uint64_t map_start_address) {
331  // Read the ELF header.
332  ElfW(Ehdr) elf_header;
333  if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
334    return false;
335  }
336
337  uint64_t symbol_offset = 0;
338  if (elf_header.e_type == ET_DYN) {  // DSO needs offset adjustment.
339    symbol_offset = map_start_address;
340  }
341
342  ElfW(Shdr) symtab, strtab;
343
344  // Consult a regular symbol table first.
345  if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
346                             SHT_SYMTAB, &symtab)) {
347    if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
348                             symtab.sh_link * sizeof(symtab))) {
349      return false;
350    }
351    if (FindSymbol(pc, fd, out, out_size, symbol_offset,
352                   &strtab, &symtab)) {
353      return true;  // Found the symbol in a regular symbol table.
354    }
355  }
356
357  // If the symbol is not found, then consult a dynamic symbol table.
358  if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
359                             SHT_DYNSYM, &symtab)) {
360    if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
361                             symtab.sh_link * sizeof(symtab))) {
362      return false;
363    }
364    if (FindSymbol(pc, fd, out, out_size, symbol_offset,
365                   &strtab, &symtab)) {
366      return true;  // Found the symbol in a dynamic symbol table.
367    }
368  }
369
370  return false;
371}
372
373namespace {
// Thin wrapper around a file descriptor so that the file descriptor
// gets closed for sure when the wrapper goes out of scope.  A negative
// descriptor is treated as "nothing to close".  Not copyable.
struct FileDescriptor {
  const int fd_;
  explicit FileDescriptor(int fd) : fd_(fd) {}
  ~FileDescriptor() {
    if (fd_ >= 0) {
      // close() is deliberately not retried on EINTR: on Linux the
      // descriptor is released even when the call is interrupted, so a
      // second close() could hit a descriptor that another thread has
      // just been handed.
      (void)close(fd_);
    }
  }
  // Returns the wrapped descriptor; ownership stays with the wrapper.
  int get() { return fd_; }

 private:
  explicit FileDescriptor(const FileDescriptor&);
  void operator=(const FileDescriptor&);
};
390
391// Helper class for reading lines from file.
392//
393// Note: we don't use ProcMapsIterator since the object is big (it has
394// a 5k array member) and uses async-unsafe functions such as sscanf()
395// and snprintf().
396class LineReader {
397 public:
398  explicit LineReader(int fd, char *buf, int buf_len) : fd_(fd),
399    buf_(buf), buf_len_(buf_len), bol_(buf), eol_(buf), eod_(buf) {
400  }
401
402  // Read '\n'-terminated line from file.  On success, modify "bol"
403  // and "eol", then return true.  Otherwise, return false.
404  //
405  // Note: if the last line doesn't end with '\n', the line will be
406  // dropped.  It's an intentional behavior to make the code simple.
407  bool ReadLine(const char **bol, const char **eol) {
408    if (BufferIsEmpty()) {  // First time.
409      const ssize_t num_bytes = ReadPersistent(fd_, buf_, buf_len_);
410      if (num_bytes <= 0) {  // EOF or error.
411        return false;
412      }
413      eod_ = buf_ + num_bytes;
414      bol_ = buf_;
415    } else {
416      bol_ = eol_ + 1;  // Advance to the next line in the buffer.
417      SAFE_ASSERT(bol_ <= eod_);  // "bol_" can point to "eod_".
418      if (!HasCompleteLine()) {
419        const int incomplete_line_length = eod_ - bol_;
420        // Move the trailing incomplete line to the beginning.
421        memmove(buf_, bol_, incomplete_line_length);
422        // Read text from file and append it.
423        char * const append_pos = buf_ + incomplete_line_length;
424        const int capacity_left = buf_len_ - incomplete_line_length;
425        const ssize_t num_bytes = ReadPersistent(fd_, append_pos,
426                                                 capacity_left);
427        if (num_bytes <= 0) {  // EOF or error.
428          return false;
429        }
430        eod_ = append_pos + num_bytes;
431        bol_ = buf_;
432      }
433    }
434    eol_ = FindLineFeed();
435    if (eol_ == NULL) {  // '\n' not found.  Malformed line.
436      return false;
437    }
438    *eol_ = '\0';  // Replace '\n' with '\0'.
439
440    *bol = bol_;
441    *eol = eol_;
442    return true;
443  }
444
445  // Beginning of line.
446  const char *bol() {
447    return bol_;
448  }
449
450  // End of line.
451  const char *eol() {
452    return eol_;
453  }
454
455 private:
456  explicit LineReader(const LineReader&);
457  void operator=(const LineReader&);
458
459  char *FindLineFeed() {
460    return reinterpret_cast<char *>(memchr(bol_, '\n', eod_ - bol_));
461  }
462
463  bool BufferIsEmpty() {
464    return buf_ == eod_;
465  }
466
467  bool HasCompleteLine() {
468    return !BufferIsEmpty() && FindLineFeed() != NULL;
469  }
470
471  const int fd_;
472  char * const buf_;
473  const int buf_len_;
474  char *bol_;
475  char *eol_;
476  const char *eod_;  // End of data in "buf_".
477};
478}  // namespace
479
480// Place the hex number read from "start" into "*hex".  The pointer to
481// the first non-hex character or "end" is returned.
482static char *GetHex(const char *start, const char *end, uint64_t *hex) {
483  *hex = 0;
484  const char *p;
485  for (p = start; p < end; ++p) {
486    int ch = *p;
487    if ((ch >= '0' && ch <= '9') ||
488        (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) {
489      *hex = (*hex << 4) | (ch < 'A' ? ch - '0' : (ch & 0xF) + 9);
490    } else {  // Encountered the first non-hex character.
491      break;
492    }
493  }
494  SAFE_ASSERT(p <= end);
495  return const_cast<char *>(p);
496}
497
498// Searches for the object file (from /proc/self/maps) that contains
499// the specified pc.  If found, sets |start_address| to the start address
500// of where this object file is mapped in memory, sets the module base
501// address into |base_address|, copies the object file name into
502// |out_file_name|, and attempts to open the object file.  If the object
503// file is opened successfully, returns the file descriptor.  Otherwise,
504// returns -1.  |out_file_name_size| is the size of the file name buffer
505// (including the null-terminator).
506static ATTRIBUTE_NOINLINE int
507OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
508                                             uint64_t &start_address,
509                                             uint64_t &base_address,
510                                             char *out_file_name,
511                                             int out_file_name_size) {
512  int object_fd;
513
514  // Open /proc/self/maps.
515  int maps_fd;
516  NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY));
517  FileDescriptor wrapped_maps_fd(maps_fd);
518  if (wrapped_maps_fd.get() < 0) {
519    return -1;
520  }
521
522  // Iterate over maps and look for the map containing the pc.  Then
523  // look into the symbol tables inside.
524  char buf[1024];  // Big enough for line of sane /proc/self/maps
525  int num_maps = 0;
526  LineReader reader(wrapped_maps_fd.get(), buf, sizeof(buf));
527  while (true) {
528    num_maps++;
529    const char *cursor;
530    const char *eol;
531    if (!reader.ReadLine(&cursor, &eol)) {  // EOF or malformed line.
532      return -1;
533    }
534
535    // Start parsing line in /proc/self/maps.  Here is an example:
536    //
537    // 08048000-0804c000 r-xp 00000000 08:01 2142121    /bin/cat
538    //
539    // We want start address (08048000), end address (0804c000), flags
540    // (r-xp) and file name (/bin/cat).
541
542    // Read start address.
543    cursor = GetHex(cursor, eol, &start_address);
544    if (cursor == eol || *cursor != '-') {
545      return -1;  // Malformed line.
546    }
547    ++cursor;  // Skip '-'.
548
549    // Read end address.
550    uint64_t end_address;
551    cursor = GetHex(cursor, eol, &end_address);
552    if (cursor == eol || *cursor != ' ') {
553      return -1;  // Malformed line.
554    }
555    ++cursor;  // Skip ' '.
556
557    // Check start and end addresses.
558    if (!(start_address <= pc && pc < end_address)) {
559      continue;  // We skip this map.  PC isn't in this map.
560    }
561
562    // Read flags.  Skip flags until we encounter a space or eol.
563    const char * const flags_start = cursor;
564    while (cursor < eol && *cursor != ' ') {
565      ++cursor;
566    }
567    // We expect at least four letters for flags (ex. "r-xp").
568    if (cursor == eol || cursor < flags_start + 4) {
569      return -1;  // Malformed line.
570    }
571
572    // Check flags.  We are only interested in "r-x" maps.
573    if (memcmp(flags_start, "r-x", 3) != 0) {  // Not a "r-x" map.
574      continue;  // We skip this map.
575    }
576    ++cursor;  // Skip ' '.
577
578    // Read file offset.
579    uint64_t file_offset;
580    cursor = GetHex(cursor, eol, &file_offset);
581    if (cursor == eol || *cursor != ' ') {
582      return -1;  // Malformed line.
583    }
584    ++cursor;  // Skip ' '.
585
586    // Don't subtract 'start_address' from the first entry:
587    // * If a binary is compiled w/o -pie, then the first entry in
588    //   process maps is likely the binary itself (all dynamic libs
589    //   are mapped higher in address space). For such a binary,
590    //   instruction offset in binary coincides with the actual
591    //   instruction address in virtual memory (as code section
592    //   is mapped to a fixed memory range).
593    // * If a binary is compiled with -pie, all the modules are
594    //   mapped high at address space (in particular, higher than
595    //   shadow memory of the tool), so the module can't be the
596    //   first entry.
597    base_address = ((num_maps == 1) ? 0U : start_address) - file_offset;
598
599    // Skip to file name.  "cursor" now points to dev.  We need to
600    // skip at least two spaces for dev and inode.
601    int num_spaces = 0;
602    while (cursor < eol) {
603      if (*cursor == ' ') {
604        ++num_spaces;
605      } else if (num_spaces >= 2) {
606        // The first non-space character after skipping two spaces
607        // is the beginning of the file name.
608        break;
609      }
610      ++cursor;
611    }
612    if (cursor == eol) {
613      return -1;  // Malformed line.
614    }
615
616    // Finally, "cursor" now points to file name of our interest.
617    NO_INTR(object_fd = open(cursor, O_RDONLY));
618    if (object_fd < 0) {
619      // Failed to open object file.  Copy the object file name to
620      // |out_file_name|.
621      strncpy(out_file_name, cursor, out_file_name_size);
622      // Making sure |out_file_name| is always null-terminated.
623      out_file_name[out_file_name_size - 1] = '\0';
624      return -1;
625    }
626    return object_fd;
627  }
628}
629
// POSIX doesn't define any async-signal safe function for converting
// an integer to ASCII. We'll have to define our own version.
//
// itoa_r() converts the (signed) integer "i" to ASCII in the given "base"
// (2..16, lower-case digits) and stores the NUL-terminated result in "buf",
// which has room for "sz" bytes.  The digits are left-padded with '0' up to
// "padding" characters.  A leading '-' is produced only for negative values
// in base 10; in any other base the two's complement bit pattern of "i" is
// printed as an unsigned number.
//
// Returns "buf" on success.  Returns NULL if "base" is out of range or if
// the result (including the NUL) does not fit into "sz" bytes; in that case
// "buf" holds the empty string, unless "sz" is 0, in which case "buf" is
// not touched at all.  Never writes more than "sz" bytes.
// NOTE: code from sandbox/linux/seccomp-bpf/demo.cc.
char *itoa_r(intptr_t i, char *buf, size_t sz, int base, size_t padding) {
  // Make sure we can write at least one NUL byte.
  size_t n = 1;
  if (n > sz)
    return NULL;

  if (base < 2 || base > 16) {
    buf[0] = '\000';
    return NULL;
  }

  char *start = buf;

  uintptr_t j = (uintptr_t)i;

  // Handle negative numbers (only for base 10).
  if (i < 0 && base == 10) {
    // Negate in unsigned arithmetic.  "-i" would overflow (undefined
    // behavior) for the most negative value of intptr_t.
    j = (uintptr_t)0 - (uintptr_t)i;

    // Make sure we can write the '-' character.
    if (++n > sz) {
      buf[0] = '\000';
      return NULL;
    }
    *start++ = '-';
  }

  // Loop until we have converted the entire number. Output at least one
  // character (i.e. '0').
  char *ptr = start;
  do {
    // Make sure there is still enough space left in our output buffer.
    if (++n > sz) {
      buf[0] = '\000';
      return NULL;
    }

    // Output the next digit.
    *ptr++ = "0123456789abcdef"[j % base];
    j /= base;

    if (padding > 0)
      padding--;
  } while (j > 0 || padding > 0);

  // Terminate the output with a NUL character.
  *ptr = '\000';

  // Conversion to ASCII actually resulted in the digits being in reverse
  // order. We can't easily generate them in forward order, as we can't tell
  // the number of characters needed until we are done converting.
  // So, now, we reverse the string (except for the possible "-" sign).
  while (--ptr > start) {
    char ch = *ptr;
    *ptr = *start;
    *start++ = ch;
  }
  return buf;
}
696
697// Safely appends string |source| to string |dest|.  Never writes past the
698// buffer size |dest_size| and guarantees that |dest| is null-terminated.
699void SafeAppendString(const char* source, char* dest, int dest_size) {
700  int dest_string_length = strlen(dest);
701  SAFE_ASSERT(dest_string_length < dest_size);
702  dest += dest_string_length;
703  dest_size -= dest_string_length;
704  strncpy(dest, source, dest_size);
705  // Making sure |dest| is always null-terminated.
706  dest[dest_size - 1] = '\0';
707}
708
709// Converts a 64-bit value into a hex string, and safely appends it to |dest|.
710// Never writes past the buffer size |dest_size| and guarantees that |dest| is
711// null-terminated.
712void SafeAppendHexNumber(uint64_t value, char* dest, int dest_size) {
713  // 64-bit numbers in hex can have up to 16 digits.
714  char buf[17] = {'\0'};
715  SafeAppendString(itoa_r(value, buf, sizeof(buf), 16, 0), dest, dest_size);
716}
717
718// The implementation of our symbolization routine.  If it
719// successfully finds the symbol containing "pc" and obtains the
720// symbol name, returns true and write the symbol name to "out".
721// Otherwise, returns false. If Callback function is installed via
722// InstallSymbolizeCallback(), the function is also called in this function,
723// and "out" is used as its output.
724// To keep stack consumption low, we would like this function to not
725// get inlined.
726static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
727                                                    int out_size) {
728  uint64_t pc0 = reinterpret_cast<uintptr_t>(pc);
729  uint64_t start_address = 0;
730  uint64_t base_address = 0;
731  int object_fd = -1;
732
733  if (out_size < 1) {
734    return false;
735  }
736  out[0] = '\0';
737  SafeAppendString("(", out, out_size);
738
739  if (g_symbolize_open_object_file_callback) {
740    object_fd = g_symbolize_open_object_file_callback(pc0, start_address,
741                                                      base_address, out + 1,
742                                                      out_size - 1);
743  } else {
744    object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0, start_address,
745                                                             base_address,
746                                                             out + 1,
747                                                             out_size - 1);
748  }
749
750  // Check whether a file name was returned.
751  if (object_fd < 0) {
752    if (out[1]) {
753      // The object file containing PC was determined successfully however the
754      // object file was not opened successfully.  This is still considered
755      // success because the object file name and offset are known and tools
756      // like asan_symbolize.py can be used for the symbolization.
757      out[out_size - 1] = '\0';  // Making sure |out| is always null-terminated.
758      SafeAppendString("+0x", out, out_size);
759      SafeAppendHexNumber(pc0 - base_address, out, out_size);
760      SafeAppendString(")", out, out_size);
761      return true;
762    }
763    // Failed to determine the object file containing PC.  Bail out.
764    return false;
765  }
766  FileDescriptor wrapped_object_fd(object_fd);
767  int elf_type = FileGetElfType(wrapped_object_fd.get());
768  if (elf_type == -1) {
769    return false;
770  }
771  if (g_symbolize_callback) {
772    // Run the call back if it's installed.
773    // Note: relocation (and much of the rest of this code) will be
774    // wrong for prelinked shared libraries and PIE executables.
775    uint64 relocation = (elf_type == ET_DYN) ? start_address : 0;
776    int num_bytes_written = g_symbolize_callback(wrapped_object_fd.get(),
777                                                 pc, out, out_size,
778                                                 relocation);
779    if (num_bytes_written > 0) {
780      out += num_bytes_written;
781      out_size -= num_bytes_written;
782    }
783  }
784  if (!GetSymbolFromObjectFile(wrapped_object_fd.get(), pc0,
785                               out, out_size, start_address)) {
786    return false;
787  }
788
789  // Symbolization succeeded.  Now we try to demangle the symbol.
790  DemangleInplace(out, out_size);
791  return true;
792}
793
794_END_GOOGLE_NAMESPACE_
795
796#elif defined(OS_MACOSX) && defined(HAVE_DLADDR)
797
798#include <dlfcn.h>
799#include <string.h>
800
801_START_GOOGLE_NAMESPACE_
802
803static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
804                                                    int out_size) {
805  Dl_info info;
806  if (dladdr(pc, &info)) {
807    if ((int)strlen(info.dli_sname) < out_size) {
808      strcpy(out, info.dli_sname);
809      // Symbolization succeeded.  Now we try to demangle the symbol.
810      DemangleInplace(out, out_size);
811      return true;
812    }
813  }
814  return false;
815}
816
817_END_GOOGLE_NAMESPACE_
818
819#else
820# error BUG: HAVE_SYMBOLIZE was wrongly set
821#endif
822
823_START_GOOGLE_NAMESPACE_
824
825bool Symbolize(void *pc, char *out, int out_size) {
826  SAFE_ASSERT(out_size >= 0);
827  return SymbolizeAndDemangle(pc, out, out_size);
828}
829
830_END_GOOGLE_NAMESPACE_
831
832#else  /* HAVE_SYMBOLIZE */
833
834#include <assert.h>
835
836#include "config.h"
837
838_START_GOOGLE_NAMESPACE_
839
// Placeholder used when symbolization is not available on this platform:
// asserts when called, and reports failure if assertions are compiled out.
// TODO: Support other environments.
bool Symbolize(void *pc, char *out, int out_size) {
  // None of the arguments is used by this stub.
  (void)pc;
  (void)out;
  (void)out_size;
  assert(false);
  return false;
}
845
846_END_GOOGLE_NAMESPACE_
847
848#endif
849