sanitizer_symbolizer_posix_libcdep.cc revision 66d91e3356a0c4d7aff3beaaaff3e87bbaec805c
1//===-- sanitizer_symbolizer_posix_libcdep.cc -----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is shared between AddressSanitizer and ThreadSanitizer
11// run-time libraries.
12// POSIX-specific implementation of symbolizer parts.
13//===----------------------------------------------------------------------===//
14
15#include "sanitizer_platform.h"
16#if SANITIZER_POSIX
17#include "sanitizer_allocator_internal.h"
18#include "sanitizer_common.h"
19#include "sanitizer_internal_defs.h"
20#include "sanitizer_linux.h"
21#include "sanitizer_placement_new.h"
22#include "sanitizer_procmaps.h"
23#include "sanitizer_symbolizer.h"
24
25#include <errno.h>
26#include <stdlib.h>
27#include <sys/wait.h>
28#include <unistd.h>
29
30// C++ demangling function, as required by Itanium C++ ABI. This is weak,
31// because we do not require a C++ ABI library to be linked to a program
32// using sanitizers; if it's not present, we'll just use the mangled name.
namespace __cxxabiv1 {
  // Weak declaration: the symbol's address is tested against null before any
  // call is made through it, so the program links even when no definition is
  // available.
  extern "C" SANITIZER_WEAK_ATTRIBUTE
  char *__cxa_demangle(const char *mangled, char *buffer,
                                  size_t *length, int *status);
}
38
39namespace __sanitizer {
40
41// Attempts to demangle the name via __cxa_demangle from __cxxabiv1.
42static const char *DemangleCXXABI(const char *name) {
43  // FIXME: __cxa_demangle aggressively insists on allocating memory.
44  // There's not much we can do about that, short of providing our
45  // own demangler (libc++abi's implementation could be adapted so that
46  // it does not allocate). For now, we just call it anyway, and we leak
47  // the returned value.
48  if (__cxxabiv1::__cxa_demangle)
49    if (const char *demangled_name =
50          __cxxabiv1::__cxa_demangle(name, 0, 0, 0))
51      return demangled_name;
52
53  return name;
54}
55
// Command-line flag handed to the external symbolizer when it is executed.
// The value is picked at compile time from the target architecture macros;
// any architecture not listed here gets "unknown".
#if defined(__x86_64__)
static const char* const kSymbolizerArch = "--default-arch=x86_64";
#elif defined(__i386__)
static const char* const kSymbolizerArch = "--default-arch=i386";
#elif defined(__powerpc64__)
static const char* const kSymbolizerArch = "--default-arch=powerpc64";
#else
static const char* const kSymbolizerArch = "--default-arch=unknown";
#endif

// How long (in milliseconds) the parent sleeps after fork() before it polls
// the child with waitpid() to see whether the symbolizer is still running.
static const int kSymbolizerStartupTimeMillis = 10;
67
68// Creates external symbolizer connected via pipe, user should write
69// to output_fd and read from input_fd.
70static bool StartSymbolizerSubprocess(const char *path_to_symbolizer,
71                                      int *input_fd, int *output_fd) {
72  if (!FileExists(path_to_symbolizer)) {
73    Report("WARNING: invalid path to external symbolizer!\n");
74    return false;
75  }
76
77  int *infd = NULL;
78  int *outfd = NULL;
79  // The client program may close its stdin and/or stdout and/or stderr
80  // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
81  // In this case the communication between the forked processes may be
82  // broken if either the parent or the child tries to close or duplicate
83  // these descriptors. The loop below produces two pairs of file
84  // descriptors, each greater than 2 (stderr).
85  int sock_pair[5][2];
86  for (int i = 0; i < 5; i++) {
87    if (pipe(sock_pair[i]) == -1) {
88      for (int j = 0; j < i; j++) {
89        internal_close(sock_pair[j][0]);
90        internal_close(sock_pair[j][1]);
91      }
92      Report("WARNING: Can't create a socket pair to start "
93             "external symbolizer (errno: %d)\n", errno);
94      return false;
95    } else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) {
96      if (infd == NULL) {
97        infd = sock_pair[i];
98      } else {
99        outfd = sock_pair[i];
100        for (int j = 0; j < i; j++) {
101          if (sock_pair[j] == infd) continue;
102          internal_close(sock_pair[j][0]);
103          internal_close(sock_pair[j][1]);
104        }
105        break;
106      }
107    }
108  }
109  CHECK(infd);
110  CHECK(outfd);
111
112  int pid = fork();
113  if (pid == -1) {
114    // Fork() failed.
115    internal_close(infd[0]);
116    internal_close(infd[1]);
117    internal_close(outfd[0]);
118    internal_close(outfd[1]);
119    Report("WARNING: failed to fork external symbolizer "
120           " (errno: %d)\n", errno);
121    return false;
122  } else if (pid == 0) {
123    // Child subprocess.
124    internal_close(STDOUT_FILENO);
125    internal_close(STDIN_FILENO);
126    internal_dup2(outfd[0], STDIN_FILENO);
127    internal_dup2(infd[1], STDOUT_FILENO);
128    internal_close(outfd[0]);
129    internal_close(outfd[1]);
130    internal_close(infd[0]);
131    internal_close(infd[1]);
132    for (int fd = getdtablesize(); fd > 2; fd--)
133      internal_close(fd);
134    execl(path_to_symbolizer, path_to_symbolizer, kSymbolizerArch, (char*)0);
135    internal__exit(1);
136  }
137
138  // Continue execution in parent process.
139  internal_close(outfd[0]);
140  internal_close(infd[1]);
141  *input_fd = infd[0];
142  *output_fd = outfd[1];
143
144  // Check that symbolizer subprocess started successfully.
145  int pid_status;
146  SleepForMillis(kSymbolizerStartupTimeMillis);
147  int exited_pid = waitpid(pid, &pid_status, WNOHANG);
148  if (exited_pid != 0) {
149    // Either waitpid failed, or child has already exited.
150    Report("WARNING: external symbolizer didn't start up correctly!\n");
151    return false;
152  }
153
154  return true;
155}
156
157// Extracts the prefix of "str" that consists of any characters not
158// present in "delims" string, and copies this prefix to "result", allocating
159// space for it.
160// Returns a pointer to "str" after skipping extracted prefix and first
161// delimiter char.
162static const char *ExtractToken(const char *str, const char *delims,
163                                char **result) {
164  uptr prefix_len = internal_strcspn(str, delims);
165  *result = (char*)InternalAlloc(prefix_len + 1);
166  internal_memcpy(*result, str, prefix_len);
167  (*result)[prefix_len] = '\0';
168  const char *prefix_end = str + prefix_len;
169  if (*prefix_end != '\0') prefix_end++;
170  return prefix_end;
171}
172
173// Same as ExtractToken, but converts extracted token to integer.
174static const char *ExtractInt(const char *str, const char *delims,
175                              int *result) {
176  char *buff;
177  const char *ret = ExtractToken(str, delims, &buff);
178  if (buff != 0) {
179    *result = (int)internal_atoll(buff);
180  }
181  InternalFree(buff);
182  return ret;
183}
184
185static const char *ExtractUptr(const char *str, const char *delims,
186                               uptr *result) {
187  char *buff;
188  const char *ret = ExtractToken(str, delims, &buff);
189  if (buff != 0) {
190    *result = (uptr)internal_atoll(buff);
191  }
192  InternalFree(buff);
193  return ret;
194}
195
196// ExternalSymbolizer encapsulates communication between the tool and
197// external symbolizer program, running in a different subprocess,
198// For now we assume the following protocol:
199// For each request of the form
200//   <module_name> <module_offset>
201// passed to STDIN, external symbolizer prints to STDOUT response:
202//   <function_name>
203//   <file_name>:<line_number>:<column_number>
204//   <function_name>
205//   <file_name>:<line_number>:<column_number>
206//   ...
207//   <empty line>
208class ExternalSymbolizer {
209 public:
210  ExternalSymbolizer(const char *path, int input_fd, int output_fd)
211      : path_(path),
212        input_fd_(input_fd),
213        output_fd_(output_fd),
214        times_restarted_(0) {
215    CHECK(path_);
216    CHECK_NE(input_fd_, kInvalidFd);
217    CHECK_NE(output_fd_, kInvalidFd);
218  }
219
220  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
221    CHECK(module_name);
222    internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
223                      is_data ? "DATA " : "", module_name, module_offset);
224    if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
225      return 0;
226    if (!readFromSymbolizer(buffer_, kBufferSize))
227      return 0;
228    return buffer_;
229  }
230
231  bool Restart() {
232    if (times_restarted_ >= kMaxTimesRestarted) return false;
233    times_restarted_++;
234    internal_close(input_fd_);
235    internal_close(output_fd_);
236    return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
237  }
238
239  void Flush() {
240  }
241
242 private:
243  bool readFromSymbolizer(char *buffer, uptr max_length) {
244    if (max_length == 0)
245      return true;
246    uptr read_len = 0;
247    while (true) {
248      uptr just_read = internal_read(input_fd_, buffer + read_len,
249                                     max_length - read_len);
250      // We can't read 0 bytes, as we don't expect external symbolizer to close
251      // its stdout.
252      if (just_read == 0 || just_read == (uptr)-1) {
253        Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
254        return false;
255      }
256      read_len += just_read;
257      // Empty line marks the end of symbolizer output.
258      if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
259                           buffer[read_len - 2] == '\n') {
260        break;
261      }
262    }
263    return true;
264  }
265
266  bool writeToSymbolizer(const char *buffer, uptr length) {
267    if (length == 0)
268      return true;
269    uptr write_len = internal_write(output_fd_, buffer, length);
270    if (write_len == 0 || write_len == (uptr)-1) {
271      Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
272      return false;
273    }
274    return true;
275  }
276
277  const char *path_;
278  int input_fd_;
279  int output_fd_;
280
281  static const uptr kBufferSize = 16 * 1024;
282  char buffer_[kBufferSize];
283
284  static const uptr kMaxTimesRestarted = 5;
285  uptr times_restarted_;
286};
287
288#if SANITIZER_SUPPORTS_WEAK_HOOKS
// Weak hooks for an in-process symbolizer. Each declaration is weak, so its
// address is compared against null before it is called.
extern "C" {
// Writes the symbolization result for a code address into Buffer.
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
                                char *Buffer, int MaxLength);
// Writes the symbolization result for a data address into Buffer.
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
                                char *Buffer, int MaxLength);
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
void __sanitizer_symbolize_flush();
// The return value is treated by the caller as the buffer length required
// for the demangled name.
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
int __sanitizer_symbolize_demangle(const char *Name, char *Buffer,
                                   int MaxLength);
}  // extern "C"
302
303class InternalSymbolizer {
304 public:
305  typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
306
307  static InternalSymbolizer *get(LowLevelAllocator *alloc) {
308    if (__sanitizer_symbolize_code != 0 &&
309        __sanitizer_symbolize_data != 0) {
310      return new(*alloc) InternalSymbolizer();
311    }
312    return 0;
313  }
314
315  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
316    SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
317                                                : __sanitizer_symbolize_code;
318    if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
319      return buffer_;
320    return 0;
321  }
322
323  void Flush() {
324    if (__sanitizer_symbolize_flush)
325      __sanitizer_symbolize_flush();
326  }
327
328  const char *Demangle(const char *name) {
329    if (__sanitizer_symbolize_demangle) {
330      for (uptr res_length = 1024;
331           res_length <= InternalSizeClassMap::kMaxSize;) {
332        char *res_buff = static_cast<char*>(InternalAlloc(res_length));
333        uptr req_length =
334            __sanitizer_symbolize_demangle(name, res_buff, res_length);
335        if (req_length > res_length) {
336          res_length = req_length + 1;
337          InternalFree(res_buff);
338          continue;
339        }
340        return res_buff;
341      }
342    }
343    return name;
344  }
345
346 private:
347  InternalSymbolizer() { }
348
349  static const int kBufferSize = 16 * 1024;
350  static const int kMaxDemangledNameSize = 1024;
351  char buffer_[kBufferSize];
352};
353#else  // SANITIZER_SUPPORTS_WEAK_HOOKS
354
// Stub used when weak hooks are not supported: get() never produces an
// instance, and the remaining methods do nothing.
class InternalSymbolizer {
 public:
  // Always returns 0.
  static InternalSymbolizer *get(LowLevelAllocator *alloc) { return 0; }
  // Always returns 0.
  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
    return 0;
  }
  void Flush() { }
  // Returns "name" unchanged.
  const char *Demangle(const char *name) { return name; }
};
364
365#endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
366
367class POSIXSymbolizer : public Symbolizer {
368 public:
369  POSIXSymbolizer(ExternalSymbolizer *external_symbolizer,
370                  InternalSymbolizer *internal_symbolizer)
371      : Symbolizer(),
372        external_symbolizer_(external_symbolizer),
373        internal_symbolizer_(internal_symbolizer) {}
374
375  uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
376    BlockingMutexLock l(&mu_);
377    if (max_frames == 0)
378      return 0;
379    LoadedModule *module = FindModuleForAddress(addr);
380    if (module == 0)
381      return 0;
382    const char *module_name = module->full_name();
383    uptr module_offset = addr - module->base_address();
384    const char *str = SendCommand(false, module_name, module_offset);
385    if (str == 0) {
386      // External symbolizer was not initialized or failed. Fill only data
387      // about module name and offset.
388      AddressInfo *info = &frames[0];
389      info->Clear();
390      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
391      return 1;
392    }
393    uptr frame_id = 0;
394    for (frame_id = 0; frame_id < max_frames; frame_id++) {
395      AddressInfo *info = &frames[frame_id];
396      char *function_name = 0;
397      str = ExtractToken(str, "\n", &function_name);
398      CHECK(function_name);
399      if (function_name[0] == '\0') {
400        // There are no more frames.
401        break;
402      }
403      info->Clear();
404      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
405      info->function = function_name;
406      // Parse <file>:<line>:<column> buffer.
407      char *file_line_info = 0;
408      str = ExtractToken(str, "\n", &file_line_info);
409      CHECK(file_line_info);
410      const char *line_info = ExtractToken(file_line_info, ":", &info->file);
411      line_info = ExtractInt(line_info, ":", &info->line);
412      line_info = ExtractInt(line_info, "", &info->column);
413      InternalFree(file_line_info);
414
415      // Functions and filenames can be "??", in which case we write 0
416      // to address info to mark that names are unknown.
417      if (0 == internal_strcmp(info->function, "??")) {
418        InternalFree(info->function);
419        info->function = 0;
420      }
421      if (0 == internal_strcmp(info->file, "??")) {
422        InternalFree(info->file);
423        info->file = 0;
424      }
425    }
426    if (frame_id == 0) {
427      // Make sure we return at least one frame.
428      AddressInfo *info = &frames[0];
429      info->Clear();
430      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
431      frame_id = 1;
432    }
433    return frame_id;
434  }
435
436  bool SymbolizeData(uptr addr, DataInfo *info) {
437    BlockingMutexLock l(&mu_);
438    LoadedModule *module = FindModuleForAddress(addr);
439    if (module == 0)
440      return false;
441    const char *module_name = module->full_name();
442    uptr module_offset = addr - module->base_address();
443    internal_memset(info, 0, sizeof(*info));
444    info->address = addr;
445    info->module = internal_strdup(module_name);
446    info->module_offset = module_offset;
447    const char *str = SendCommand(true, module_name, module_offset);
448    if (str == 0)
449      return true;
450    str = ExtractToken(str, "\n", &info->name);
451    str = ExtractUptr(str, " ", &info->start);
452    str = ExtractUptr(str, "\n", &info->size);
453    info->start += module->base_address();
454    return true;
455  }
456
457  bool IsAvailable() {
458    return internal_symbolizer_ != 0 || external_symbolizer_ != 0;
459  }
460
461  bool IsExternalAvailable() {
462    return external_symbolizer_ != 0;
463  }
464
465  void Flush() {
466    BlockingMutexLock l(&mu_);
467    if (internal_symbolizer_ != 0) {
468      SymbolizerScope sym_scope(this);
469      internal_symbolizer_->Flush();
470    }
471    if (external_symbolizer_ != 0)
472      external_symbolizer_->Flush();
473  }
474
475  const char *Demangle(const char *name) {
476    BlockingMutexLock l(&mu_);
477    // Run hooks even if we don't use internal symbolizer, as cxxabi
478    // demangle may call system functions.
479    SymbolizerScope sym_scope(this);
480    if (internal_symbolizer_ != 0)
481      return internal_symbolizer_->Demangle(name);
482    return DemangleCXXABI(name);
483  }
484
485  void PrepareForSandboxing() {
486#if SANITIZER_LINUX && !SANITIZER_ANDROID
487    BlockingMutexLock l(&mu_);
488    // Cache /proc/self/exe on Linux.
489    CacheBinaryName();
490#endif
491  }
492
493 private:
494  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
495    mu_.CheckLocked();
496    // First, try to use internal symbolizer.
497    if (internal_symbolizer_) {
498      SymbolizerScope sym_scope(this);
499      return internal_symbolizer_->SendCommand(is_data, module_name,
500                                               module_offset);
501    }
502    // Otherwise, fall back to external symbolizer.
503    if (external_symbolizer_ == 0) {
504      ReportExternalSymbolizerError(
505          "WARNING: Trying to symbolize code, but external "
506          "symbolizer is not initialized!\n");
507      return 0;
508    }
509    for (;;) {
510      char *reply = external_symbolizer_->SendCommand(is_data, module_name,
511          module_offset);
512      if (reply)
513        return reply;
514      // Try to restart symbolizer subprocess. If we don't succeed, forget
515      // about it and don't try to use it later.
516      if (!external_symbolizer_->Restart()) {
517        ReportExternalSymbolizerError(
518            "WARNING: Failed to use and restart external symbolizer!\n");
519        external_symbolizer_ = 0;
520        return 0;
521      }
522    }
523  }
524
525  LoadedModule *FindModuleForAddress(uptr address) {
526    mu_.CheckLocked();
527    bool modules_were_reloaded = false;
528    if (modules_ == 0 || !modules_fresh_) {
529      modules_ = (LoadedModule*)(symbolizer_allocator_.Allocate(
530          kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
531      CHECK(modules_);
532      n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts,
533                                    /* filter */ 0);
534      // FIXME: Return this check when GetListOfModules is implemented on Mac.
535      // CHECK_GT(n_modules_, 0);
536      CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
537      modules_fresh_ = true;
538      modules_were_reloaded = true;
539    }
540    for (uptr i = 0; i < n_modules_; i++) {
541      if (modules_[i].containsAddress(address)) {
542        return &modules_[i];
543      }
544    }
545    // Reload the modules and look up again, if we haven't tried it yet.
546    if (!modules_were_reloaded) {
547      // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
548      // It's too aggressive to reload the list of modules each time we fail
549      // to find a module for a given address.
550      modules_fresh_ = false;
551      return FindModuleForAddress(address);
552    }
553    return 0;
554  }
555
556  void ReportExternalSymbolizerError(const char *msg) {
557    // Don't use atomics here for now, as SymbolizeCode can't be called
558    // from multiple threads anyway.
559    static bool reported;
560    if (!reported) {
561      Report(msg);
562      reported = true;
563    }
564  }
565
566  // 16K loaded modules should be enough for everyone.
567  static const uptr kMaxNumberOfModuleContexts = 1 << 14;
568  LoadedModule *modules_;  // Array of module descriptions is leaked.
569  uptr n_modules_;
570  // If stale, need to reload the modules before looking up addresses.
571  bool modules_fresh_;
572  BlockingMutex mu_;
573
574  ExternalSymbolizer *external_symbolizer_;        // Leaked.
575  InternalSymbolizer *const internal_symbolizer_;  // Leaked.
576};
577
578Symbolizer *Symbolizer::PlatformInit(const char *path_to_external) {
579  InternalSymbolizer* internal_symbolizer =
580      InternalSymbolizer::get(&symbolizer_allocator_);
581  ExternalSymbolizer *external_symbolizer = 0;
582
583  if (!internal_symbolizer) {
584    if (!path_to_external || path_to_external[0] == '\0')
585      path_to_external = FindPathToBinary("llvm-symbolizer");
586
587    int input_fd, output_fd;
588    if (path_to_external &&
589        StartSymbolizerSubprocess(path_to_external, &input_fd, &output_fd)) {
590      external_symbolizer = new(symbolizer_allocator_)
591          ExternalSymbolizer(path_to_external, input_fd, output_fd);
592    }
593  }
594
595  return new(symbolizer_allocator_)
596      POSIXSymbolizer(external_symbolizer, internal_symbolizer);
597}
598
599}  // namespace __sanitizer
600
601#endif  // SANITIZER_POSIX
602