sanitizer_symbolizer_libcdep.cc revision 7ef67083ecf12c9b83dffd91eac0b3c739fa7a89
1//===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is shared between AddressSanitizer and ThreadSanitizer
11// run-time libraries. See sanitizer_symbolizer.h for details.
12//===----------------------------------------------------------------------===//
13
14#include "sanitizer_allocator_internal.h"
15#include "sanitizer_common.h"
16#include "sanitizer_placement_new.h"
17#include "sanitizer_procmaps.h"
18#include "sanitizer_symbolizer.h"
19
20namespace __sanitizer {
21
22void AddressInfo::Clear() {
23  InternalFree(module);
24  InternalFree(function);
25  InternalFree(file);
26  internal_memset(this, 0, sizeof(AddressInfo));
27}
28
29LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
30  full_name_ = internal_strdup(module_name);
31  base_address_ = base_address;
32  n_ranges_ = 0;
33}
34
35void LoadedModule::addAddressRange(uptr beg, uptr end) {
36  CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
37  ranges_[n_ranges_].beg = beg;
38  ranges_[n_ranges_].end = end;
39  n_ranges_++;
40}
41
42bool LoadedModule::containsAddress(uptr address) const {
43  for (uptr i = 0; i < n_ranges_; i++) {
44    if (ranges_[i].beg <= address && address < ranges_[i].end)
45      return true;
46  }
47  return false;
48}
49
50// Extracts the prefix of "str" that consists of any characters not
51// present in "delims" string, and copies this prefix to "result", allocating
52// space for it.
53// Returns a pointer to "str" after skipping extracted prefix and first
54// delimiter char.
55static const char *ExtractToken(const char *str, const char *delims,
56                                char **result) {
57  uptr prefix_len = internal_strcspn(str, delims);
58  *result = (char*)InternalAlloc(prefix_len + 1);
59  internal_memcpy(*result, str, prefix_len);
60  (*result)[prefix_len] = '\0';
61  const char *prefix_end = str + prefix_len;
62  if (*prefix_end != '\0') prefix_end++;
63  return prefix_end;
64}
65
66// Same as ExtractToken, but converts extracted token to integer.
67static const char *ExtractInt(const char *str, const char *delims,
68                              int *result) {
69  char *buff;
70  const char *ret = ExtractToken(str, delims, &buff);
71  if (buff != 0) {
72    *result = (int)internal_atoll(buff);
73  }
74  InternalFree(buff);
75  return ret;
76}
77
78static const char *ExtractUptr(const char *str, const char *delims,
79                               uptr *result) {
80  char *buff;
81  const char *ret = ExtractToken(str, delims, &buff);
82  if (buff != 0) {
83    *result = (uptr)internal_atoll(buff);
84  }
85  InternalFree(buff);
86  return ret;
87}
88
89// ExternalSymbolizer encapsulates communication between the tool and
90// external symbolizer program, running in a different subprocess,
91// For now we assume the following protocol:
92// For each request of the form
93//   <module_name> <module_offset>
94// passed to STDIN, external symbolizer prints to STDOUT response:
95//   <function_name>
96//   <file_name>:<line_number>:<column_number>
97//   <function_name>
98//   <file_name>:<line_number>:<column_number>
99//   ...
100//   <empty line>
101class ExternalSymbolizer {
102 public:
103  ExternalSymbolizer(const char *path, int input_fd, int output_fd)
104      : path_(path),
105        input_fd_(input_fd),
106        output_fd_(output_fd),
107        times_restarted_(0) {
108    CHECK(path_);
109    CHECK_NE(input_fd_, kInvalidFd);
110    CHECK_NE(output_fd_, kInvalidFd);
111  }
112
113  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
114    CHECK(module_name);
115    internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
116                      is_data ? "DATA " : "", module_name, module_offset);
117    if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
118      return 0;
119    if (!readFromSymbolizer(buffer_, kBufferSize))
120      return 0;
121    return buffer_;
122  }
123
124  bool Restart() {
125    if (times_restarted_ >= kMaxTimesRestarted) return false;
126    times_restarted_++;
127    internal_close(input_fd_);
128    internal_close(output_fd_);
129    return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
130  }
131
132  void Flush() {
133  }
134
135 private:
136  bool readFromSymbolizer(char *buffer, uptr max_length) {
137    if (max_length == 0)
138      return true;
139    uptr read_len = 0;
140    while (true) {
141      uptr just_read = internal_read(input_fd_, buffer + read_len,
142                                     max_length - read_len);
143      // We can't read 0 bytes, as we don't expect external symbolizer to close
144      // its stdout.
145      if (just_read == 0 || just_read == (uptr)-1) {
146        Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
147        return false;
148      }
149      read_len += just_read;
150      // Empty line marks the end of symbolizer output.
151      if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
152                           buffer[read_len - 2] == '\n') {
153        break;
154      }
155    }
156    return true;
157  }
158
159  bool writeToSymbolizer(const char *buffer, uptr length) {
160    if (length == 0)
161      return true;
162    uptr write_len = internal_write(output_fd_, buffer, length);
163    if (write_len == 0 || write_len == (uptr)-1) {
164      Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
165      return false;
166    }
167    return true;
168  }
169
170  const char *path_;
171  int input_fd_;
172  int output_fd_;
173
174  static const uptr kBufferSize = 16 * 1024;
175  char buffer_[kBufferSize];
176
177  static const uptr kMaxTimesRestarted = 5;
178  uptr times_restarted_;
179};
180
// Allocator used in this file for the symbolizer objects and the table of
// loaded modules. Nothing obtained from it is released in this file.
181static LowLevelAllocator symbolizer_allocator;  // Linker initialized.
182
183#if SANITIZER_SUPPORTS_WEAK_HOOKS
// Weakly declared hooks for an in-process symbolizer. InternalSymbolizer is
// only instantiated when both the code and the data hook are non-null; the
// flush and demangle hooks are checked separately before each use.
// NOTE(review): the exact contract of each hook is defined by whoever
// provides it. From the uses in this file, the symbolize hooks return true
// on success with the reply written to Buffer, and the demangle hook's
// return value is compared against the buffer length as the space it needs
// -- confirm against the hook implementation.
184extern "C" {
185SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
186bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
187                                char *Buffer, int MaxLength);
188SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
189bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
190                                char *Buffer, int MaxLength);
191SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
192void __sanitizer_symbolize_flush();
193SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
194int __sanitizer_symbolize_demangle(const char *Name, char *Buffer,
195                                   int MaxLength);
196}  // extern "C"
197
198class InternalSymbolizer {
199 public:
200  typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
201
202  static InternalSymbolizer *get() {
203    if (__sanitizer_symbolize_code != 0 &&
204        __sanitizer_symbolize_data != 0) {
205      void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer));
206      return new(mem) InternalSymbolizer();
207    }
208    return 0;
209  }
210
211  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
212    SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
213                                                : __sanitizer_symbolize_code;
214    if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
215      return buffer_;
216    return 0;
217  }
218
219  void Flush() {
220    if (__sanitizer_symbolize_flush)
221      __sanitizer_symbolize_flush();
222  }
223
224  const char *Demangle(const char *name) {
225    if (__sanitizer_symbolize_demangle) {
226      for (uptr res_length = 1024;
227           res_length <= InternalSizeClassMap::kMaxSize;) {
228        char *res_buff = static_cast<char*>(InternalAlloc(res_length));
229        uptr req_length =
230            __sanitizer_symbolize_demangle(name, res_buff, res_length);
231        if (req_length > res_length) {
232          res_length = req_length + 1;
233          InternalFree(res_buff);
234          continue;
235        }
236        return res_buff;
237      }
238    }
239    return name;
240  }
241
242 private:
243  InternalSymbolizer() { }
244
245  static const int kBufferSize = 16 * 1024;
246  static const int kMaxDemangledNameSize = 1024;
247  char buffer_[kBufferSize];
248};
249#else  // SANITIZER_SUPPORTS_WEAK_HOOKS
250
251class InternalSymbolizer {
252 public:
253  static InternalSymbolizer *get() { return 0; }
254  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
255    return 0;
256  }
257  void Flush() { }
258  const char *Demangle(const char *name) { return name; }
259};
260
261#endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
262
263class Symbolizer {
264  // This class has no constructor, as global constructors are forbidden in
265  // sanitizer_common. It should be linker initialized instead.
266 public:
267  uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
268    if (max_frames == 0)
269      return 0;
270    LoadedModule *module = FindModuleForAddress(addr);
271    if (module == 0)
272      return 0;
273    const char *module_name = module->full_name();
274    uptr module_offset = addr - module->base_address();
275    const char *str = SendCommand(false, module_name, module_offset);
276    if (str == 0) {
277      // External symbolizer was not initialized or failed. Fill only data
278      // about module name and offset.
279      AddressInfo *info = &frames[0];
280      info->Clear();
281      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
282      return 1;
283    }
284    uptr frame_id = 0;
285    for (frame_id = 0; frame_id < max_frames; frame_id++) {
286      AddressInfo *info = &frames[frame_id];
287      char *function_name = 0;
288      str = ExtractToken(str, "\n", &function_name);
289      CHECK(function_name);
290      if (function_name[0] == '\0') {
291        // There are no more frames.
292        break;
293      }
294      info->Clear();
295      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
296      info->function = function_name;
297      // Parse <file>:<line>:<column> buffer.
298      char *file_line_info = 0;
299      str = ExtractToken(str, "\n", &file_line_info);
300      CHECK(file_line_info);
301      const char *line_info = ExtractToken(file_line_info, ":", &info->file);
302      line_info = ExtractInt(line_info, ":", &info->line);
303      line_info = ExtractInt(line_info, "", &info->column);
304      InternalFree(file_line_info);
305
306      // Functions and filenames can be "??", in which case we write 0
307      // to address info to mark that names are unknown.
308      if (0 == internal_strcmp(info->function, "??")) {
309        InternalFree(info->function);
310        info->function = 0;
311      }
312      if (0 == internal_strcmp(info->file, "??")) {
313        InternalFree(info->file);
314        info->file = 0;
315      }
316    }
317    if (frame_id == 0) {
318      // Make sure we return at least one frame.
319      AddressInfo *info = &frames[0];
320      info->Clear();
321      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
322      frame_id = 1;
323    }
324    return frame_id;
325  }
326
327  bool SymbolizeData(uptr addr, DataInfo *info) {
328    LoadedModule *module = FindModuleForAddress(addr);
329    if (module == 0)
330      return false;
331    const char *module_name = module->full_name();
332    uptr module_offset = addr - module->base_address();
333    internal_memset(info, 0, sizeof(*info));
334    info->address = addr;
335    info->module = internal_strdup(module_name);
336    info->module_offset = module_offset;
337    const char *str = SendCommand(true, module_name, module_offset);
338    if (str == 0)
339      return true;
340    str = ExtractToken(str, "\n", &info->name);
341    str = ExtractUptr(str, " ", &info->start);
342    str = ExtractUptr(str, "\n", &info->size);
343    info->start += module->base_address();
344    return true;
345  }
346
347  bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
348    int input_fd, output_fd;
349    if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
350      return false;
351    void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
352    external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
353                                                       input_fd, output_fd);
354    return true;
355  }
356
357  bool IsSymbolizerAvailable() {
358    if (internal_symbolizer_ == 0)
359      internal_symbolizer_ = InternalSymbolizer::get();
360    return internal_symbolizer_ || external_symbolizer_;
361  }
362
363  void Flush() {
364    if (internal_symbolizer_)
365      internal_symbolizer_->Flush();
366    if (external_symbolizer_)
367      external_symbolizer_->Flush();
368  }
369
370  const char *Demangle(const char *name) {
371    if (IsSymbolizerAvailable() && internal_symbolizer_ != 0)
372      return internal_symbolizer_->Demangle(name);
373    return DemangleCXXABI(name);
374  }
375
376 private:
377  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
378    // First, try to use internal symbolizer.
379    if (!IsSymbolizerAvailable()) {
380      return 0;
381    }
382    if (internal_symbolizer_) {
383      return internal_symbolizer_->SendCommand(is_data, module_name,
384                                               module_offset);
385    }
386    // Otherwise, fall back to external symbolizer.
387    if (external_symbolizer_ == 0) {
388      ReportExternalSymbolizerError(
389          "WARNING: Trying to symbolize code, but external "
390          "symbolizer is not initialized!\n");
391      return 0;
392    }
393    for (;;) {
394      char *reply = external_symbolizer_->SendCommand(is_data, module_name,
395          module_offset);
396      if (reply)
397        return reply;
398      // Try to restart symbolizer subprocess. If we don't succeed, forget
399      // about it and don't try to use it later.
400      if (!external_symbolizer_->Restart()) {
401        ReportExternalSymbolizerError(
402            "WARNING: Failed to use and restart external symbolizer!\n");
403        external_symbolizer_ = 0;
404        return 0;
405      }
406    }
407  }
408
409  LoadedModule *FindModuleForAddress(uptr address) {
410    bool modules_were_reloaded = false;
411    if (modules_ == 0 || !modules_fresh_) {
412      modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
413          kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
414      CHECK(modules_);
415      n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts,
416                                    /* filter */ 0);
417      // FIXME: Return this check when GetListOfModules is implemented on Mac.
418      // CHECK_GT(n_modules_, 0);
419      CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
420      modules_fresh_ = true;
421      modules_were_reloaded = true;
422    }
423    for (uptr i = 0; i < n_modules_; i++) {
424      if (modules_[i].containsAddress(address)) {
425        return &modules_[i];
426      }
427    }
428    // Reload the modules and look up again, if we haven't tried it yet.
429    if (!modules_were_reloaded) {
430      // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
431      // It's too aggressive to reload the list of modules each time we fail
432      // to find a module for a given address.
433      modules_fresh_ = false;
434      return FindModuleForAddress(address);
435    }
436    return 0;
437  }
438
439  void ReportExternalSymbolizerError(const char *msg) {
440    // Don't use atomics here for now, as SymbolizeCode can't be called
441    // from multiple threads anyway.
442    static bool reported;
443    if (!reported) {
444      Report(msg);
445      reported = true;
446    }
447  }
448
449  // 16K loaded modules should be enough for everyone.
450  static const uptr kMaxNumberOfModuleContexts = 1 << 14;
451  LoadedModule *modules_;  // Array of module descriptions is leaked.
452  uptr n_modules_;
453  // If stale, need to reload the modules before looking up addresses.
454  bool modules_fresh_;
455
456  ExternalSymbolizer *external_symbolizer_;  // Leaked.
457  InternalSymbolizer *internal_symbolizer_;  // Leaked.
458};
459
// The one Symbolizer instance used by the free functions in this file.
// Symbolizer has no constructor, so its members start out zeroed.
460static Symbolizer symbolizer;  // Linker initialized.
461
462uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
463  return symbolizer.SymbolizeCode(address, frames, max_frames);
464}
465
466bool SymbolizeData(uptr address, DataInfo *info) {
467  return symbolizer.SymbolizeData(address, info);
468}
469
470bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
471  return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
472}
473
474bool IsSymbolizerAvailable() {
475  return symbolizer.IsSymbolizerAvailable();
476}
477
// Flushes the global symbolizer, i.e. whichever of its internal and external
// symbolizers exist.
478void FlushSymbolizer() {
479  symbolizer.Flush();
480}
481
482const char *Demangle(const char *name) {
483  return symbolizer.Demangle(name);
484}
485
486}  // namespace __sanitizer
487