sanitizer_symbolizer_libcdep.cc revision 8b3af3a0e9426e9e0b2986b97360bd820ef05c7f
1//===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is shared between AddressSanitizer and ThreadSanitizer
11// run-time libraries. See sanitizer_symbolizer.h for details.
12//===----------------------------------------------------------------------===//
13
14#include "sanitizer_common.h"
15#include "sanitizer_placement_new.h"
16#include "sanitizer_procmaps.h"
17#include "sanitizer_symbolizer.h"
18
19namespace __sanitizer {
20
21void AddressInfo::Clear() {
22  InternalFree(module);
23  InternalFree(function);
24  InternalFree(file);
25  internal_memset(this, 0, sizeof(AddressInfo));
26}
27
28LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
29  full_name_ = internal_strdup(module_name);
30  base_address_ = base_address;
31  n_ranges_ = 0;
32}
33
34void LoadedModule::addAddressRange(uptr beg, uptr end) {
35  CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
36  ranges_[n_ranges_].beg = beg;
37  ranges_[n_ranges_].end = end;
38  n_ranges_++;
39}
40
41bool LoadedModule::containsAddress(uptr address) const {
42  for (uptr i = 0; i < n_ranges_; i++) {
43    if (ranges_[i].beg <= address && address < ranges_[i].end)
44      return true;
45  }
46  return false;
47}
48
49// Extracts the prefix of "str" that consists of any characters not
50// present in "delims" string, and copies this prefix to "result", allocating
51// space for it.
52// Returns a pointer to "str" after skipping extracted prefix and first
53// delimiter char.
54static const char *ExtractToken(const char *str, const char *delims,
55                                char **result) {
56  uptr prefix_len = internal_strcspn(str, delims);
57  *result = (char*)InternalAlloc(prefix_len + 1);
58  internal_memcpy(*result, str, prefix_len);
59  (*result)[prefix_len] = '\0';
60  const char *prefix_end = str + prefix_len;
61  if (*prefix_end != '\0') prefix_end++;
62  return prefix_end;
63}
64
65// Same as ExtractToken, but converts extracted token to integer.
66static const char *ExtractInt(const char *str, const char *delims,
67                              int *result) {
68  char *buff;
69  const char *ret = ExtractToken(str, delims, &buff);
70  if (buff != 0) {
71    *result = (int)internal_atoll(buff);
72  }
73  InternalFree(buff);
74  return ret;
75}
76
77static const char *ExtractUptr(const char *str, const char *delims,
78                               uptr *result) {
79  char *buff;
80  const char *ret = ExtractToken(str, delims, &buff);
81  if (buff != 0) {
82    *result = (uptr)internal_atoll(buff);
83  }
84  InternalFree(buff);
85  return ret;
86}
87
88// ExternalSymbolizer encapsulates communication between the tool and
89// external symbolizer program, running in a different subprocess,
90// For now we assume the following protocol:
91// For each request of the form
92//   <module_name> <module_offset>
93// passed to STDIN, external symbolizer prints to STDOUT response:
94//   <function_name>
95//   <file_name>:<line_number>:<column_number>
96//   <function_name>
97//   <file_name>:<line_number>:<column_number>
98//   ...
99//   <empty line>
100class ExternalSymbolizer {
101 public:
102  ExternalSymbolizer(const char *path, int input_fd, int output_fd)
103      : path_(path),
104        input_fd_(input_fd),
105        output_fd_(output_fd),
106        times_restarted_(0) {
107    CHECK(path_);
108    CHECK_NE(input_fd_, kInvalidFd);
109    CHECK_NE(output_fd_, kInvalidFd);
110  }
111
112  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
113    CHECK(module_name);
114    internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
115                      is_data ? "DATA " : "", module_name, module_offset);
116    if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
117      return 0;
118    if (!readFromSymbolizer(buffer_, kBufferSize))
119      return 0;
120    return buffer_;
121  }
122
123  bool Restart() {
124    if (times_restarted_ >= kMaxTimesRestarted) return false;
125    times_restarted_++;
126    internal_close(input_fd_);
127    internal_close(output_fd_);
128    return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
129  }
130
131  void Flush() {
132  }
133
134 private:
135  bool readFromSymbolizer(char *buffer, uptr max_length) {
136    if (max_length == 0)
137      return true;
138    uptr read_len = 0;
139    while (true) {
140      uptr just_read = internal_read(input_fd_, buffer + read_len,
141                                     max_length - read_len);
142      // We can't read 0 bytes, as we don't expect external symbolizer to close
143      // its stdout.
144      if (just_read == 0 || just_read == (uptr)-1) {
145        Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
146        return false;
147      }
148      read_len += just_read;
149      // Empty line marks the end of symbolizer output.
150      if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
151                           buffer[read_len - 2] == '\n') {
152        break;
153      }
154    }
155    return true;
156  }
157
158  bool writeToSymbolizer(const char *buffer, uptr length) {
159    if (length == 0)
160      return true;
161    uptr write_len = internal_write(output_fd_, buffer, length);
162    if (write_len == 0 || write_len == (uptr)-1) {
163      Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
164      return false;
165    }
166    return true;
167  }
168
169  const char *path_;
170  int input_fd_;
171  int output_fd_;
172
173  static const uptr kBufferSize = 16 * 1024;
174  char buffer_[kBufferSize];
175
176  static const uptr kMaxTimesRestarted = 5;
177  uptr times_restarted_;
178};
179
// Allocator backing the objects created in this file: the internal and
// external symbolizer instances and the array of loaded modules. Nothing
// in this file returns memory to it.
static LowLevelAllocator symbolizer_allocator;  // Linker initialized.
181
182#if SANITIZER_SUPPORTS_WEAK_HOOKS
183extern "C" {
184SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
185bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
186                                char *Buffer, int MaxLength);
187SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
188bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
189                                char *Buffer, int MaxLength);
190SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
191void __sanitizer_symbolize_flush();
192}  // extern "C"
193
194class InternalSymbolizer {
195 public:
196  typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
197
198  static InternalSymbolizer *get() {
199    if (__sanitizer_symbolize_code != 0 &&
200        __sanitizer_symbolize_data != 0) {
201      void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer));
202      return new(mem) InternalSymbolizer();
203    }
204    return 0;
205  }
206
207  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
208    SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
209                                                : __sanitizer_symbolize_code;
210    if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
211      return buffer_;
212    return 0;
213  }
214
215  void Flush() {
216    if (__sanitizer_symbolize_flush)
217      __sanitizer_symbolize_flush();
218  }
219
220 private:
221  InternalSymbolizer() { }
222
223  static const int kBufferSize = 16 * 1024;
224  char buffer_[kBufferSize];
225};
226#else  // SANITIZER_SUPPORTS_WEAK_HOOKS
227
228class InternalSymbolizer {
229 public:
230  static InternalSymbolizer *get() { return 0; }
231  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
232    return 0;
233  }
234  void Flush() {
235  }
236};
237
238#endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
239
240class Symbolizer {
241  // This class has no constructor, as global constructors are forbidden in
242  // sanitizer_common. It should be linker initialized instead.
243 public:
244  uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
245    if (max_frames == 0)
246      return 0;
247    LoadedModule *module = FindModuleForAddress(addr);
248    if (module == 0)
249      return 0;
250    const char *module_name = module->full_name();
251    uptr module_offset = addr - module->base_address();
252    const char *str = SendCommand(false, module_name, module_offset);
253    if (str == 0) {
254      // External symbolizer was not initialized or failed. Fill only data
255      // about module name and offset.
256      AddressInfo *info = &frames[0];
257      info->Clear();
258      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
259      return 1;
260    }
261    uptr frame_id = 0;
262    for (frame_id = 0; frame_id < max_frames; frame_id++) {
263      AddressInfo *info = &frames[frame_id];
264      char *function_name = 0;
265      str = ExtractToken(str, "\n", &function_name);
266      CHECK(function_name);
267      if (function_name[0] == '\0') {
268        // There are no more frames.
269        break;
270      }
271      info->Clear();
272      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
273      info->function = function_name;
274      // Parse <file>:<line>:<column> buffer.
275      char *file_line_info = 0;
276      str = ExtractToken(str, "\n", &file_line_info);
277      CHECK(file_line_info);
278      const char *line_info = ExtractToken(file_line_info, ":", &info->file);
279      line_info = ExtractInt(line_info, ":", &info->line);
280      line_info = ExtractInt(line_info, "", &info->column);
281      InternalFree(file_line_info);
282
283      // Functions and filenames can be "??", in which case we write 0
284      // to address info to mark that names are unknown.
285      if (0 == internal_strcmp(info->function, "??")) {
286        InternalFree(info->function);
287        info->function = 0;
288      }
289      if (0 == internal_strcmp(info->file, "??")) {
290        InternalFree(info->file);
291        info->file = 0;
292      }
293    }
294    if (frame_id == 0) {
295      // Make sure we return at least one frame.
296      AddressInfo *info = &frames[0];
297      info->Clear();
298      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
299      frame_id = 1;
300    }
301    return frame_id;
302  }
303
304  bool SymbolizeData(uptr addr, DataInfo *info) {
305    LoadedModule *module = FindModuleForAddress(addr);
306    if (module == 0)
307      return false;
308    const char *module_name = module->full_name();
309    uptr module_offset = addr - module->base_address();
310    internal_memset(info, 0, sizeof(*info));
311    info->address = addr;
312    info->module = internal_strdup(module_name);
313    info->module_offset = module_offset;
314    const char *str = SendCommand(true, module_name, module_offset);
315    if (str == 0)
316      return true;
317    str = ExtractToken(str, "\n", &info->name);
318    str = ExtractUptr(str, " ", &info->start);
319    str = ExtractUptr(str, "\n", &info->size);
320    info->start += module->base_address();
321    return true;
322  }
323
324  bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
325    int input_fd, output_fd;
326    if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
327      return false;
328    void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
329    external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
330                                                       input_fd, output_fd);
331    return true;
332  }
333
334  bool IsSymbolizerAvailable() {
335    if (internal_symbolizer_ == 0)
336      internal_symbolizer_ = InternalSymbolizer::get();
337    return internal_symbolizer_ || external_symbolizer_;
338  }
339
340  void Flush() {
341    if (internal_symbolizer_)
342      internal_symbolizer_->Flush();
343    if (external_symbolizer_)
344      external_symbolizer_->Flush();
345  }
346
347 private:
348  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
349    // First, try to use internal symbolizer.
350    if (!IsSymbolizerAvailable()) {
351      return 0;
352    }
353    if (internal_symbolizer_) {
354      return internal_symbolizer_->SendCommand(is_data, module_name,
355                                               module_offset);
356    }
357    // Otherwise, fall back to external symbolizer.
358    if (external_symbolizer_ == 0) {
359      ReportExternalSymbolizerError(
360          "WARNING: Trying to symbolize code, but external "
361          "symbolizer is not initialized!\n");
362      return 0;
363    }
364    for (;;) {
365      char *reply = external_symbolizer_->SendCommand(is_data, module_name,
366          module_offset);
367      if (reply)
368        return reply;
369      // Try to restart symbolizer subprocess. If we don't succeed, forget
370      // about it and don't try to use it later.
371      if (!external_symbolizer_->Restart()) {
372        ReportExternalSymbolizerError(
373            "WARNING: Failed to use and restart external symbolizer!\n");
374        external_symbolizer_ = 0;
375        return 0;
376      }
377    }
378  }
379
380  LoadedModule *FindModuleForAddress(uptr address) {
381    bool modules_were_reloaded = false;
382    if (modules_ == 0 || !modules_fresh_) {
383      modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
384          kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
385      CHECK(modules_);
386      n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts,
387                                    /* filter */ 0);
388      // FIXME: Return this check when GetListOfModules is implemented on Mac.
389      // CHECK_GT(n_modules_, 0);
390      CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
391      modules_fresh_ = true;
392      modules_were_reloaded = true;
393    }
394    for (uptr i = 0; i < n_modules_; i++) {
395      if (modules_[i].containsAddress(address)) {
396        return &modules_[i];
397      }
398    }
399    // Reload the modules and look up again, if we haven't tried it yet.
400    if (!modules_were_reloaded) {
401      // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
402      // It's too aggressive to reload the list of modules each time we fail
403      // to find a module for a given address.
404      modules_fresh_ = false;
405      return FindModuleForAddress(address);
406    }
407    return 0;
408  }
409
410  void ReportExternalSymbolizerError(const char *msg) {
411    // Don't use atomics here for now, as SymbolizeCode can't be called
412    // from multiple threads anyway.
413    static bool reported;
414    if (!reported) {
415      Report(msg);
416      reported = true;
417    }
418  }
419
420  // 16K loaded modules should be enough for everyone.
421  static const uptr kMaxNumberOfModuleContexts = 1 << 14;
422  LoadedModule *modules_;  // Array of module descriptions is leaked.
423  uptr n_modules_;
424  // If stale, need to reload the modules before looking up addresses.
425  bool modules_fresh_;
426
427  ExternalSymbolizer *external_symbolizer_;  // Leaked.
428  InternalSymbolizer *internal_symbolizer_;  // Leaked.
429};
430
// The single Symbolizer instance that all public functions below forward
// to. Symbolizer has no constructor, so this object relies on static
// zero-initialization.
static Symbolizer symbolizer;  // Linker initialized.
432
// Symbolizes the code address "address" into at most "max_frames" entries
// of "frames" by forwarding to the file-local "symbolizer" instance, and
// returns whatever Symbolizer::SymbolizeCode returns.
uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
  return symbolizer.SymbolizeCode(address, frames, max_frames);
}
436
// Describes the data address "address" in "*info" by forwarding to the
// file-local "symbolizer" instance, and returns whatever
// Symbolizer::SymbolizeData returns.
bool SymbolizeData(uptr address, DataInfo *info) {
  return symbolizer.SymbolizeData(address, info);
}
440
// Sets up the external symbolizer located at "path_to_symbolizer" by
// forwarding to the file-local "symbolizer" instance, and returns whatever
// Symbolizer::InitializeExternalSymbolizer returns.
bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
  return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
}
444
// Forwards to Symbolizer::IsSymbolizerAvailable on the file-local
// "symbolizer" instance and returns its result.
bool IsSymbolizerAvailable() {
  return symbolizer.IsSymbolizerAvailable();
}
448
// Forwards to Symbolizer::Flush on the file-local "symbolizer" instance.
void FlushSymbolizer() {
  symbolizer.Flush();
}
452
453}  // namespace __sanitizer
454