sanitizer_symbolizer_libcdep.cc revision 8b3af3a0e9426e9e0b2986b97360bd820ef05c7f
1//===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file is shared between AddressSanitizer and ThreadSanitizer 11// run-time libraries. See sanitizer_symbolizer.h for details. 12//===----------------------------------------------------------------------===// 13 14#include "sanitizer_common.h" 15#include "sanitizer_placement_new.h" 16#include "sanitizer_procmaps.h" 17#include "sanitizer_symbolizer.h" 18 19namespace __sanitizer { 20 21void AddressInfo::Clear() { 22 InternalFree(module); 23 InternalFree(function); 24 InternalFree(file); 25 internal_memset(this, 0, sizeof(AddressInfo)); 26} 27 28LoadedModule::LoadedModule(const char *module_name, uptr base_address) { 29 full_name_ = internal_strdup(module_name); 30 base_address_ = base_address; 31 n_ranges_ = 0; 32} 33 34void LoadedModule::addAddressRange(uptr beg, uptr end) { 35 CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges); 36 ranges_[n_ranges_].beg = beg; 37 ranges_[n_ranges_].end = end; 38 n_ranges_++; 39} 40 41bool LoadedModule::containsAddress(uptr address) const { 42 for (uptr i = 0; i < n_ranges_; i++) { 43 if (ranges_[i].beg <= address && address < ranges_[i].end) 44 return true; 45 } 46 return false; 47} 48 49// Extracts the prefix of "str" that consists of any characters not 50// present in "delims" string, and copies this prefix to "result", allocating 51// space for it. 52// Returns a pointer to "str" after skipping extracted prefix and first 53// delimiter char. 54static const char *ExtractToken(const char *str, const char *delims, 55 char **result) { 56 uptr prefix_len = internal_strcspn(str, delims); 57 *result = (char*)InternalAlloc(prefix_len + 1); 58 internal_memcpy(*result, str, prefix_len); 59 (*result)[prefix_len] = '\0'; 60 const char *prefix_end = str + prefix_len; 61 if (*prefix_end != '\0') prefix_end++; 62 return prefix_end; 63} 64 65// Same as ExtractToken, but converts extracted token to integer. 66static const char *ExtractInt(const char *str, const char *delims, 67 int *result) { 68 char *buff; 69 const char *ret = ExtractToken(str, delims, &buff); 70 if (buff != 0) { 71 *result = (int)internal_atoll(buff); 72 } 73 InternalFree(buff); 74 return ret; 75} 76 77static const char *ExtractUptr(const char *str, const char *delims, 78 uptr *result) { 79 char *buff; 80 const char *ret = ExtractToken(str, delims, &buff); 81 if (buff != 0) { 82 *result = (uptr)internal_atoll(buff); 83 } 84 InternalFree(buff); 85 return ret; 86} 87 88// ExternalSymbolizer encapsulates communication between the tool and 89// external symbolizer program, running in a different subprocess, 90// For now we assume the following protocol: 91// For each request of the form 92// <module_name> <module_offset> 93// passed to STDIN, external symbolizer prints to STDOUT response: 94// <function_name> 95// <file_name>:<line_number>:<column_number> 96// <function_name> 97// <file_name>:<line_number>:<column_number> 98// ... 99// <empty line> 100class ExternalSymbolizer { 101 public: 102 ExternalSymbolizer(const char *path, int input_fd, int output_fd) 103 : path_(path), 104 input_fd_(input_fd), 105 output_fd_(output_fd), 106 times_restarted_(0) { 107 CHECK(path_); 108 CHECK_NE(input_fd_, kInvalidFd); 109 CHECK_NE(output_fd_, kInvalidFd); 110 } 111 112 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 113 CHECK(module_name); 114 internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", 115 is_data ? "DATA " : "", module_name, module_offset); 116 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_))) 117 return 0; 118 if (!readFromSymbolizer(buffer_, kBufferSize)) 119 return 0; 120 return buffer_; 121 } 122 123 bool Restart() { 124 if (times_restarted_ >= kMaxTimesRestarted) return false; 125 times_restarted_++; 126 internal_close(input_fd_); 127 internal_close(output_fd_); 128 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_); 129 } 130 131 void Flush() { 132 } 133 134 private: 135 bool readFromSymbolizer(char *buffer, uptr max_length) { 136 if (max_length == 0) 137 return true; 138 uptr read_len = 0; 139 while (true) { 140 uptr just_read = internal_read(input_fd_, buffer + read_len, 141 max_length - read_len); 142 // We can't read 0 bytes, as we don't expect external symbolizer to close 143 // its stdout. 144 if (just_read == 0 || just_read == (uptr)-1) { 145 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 146 return false; 147 } 148 read_len += just_read; 149 // Empty line marks the end of symbolizer output. 150 if (read_len >= 2 && buffer[read_len - 1] == '\n' && 151 buffer[read_len - 2] == '\n') { 152 break; 153 } 154 } 155 return true; 156 } 157 158 bool writeToSymbolizer(const char *buffer, uptr length) { 159 if (length == 0) 160 return true; 161 uptr write_len = internal_write(output_fd_, buffer, length); 162 if (write_len == 0 || write_len == (uptr)-1) { 163 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 164 return false; 165 } 166 return true; 167 } 168 169 const char *path_; 170 int input_fd_; 171 int output_fd_; 172 173 static const uptr kBufferSize = 16 * 1024; 174 char buffer_[kBufferSize]; 175 176 static const uptr kMaxTimesRestarted = 5; 177 uptr times_restarted_; 178}; 179 180static LowLevelAllocator symbolizer_allocator; // Linker initialized. 181 182#if SANITIZER_SUPPORTS_WEAK_HOOKS 183extern "C" { 184SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 185bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset, 186 char *Buffer, int MaxLength); 187SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 188bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, 189 char *Buffer, int MaxLength); 190SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 191void __sanitizer_symbolize_flush(); 192} // extern "C" 193 194class InternalSymbolizer { 195 public: 196 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int); 197 198 static InternalSymbolizer *get() { 199 if (__sanitizer_symbolize_code != 0 && 200 __sanitizer_symbolize_data != 0) { 201 void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer)); 202 return new(mem) InternalSymbolizer(); 203 } 204 return 0; 205 } 206 207 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 208 SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data 209 : __sanitizer_symbolize_code; 210 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize)) 211 return buffer_; 212 return 0; 213 } 214 215 void Flush() { 216 if (__sanitizer_symbolize_flush) 217 __sanitizer_symbolize_flush(); 218 } 219 220 private: 221 InternalSymbolizer() { } 222 223 static const int kBufferSize = 16 * 1024; 224 char buffer_[kBufferSize]; 225}; 226#else // SANITIZER_SUPPORTS_WEAK_HOOKS 227 228class InternalSymbolizer { 229 public: 230 static InternalSymbolizer *get() { return 0; } 231 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 232 return 0; 233 } 234 void Flush() { 235 } 236}; 237 238#endif // SANITIZER_SUPPORTS_WEAK_HOOKS 239 240class Symbolizer { 241 // This class has no constructor, as global constructors are forbidden in 242 // sanitizer_common. It should be linker initialized instead. 243 public: 244 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) { 245 if (max_frames == 0) 246 return 0; 247 LoadedModule *module = FindModuleForAddress(addr); 248 if (module == 0) 249 return 0; 250 const char *module_name = module->full_name(); 251 uptr module_offset = addr - module->base_address(); 252 const char *str = SendCommand(false, module_name, module_offset); 253 if (str == 0) { 254 // External symbolizer was not initialized or failed. Fill only data 255 // about module name and offset. 256 AddressInfo *info = &frames[0]; 257 info->Clear(); 258 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 259 return 1; 260 } 261 uptr frame_id = 0; 262 for (frame_id = 0; frame_id < max_frames; frame_id++) { 263 AddressInfo *info = &frames[frame_id]; 264 char *function_name = 0; 265 str = ExtractToken(str, "\n", &function_name); 266 CHECK(function_name); 267 if (function_name[0] == '\0') { 268 // There are no more frames. 269 break; 270 } 271 info->Clear(); 272 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 273 info->function = function_name; 274 // Parse <file>:<line>:<column> buffer. 275 char *file_line_info = 0; 276 str = ExtractToken(str, "\n", &file_line_info); 277 CHECK(file_line_info); 278 const char *line_info = ExtractToken(file_line_info, ":", &info->file); 279 line_info = ExtractInt(line_info, ":", &info->line); 280 line_info = ExtractInt(line_info, "", &info->column); 281 InternalFree(file_line_info); 282 283 // Functions and filenames can be "??", in which case we write 0 284 // to address info to mark that names are unknown. 285 if (0 == internal_strcmp(info->function, "??")) { 286 InternalFree(info->function); 287 info->function = 0; 288 } 289 if (0 == internal_strcmp(info->file, "??")) { 290 InternalFree(info->file); 291 info->file = 0; 292 } 293 } 294 if (frame_id == 0) { 295 // Make sure we return at least one frame. 296 AddressInfo *info = &frames[0]; 297 info->Clear(); 298 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 299 frame_id = 1; 300 } 301 return frame_id; 302 } 303 304 bool SymbolizeData(uptr addr, DataInfo *info) { 305 LoadedModule *module = FindModuleForAddress(addr); 306 if (module == 0) 307 return false; 308 const char *module_name = module->full_name(); 309 uptr module_offset = addr - module->base_address(); 310 internal_memset(info, 0, sizeof(*info)); 311 info->address = addr; 312 info->module = internal_strdup(module_name); 313 info->module_offset = module_offset; 314 const char *str = SendCommand(true, module_name, module_offset); 315 if (str == 0) 316 return true; 317 str = ExtractToken(str, "\n", &info->name); 318 str = ExtractUptr(str, " ", &info->start); 319 str = ExtractUptr(str, "\n", &info->size); 320 info->start += module->base_address(); 321 return true; 322 } 323 324 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { 325 int input_fd, output_fd; 326 if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd)) 327 return false; 328 void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer)); 329 external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer, 330 input_fd, output_fd); 331 return true; 332 } 333 334 bool IsSymbolizerAvailable() { 335 if (internal_symbolizer_ == 0) 336 internal_symbolizer_ = InternalSymbolizer::get(); 337 return internal_symbolizer_ || external_symbolizer_; 338 } 339 340 void Flush() { 341 if (internal_symbolizer_) 342 internal_symbolizer_->Flush(); 343 if (external_symbolizer_) 344 external_symbolizer_->Flush(); 345 } 346 347 private: 348 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 349 // First, try to use internal symbolizer. 350 if (!IsSymbolizerAvailable()) { 351 return 0; 352 } 353 if (internal_symbolizer_) { 354 return internal_symbolizer_->SendCommand(is_data, module_name, 355 module_offset); 356 } 357 // Otherwise, fall back to external symbolizer. 358 if (external_symbolizer_ == 0) { 359 ReportExternalSymbolizerError( 360 "WARNING: Trying to symbolize code, but external " 361 "symbolizer is not initialized!\n"); 362 return 0; 363 } 364 for (;;) { 365 char *reply = external_symbolizer_->SendCommand(is_data, module_name, 366 module_offset); 367 if (reply) 368 return reply; 369 // Try to restart symbolizer subprocess. If we don't succeed, forget 370 // about it and don't try to use it later. 371 if (!external_symbolizer_->Restart()) { 372 ReportExternalSymbolizerError( 373 "WARNING: Failed to use and restart external symbolizer!\n"); 374 external_symbolizer_ = 0; 375 return 0; 376 } 377 } 378 } 379 380 LoadedModule *FindModuleForAddress(uptr address) { 381 bool modules_were_reloaded = false; 382 if (modules_ == 0 || !modules_fresh_) { 383 modules_ = (LoadedModule*)(symbolizer_allocator.Allocate( 384 kMaxNumberOfModuleContexts * sizeof(LoadedModule))); 385 CHECK(modules_); 386 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts, 387 /* filter */ 0); 388 // FIXME: Return this check when GetListOfModules is implemented on Mac. 389 // CHECK_GT(n_modules_, 0); 390 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts); 391 modules_fresh_ = true; 392 modules_were_reloaded = true; 393 } 394 for (uptr i = 0; i < n_modules_; i++) { 395 if (modules_[i].containsAddress(address)) { 396 return &modules_[i]; 397 } 398 } 399 // Reload the modules and look up again, if we haven't tried it yet. 400 if (!modules_were_reloaded) { 401 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors. 402 // It's too aggressive to reload the list of modules each time we fail 403 // to find a module for a given address. 404 modules_fresh_ = false; 405 return FindModuleForAddress(address); 406 } 407 return 0; 408 } 409 410 void ReportExternalSymbolizerError(const char *msg) { 411 // Don't use atomics here for now, as SymbolizeCode can't be called 412 // from multiple threads anyway. 413 static bool reported; 414 if (!reported) { 415 Report(msg); 416 reported = true; 417 } 418 } 419 420 // 16K loaded modules should be enough for everyone. 421 static const uptr kMaxNumberOfModuleContexts = 1 << 14; 422 LoadedModule *modules_; // Array of module descriptions is leaked. 423 uptr n_modules_; 424 // If stale, need to reload the modules before looking up addresses. 425 bool modules_fresh_; 426 427 ExternalSymbolizer *external_symbolizer_; // Leaked. 428 InternalSymbolizer *internal_symbolizer_; // Leaked. 429}; 430 431static Symbolizer symbolizer; // Linker initialized. 432 433uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) { 434 return symbolizer.SymbolizeCode(address, frames, max_frames); 435} 436 437bool SymbolizeData(uptr address, DataInfo *info) { 438 return symbolizer.SymbolizeData(address, info); 439} 440 441bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { 442 return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer); 443} 444 445bool IsSymbolizerAvailable() { 446 return symbolizer.IsSymbolizerAvailable(); 447} 448 449void FlushSymbolizer() { 450 symbolizer.Flush(); 451} 452 453} // namespace __sanitizer 454