sanitizer_symbolizer_libcdep.cc revision 68c016aea0b61f649b9d9ba65c7d7217e0c0f6cb
1//===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file is shared between AddressSanitizer and ThreadSanitizer 11// run-time libraries. See sanitizer_symbolizer.h for details. 12//===----------------------------------------------------------------------===// 13 14#include "sanitizer_allocator_internal.h" 15#include "sanitizer_common.h" 16#include "sanitizer_placement_new.h" 17#include "sanitizer_procmaps.h" 18#include "sanitizer_symbolizer.h" 19 20namespace __sanitizer { 21 22void AddressInfo::Clear() { 23 InternalFree(module); 24 InternalFree(function); 25 InternalFree(file); 26 internal_memset(this, 0, sizeof(AddressInfo)); 27} 28 29LoadedModule::LoadedModule(const char *module_name, uptr base_address) { 30 full_name_ = internal_strdup(module_name); 31 base_address_ = base_address; 32 n_ranges_ = 0; 33} 34 35void LoadedModule::addAddressRange(uptr beg, uptr end) { 36 CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges); 37 ranges_[n_ranges_].beg = beg; 38 ranges_[n_ranges_].end = end; 39 n_ranges_++; 40} 41 42bool LoadedModule::containsAddress(uptr address) const { 43 for (uptr i = 0; i < n_ranges_; i++) { 44 if (ranges_[i].beg <= address && address < ranges_[i].end) 45 return true; 46 } 47 return false; 48} 49 50// Extracts the prefix of "str" that consists of any characters not 51// present in "delims" string, and copies this prefix to "result", allocating 52// space for it. 53// Returns a pointer to "str" after skipping extracted prefix and first 54// delimiter char. 55static const char *ExtractToken(const char *str, const char *delims, 56 char **result) { 57 uptr prefix_len = internal_strcspn(str, delims); 58 *result = (char*)InternalAlloc(prefix_len + 1); 59 internal_memcpy(*result, str, prefix_len); 60 (*result)[prefix_len] = '\0'; 61 const char *prefix_end = str + prefix_len; 62 if (*prefix_end != '\0') prefix_end++; 63 return prefix_end; 64} 65 66// Same as ExtractToken, but converts extracted token to integer. 67static const char *ExtractInt(const char *str, const char *delims, 68 int *result) { 69 char *buff; 70 const char *ret = ExtractToken(str, delims, &buff); 71 if (buff != 0) { 72 *result = (int)internal_atoll(buff); 73 } 74 InternalFree(buff); 75 return ret; 76} 77 78static const char *ExtractUptr(const char *str, const char *delims, 79 uptr *result) { 80 char *buff; 81 const char *ret = ExtractToken(str, delims, &buff); 82 if (buff != 0) { 83 *result = (uptr)internal_atoll(buff); 84 } 85 InternalFree(buff); 86 return ret; 87} 88 89// ExternalSymbolizer encapsulates communication between the tool and 90// external symbolizer program, running in a different subprocess, 91// For now we assume the following protocol: 92// For each request of the form 93// <module_name> <module_offset> 94// passed to STDIN, external symbolizer prints to STDOUT response: 95// <function_name> 96// <file_name>:<line_number>:<column_number> 97// <function_name> 98// <file_name>:<line_number>:<column_number> 99// ... 100// <empty line> 101class ExternalSymbolizer { 102 public: 103 ExternalSymbolizer(const char *path, int input_fd, int output_fd) 104 : path_(path), 105 input_fd_(input_fd), 106 output_fd_(output_fd), 107 times_restarted_(0) { 108 CHECK(path_); 109 CHECK_NE(input_fd_, kInvalidFd); 110 CHECK_NE(output_fd_, kInvalidFd); 111 } 112 113 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 114 CHECK(module_name); 115 internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", 116 is_data ? "DATA " : "", module_name, module_offset); 117 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_))) 118 return 0; 119 if (!readFromSymbolizer(buffer_, kBufferSize)) 120 return 0; 121 return buffer_; 122 } 123 124 bool Restart() { 125 if (times_restarted_ >= kMaxTimesRestarted) return false; 126 times_restarted_++; 127 internal_close(input_fd_); 128 internal_close(output_fd_); 129 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_); 130 } 131 132 void Flush() { 133 } 134 135 private: 136 bool readFromSymbolizer(char *buffer, uptr max_length) { 137 if (max_length == 0) 138 return true; 139 uptr read_len = 0; 140 while (true) { 141 uptr just_read = internal_read(input_fd_, buffer + read_len, 142 max_length - read_len); 143 // We can't read 0 bytes, as we don't expect external symbolizer to close 144 // its stdout. 145 if (just_read == 0 || just_read == (uptr)-1) { 146 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 147 return false; 148 } 149 read_len += just_read; 150 // Empty line marks the end of symbolizer output. 151 if (read_len >= 2 && buffer[read_len - 1] == '\n' && 152 buffer[read_len - 2] == '\n') { 153 break; 154 } 155 } 156 return true; 157 } 158 159 bool writeToSymbolizer(const char *buffer, uptr length) { 160 if (length == 0) 161 return true; 162 uptr write_len = internal_write(output_fd_, buffer, length); 163 if (write_len == 0 || write_len == (uptr)-1) { 164 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 165 return false; 166 } 167 return true; 168 } 169 170 const char *path_; 171 int input_fd_; 172 int output_fd_; 173 174 static const uptr kBufferSize = 16 * 1024; 175 char buffer_[kBufferSize]; 176 177 static const uptr kMaxTimesRestarted = 5; 178 uptr times_restarted_; 179}; 180 181static LowLevelAllocator symbolizer_allocator; // Linker initialized. 182 183#if SANITIZER_SUPPORTS_WEAK_HOOKS 184extern "C" { 185SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE 186bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset, 187 char *Buffer, int MaxLength); 188SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE 189bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, 190 char *Buffer, int MaxLength); 191SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE 192void __sanitizer_symbolize_flush(); 193SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE 194int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, 195 int MaxLength); 196} // extern "C" 197 198class InternalSymbolizer { 199 public: 200 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int); 201 202 static InternalSymbolizer *get() { 203 if (__sanitizer_symbolize_code != 0 && 204 __sanitizer_symbolize_data != 0) { 205 void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer)); 206 return new(mem) InternalSymbolizer(); 207 } 208 return 0; 209 } 210 211 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 212 SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data 213 : __sanitizer_symbolize_code; 214 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize)) 215 return buffer_; 216 return 0; 217 } 218 219 void Flush() { 220 if (__sanitizer_symbolize_flush) 221 __sanitizer_symbolize_flush(); 222 } 223 224 const char *Demangle(const char *name) { 225 if (__sanitizer_symbolize_demangle) { 226 for (uptr res_length = 1024; 227 res_length <= InternalSizeClassMap::kMaxSize;) { 228 char *res_buff = static_cast<char*>(InternalAlloc(res_length)); 229 uptr req_length = 230 __sanitizer_symbolize_demangle(name, res_buff, res_length); 231 if (req_length > res_length) { 232 res_length = req_length + 1; 233 InternalFree(res_buff); 234 continue; 235 } 236 return res_buff; 237 } 238 } 239 return name; 240 } 241 242 private: 243 InternalSymbolizer() { } 244 245 static const int kBufferSize = 16 * 1024; 246 static const int kMaxDemangledNameSize = 1024; 247 char buffer_[kBufferSize]; 248}; 249#else // SANITIZER_SUPPORTS_WEAK_HOOKS 250 251class InternalSymbolizer { 252 public: 253 static InternalSymbolizer *get() { return 0; } 254 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 255 return 0; 256 } 257 void Flush() { } 258 const char *Demangle(const char *name) { return name; } 259}; 260 261#endif // SANITIZER_SUPPORTS_WEAK_HOOKS 262 263class Symbolizer { 264 // This class has no constructor, as global constructors are forbidden in 265 // sanitizer_common. It should be linker initialized instead. 266 public: 267 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) { 268 if (max_frames == 0) 269 return 0; 270 LoadedModule *module = FindModuleForAddress(addr); 271 if (module == 0) 272 return 0; 273 const char *module_name = module->full_name(); 274 uptr module_offset = addr - module->base_address(); 275 const char *str = SendCommand(false, module_name, module_offset); 276 if (str == 0) { 277 // External symbolizer was not initialized or failed. Fill only data 278 // about module name and offset. 279 AddressInfo *info = &frames[0]; 280 info->Clear(); 281 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 282 return 1; 283 } 284 uptr frame_id = 0; 285 for (frame_id = 0; frame_id < max_frames; frame_id++) { 286 AddressInfo *info = &frames[frame_id]; 287 char *function_name = 0; 288 str = ExtractToken(str, "\n", &function_name); 289 CHECK(function_name); 290 if (function_name[0] == '\0') { 291 // There are no more frames. 292 break; 293 } 294 info->Clear(); 295 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 296 info->function = function_name; 297 // Parse <file>:<line>:<column> buffer. 298 char *file_line_info = 0; 299 str = ExtractToken(str, "\n", &file_line_info); 300 CHECK(file_line_info); 301 const char *line_info = ExtractToken(file_line_info, ":", &info->file); 302 line_info = ExtractInt(line_info, ":", &info->line); 303 line_info = ExtractInt(line_info, "", &info->column); 304 InternalFree(file_line_info); 305 306 // Functions and filenames can be "??", in which case we write 0 307 // to address info to mark that names are unknown. 308 if (0 == internal_strcmp(info->function, "??")) { 309 InternalFree(info->function); 310 info->function = 0; 311 } 312 if (0 == internal_strcmp(info->file, "??")) { 313 InternalFree(info->file); 314 info->file = 0; 315 } 316 } 317 if (frame_id == 0) { 318 // Make sure we return at least one frame. 319 AddressInfo *info = &frames[0]; 320 info->Clear(); 321 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 322 frame_id = 1; 323 } 324 return frame_id; 325 } 326 327 bool SymbolizeData(uptr addr, DataInfo *info) { 328 LoadedModule *module = FindModuleForAddress(addr); 329 if (module == 0) 330 return false; 331 const char *module_name = module->full_name(); 332 uptr module_offset = addr - module->base_address(); 333 internal_memset(info, 0, sizeof(*info)); 334 info->address = addr; 335 info->module = internal_strdup(module_name); 336 info->module_offset = module_offset; 337 const char *str = SendCommand(true, module_name, module_offset); 338 if (str == 0) 339 return true; 340 str = ExtractToken(str, "\n", &info->name); 341 str = ExtractUptr(str, " ", &info->start); 342 str = ExtractUptr(str, "\n", &info->size); 343 info->start += module->base_address(); 344 return true; 345 } 346 347 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { 348 if (!path_to_symbolizer || path_to_symbolizer[0] == '\0') { 349 path_to_symbolizer = FindPathToBinary("llvm-symbolizer"); 350 if (!path_to_symbolizer) 351 return false; 352 } 353 int input_fd, output_fd; 354 if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd)) 355 return false; 356 void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer)); 357 external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer, 358 input_fd, output_fd); 359 return true; 360 } 361 362 bool IsSymbolizerAvailable() { 363 if (internal_symbolizer_ == 0) 364 internal_symbolizer_ = InternalSymbolizer::get(); 365 return internal_symbolizer_ || external_symbolizer_; 366 } 367 368 void Flush() { 369 if (internal_symbolizer_) 370 internal_symbolizer_->Flush(); 371 if (external_symbolizer_) 372 external_symbolizer_->Flush(); 373 } 374 375 const char *Demangle(const char *name) { 376 if (IsSymbolizerAvailable() && internal_symbolizer_ != 0) 377 return internal_symbolizer_->Demangle(name); 378 return DemangleCXXABI(name); 379 } 380 381 private: 382 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 383 // First, try to use internal symbolizer. 384 if (!IsSymbolizerAvailable()) { 385 return 0; 386 } 387 if (internal_symbolizer_) { 388 return internal_symbolizer_->SendCommand(is_data, module_name, 389 module_offset); 390 } 391 // Otherwise, fall back to external symbolizer. 392 if (external_symbolizer_ == 0) { 393 ReportExternalSymbolizerError( 394 "WARNING: Trying to symbolize code, but external " 395 "symbolizer is not initialized!\n"); 396 return 0; 397 } 398 for (;;) { 399 char *reply = external_symbolizer_->SendCommand(is_data, module_name, 400 module_offset); 401 if (reply) 402 return reply; 403 // Try to restart symbolizer subprocess. If we don't succeed, forget 404 // about it and don't try to use it later. 405 if (!external_symbolizer_->Restart()) { 406 ReportExternalSymbolizerError( 407 "WARNING: Failed to use and restart external symbolizer!\n"); 408 external_symbolizer_ = 0; 409 return 0; 410 } 411 } 412 } 413 414 LoadedModule *FindModuleForAddress(uptr address) { 415 bool modules_were_reloaded = false; 416 if (modules_ == 0 || !modules_fresh_) { 417 modules_ = (LoadedModule*)(symbolizer_allocator.Allocate( 418 kMaxNumberOfModuleContexts * sizeof(LoadedModule))); 419 CHECK(modules_); 420 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts, 421 /* filter */ 0); 422 // FIXME: Return this check when GetListOfModules is implemented on Mac. 423 // CHECK_GT(n_modules_, 0); 424 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts); 425 modules_fresh_ = true; 426 modules_were_reloaded = true; 427 } 428 for (uptr i = 0; i < n_modules_; i++) { 429 if (modules_[i].containsAddress(address)) { 430 return &modules_[i]; 431 } 432 } 433 // Reload the modules and look up again, if we haven't tried it yet. 434 if (!modules_were_reloaded) { 435 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors. 436 // It's too aggressive to reload the list of modules each time we fail 437 // to find a module for a given address. 438 modules_fresh_ = false; 439 return FindModuleForAddress(address); 440 } 441 return 0; 442 } 443 444 void ReportExternalSymbolizerError(const char *msg) { 445 // Don't use atomics here for now, as SymbolizeCode can't be called 446 // from multiple threads anyway. 447 static bool reported; 448 if (!reported) { 449 Report(msg); 450 reported = true; 451 } 452 } 453 454 // 16K loaded modules should be enough for everyone. 455 static const uptr kMaxNumberOfModuleContexts = 1 << 14; 456 LoadedModule *modules_; // Array of module descriptions is leaked. 457 uptr n_modules_; 458 // If stale, need to reload the modules before looking up addresses. 459 bool modules_fresh_; 460 461 ExternalSymbolizer *external_symbolizer_; // Leaked. 462 InternalSymbolizer *internal_symbolizer_; // Leaked. 463}; 464 465static Symbolizer symbolizer; // Linker initialized. 466 467uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) { 468 return symbolizer.SymbolizeCode(address, frames, max_frames); 469} 470 471bool SymbolizeData(uptr address, DataInfo *info) { 472 return symbolizer.SymbolizeData(address, info); 473} 474 475bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { 476 return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer); 477} 478 479bool IsSymbolizerAvailable() { 480 return symbolizer.IsSymbolizerAvailable(); 481} 482 483void FlushSymbolizer() { 484 symbolizer.Flush(); 485} 486 487const char *Demangle(const char *name) { 488 return symbolizer.Demangle(name); 489} 490 491} // namespace __sanitizer 492