// sanitizer_symbolizer_posix_libcdep.cc revision e059bd35a33040c9ecbd291a190449f7a0a6f17f
1//===-- sanitizer_symbolizer_posix_libcdep.cc -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file is shared between AddressSanitizer and ThreadSanitizer 11// run-time libraries. 12// POSIX-specific implementation of symbolizer parts. 13//===----------------------------------------------------------------------===// 14 15#include "sanitizer_platform.h" 16#if SANITIZER_POSIX 17#include "sanitizer_allocator_internal.h" 18#include "sanitizer_common.h" 19#include "sanitizer_internal_defs.h" 20#include "sanitizer_linux.h" 21#include "sanitizer_placement_new.h" 22#include "sanitizer_procmaps.h" 23#include "sanitizer_symbolizer.h" 24 25#include <errno.h> 26#include <stdlib.h> 27#include <sys/wait.h> 28#include <unistd.h> 29 30// C++ demangling function, as required by Itanium C++ ABI. This is weak, 31// because we do not require a C++ ABI library to be linked to a program 32// using sanitizers; if it's not present, we'll just use the mangled name. 33namespace __cxxabiv1 { 34 extern "C" SANITIZER_WEAK_ATTRIBUTE 35 char *__cxa_demangle(const char *mangled, char *buffer, 36 size_t *length, int *status); 37} 38 39namespace __sanitizer { 40 41// Attempts to demangle the name via __cxa_demangle from __cxxabiv1. 42static const char *DemangleCXXABI(const char *name) { 43 // FIXME: __cxa_demangle aggressively insists on allocating memory. 44 // There's not much we can do about that, short of providing our 45 // own demangler (libc++abi's implementation could be adapted so that 46 // it does not allocate). For now, we just call it anyway, and we leak 47 // the returned value. 
48 if (__cxxabiv1::__cxa_demangle) 49 if (const char *demangled_name = 50 __cxxabiv1::__cxa_demangle(name, 0, 0, 0)) 51 return demangled_name; 52 53 return name; 54} 55 56#if defined(__x86_64__) 57static const char* const kSymbolizerArch = "--default-arch=x86_64"; 58#elif defined(__i386__) 59static const char* const kSymbolizerArch = "--default-arch=i386"; 60#elif defined(__powerpc64__) 61static const char* const kSymbolizerArch = "--default-arch=powerpc64"; 62#else 63static const char* const kSymbolizerArch = "--default-arch=unknown"; 64#endif 65 66static const int kSymbolizerStartupTimeMillis = 10; 67 68// Creates external symbolizer connected via pipe, user should write 69// to output_fd and read from input_fd. 70static bool StartSymbolizerSubprocess(const char *path_to_symbolizer, 71 int *input_fd, int *output_fd) { 72 if (!FileExists(path_to_symbolizer)) { 73 Report("WARNING: invalid path to external symbolizer!\n"); 74 return false; 75 } 76 77 int *infd = NULL; 78 int *outfd = NULL; 79 // The client program may close its stdin and/or stdout and/or stderr 80 // thus allowing socketpair to reuse file descriptors 0, 1 or 2. 81 // In this case the communication between the forked processes may be 82 // broken if either the parent or the child tries to close or duplicate 83 // these descriptors. The loop below produces two pairs of file 84 // descriptors, each greater than 2 (stderr). 
85 int sock_pair[5][2]; 86 for (int i = 0; i < 5; i++) { 87 if (pipe(sock_pair[i]) == -1) { 88 for (int j = 0; j < i; j++) { 89 internal_close(sock_pair[j][0]); 90 internal_close(sock_pair[j][1]); 91 } 92 Report("WARNING: Can't create a socket pair to start " 93 "external symbolizer (errno: %d)\n", errno); 94 return false; 95 } else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) { 96 if (infd == NULL) { 97 infd = sock_pair[i]; 98 } else { 99 outfd = sock_pair[i]; 100 for (int j = 0; j < i; j++) { 101 if (sock_pair[j] == infd) continue; 102 internal_close(sock_pair[j][0]); 103 internal_close(sock_pair[j][1]); 104 } 105 break; 106 } 107 } 108 } 109 CHECK(infd); 110 CHECK(outfd); 111 112 int pid = fork(); 113 if (pid == -1) { 114 // Fork() failed. 115 internal_close(infd[0]); 116 internal_close(infd[1]); 117 internal_close(outfd[0]); 118 internal_close(outfd[1]); 119 Report("WARNING: failed to fork external symbolizer " 120 " (errno: %d)\n", errno); 121 return false; 122 } else if (pid == 0) { 123 // Child subprocess. 124 internal_close(STDOUT_FILENO); 125 internal_close(STDIN_FILENO); 126 internal_dup2(outfd[0], STDIN_FILENO); 127 internal_dup2(infd[1], STDOUT_FILENO); 128 internal_close(outfd[0]); 129 internal_close(outfd[1]); 130 internal_close(infd[0]); 131 internal_close(infd[1]); 132 for (int fd = getdtablesize(); fd > 2; fd--) 133 internal_close(fd); 134 execl(path_to_symbolizer, path_to_symbolizer, kSymbolizerArch, (char*)0); 135 internal__exit(1); 136 } 137 138 // Continue execution in parent process. 139 internal_close(outfd[0]); 140 internal_close(infd[1]); 141 *input_fd = infd[0]; 142 *output_fd = outfd[1]; 143 144 // Check that symbolizer subprocess started successfully. 145 int pid_status; 146 SleepForMillis(kSymbolizerStartupTimeMillis); 147 int exited_pid = waitpid(pid, &pid_status, WNOHANG); 148 if (exited_pid != 0) { 149 // Either waitpid failed, or child has already exited. 
150 Report("WARNING: external symbolizer didn't start up correctly!\n"); 151 return false; 152 } 153 154 return true; 155} 156 157// Extracts the prefix of "str" that consists of any characters not 158// present in "delims" string, and copies this prefix to "result", allocating 159// space for it. 160// Returns a pointer to "str" after skipping extracted prefix and first 161// delimiter char. 162static const char *ExtractToken(const char *str, const char *delims, 163 char **result) { 164 uptr prefix_len = internal_strcspn(str, delims); 165 *result = (char*)InternalAlloc(prefix_len + 1); 166 internal_memcpy(*result, str, prefix_len); 167 (*result)[prefix_len] = '\0'; 168 const char *prefix_end = str + prefix_len; 169 if (*prefix_end != '\0') prefix_end++; 170 return prefix_end; 171} 172 173// Same as ExtractToken, but converts extracted token to integer. 174static const char *ExtractInt(const char *str, const char *delims, 175 int *result) { 176 char *buff; 177 const char *ret = ExtractToken(str, delims, &buff); 178 if (buff != 0) { 179 *result = (int)internal_atoll(buff); 180 } 181 InternalFree(buff); 182 return ret; 183} 184 185static const char *ExtractUptr(const char *str, const char *delims, 186 uptr *result) { 187 char *buff; 188 const char *ret = ExtractToken(str, delims, &buff); 189 if (buff != 0) { 190 *result = (uptr)internal_atoll(buff); 191 } 192 InternalFree(buff); 193 return ret; 194} 195 196// ExternalSymbolizer encapsulates communication between the tool and 197// external symbolizer program, running in a different subprocess, 198// For now we assume the following protocol: 199// For each request of the form 200// <module_name> <module_offset> 201// passed to STDIN, external symbolizer prints to STDOUT response: 202// <function_name> 203// <file_name>:<line_number>:<column_number> 204// <function_name> 205// <file_name>:<line_number>:<column_number> 206// ... 
207// <empty line> 208class ExternalSymbolizer { 209 public: 210 ExternalSymbolizer(const char *path, int input_fd, int output_fd) 211 : path_(path), 212 input_fd_(input_fd), 213 output_fd_(output_fd), 214 times_restarted_(0) { 215 CHECK(path_); 216 CHECK_NE(input_fd_, kInvalidFd); 217 CHECK_NE(output_fd_, kInvalidFd); 218 } 219 220 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 221 CHECK(module_name); 222 internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", 223 is_data ? "DATA " : "", module_name, module_offset); 224 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_))) 225 return 0; 226 if (!readFromSymbolizer(buffer_, kBufferSize)) 227 return 0; 228 return buffer_; 229 } 230 231 bool Restart() { 232 if (times_restarted_ >= kMaxTimesRestarted) return false; 233 times_restarted_++; 234 internal_close(input_fd_); 235 internal_close(output_fd_); 236 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_); 237 } 238 239 void Flush() { 240 } 241 242 private: 243 bool readFromSymbolizer(char *buffer, uptr max_length) { 244 if (max_length == 0) 245 return true; 246 uptr read_len = 0; 247 while (true) { 248 uptr just_read = internal_read(input_fd_, buffer + read_len, 249 max_length - read_len); 250 // We can't read 0 bytes, as we don't expect external symbolizer to close 251 // its stdout. 252 if (just_read == 0 || just_read == (uptr)-1) { 253 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 254 return false; 255 } 256 read_len += just_read; 257 // Empty line marks the end of symbolizer output. 
258 if (read_len >= 2 && buffer[read_len - 1] == '\n' && 259 buffer[read_len - 2] == '\n') { 260 break; 261 } 262 } 263 return true; 264 } 265 266 bool writeToSymbolizer(const char *buffer, uptr length) { 267 if (length == 0) 268 return true; 269 uptr write_len = internal_write(output_fd_, buffer, length); 270 if (write_len == 0 || write_len == (uptr)-1) { 271 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 272 return false; 273 } 274 return true; 275 } 276 277 const char *path_; 278 int input_fd_; 279 int output_fd_; 280 281 static const uptr kBufferSize = 16 * 1024; 282 char buffer_[kBufferSize]; 283 284 static const uptr kMaxTimesRestarted = 5; 285 uptr times_restarted_; 286}; 287 288static LowLevelAllocator symbolizer_allocator; // Linker initialized. 289 290#if SANITIZER_SUPPORTS_WEAK_HOOKS 291extern "C" { 292SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE 293bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset, 294 char *Buffer, int MaxLength); 295SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE 296bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, 297 char *Buffer, int MaxLength); 298SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE 299void __sanitizer_symbolize_flush(); 300SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE 301int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, 302 int MaxLength); 303} // extern "C" 304 305class InternalSymbolizer { 306 public: 307 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int); 308 309 static InternalSymbolizer *get() { 310 if (__sanitizer_symbolize_code != 0 && 311 __sanitizer_symbolize_data != 0) { 312 void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer)); 313 return new(mem) InternalSymbolizer(); 314 } 315 return 0; 316 } 317 318 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 319 SanitizerSymbolizeFn symbolize_fn = is_data ? 
__sanitizer_symbolize_data 320 : __sanitizer_symbolize_code; 321 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize)) 322 return buffer_; 323 return 0; 324 } 325 326 void Flush() { 327 if (__sanitizer_symbolize_flush) 328 __sanitizer_symbolize_flush(); 329 } 330 331 const char *Demangle(const char *name) { 332 if (__sanitizer_symbolize_demangle) { 333 for (uptr res_length = 1024; 334 res_length <= InternalSizeClassMap::kMaxSize;) { 335 char *res_buff = static_cast<char*>(InternalAlloc(res_length)); 336 uptr req_length = 337 __sanitizer_symbolize_demangle(name, res_buff, res_length); 338 if (req_length > res_length) { 339 res_length = req_length + 1; 340 InternalFree(res_buff); 341 continue; 342 } 343 return res_buff; 344 } 345 } 346 return name; 347 } 348 349 private: 350 InternalSymbolizer() { } 351 352 static const int kBufferSize = 16 * 1024; 353 static const int kMaxDemangledNameSize = 1024; 354 char buffer_[kBufferSize]; 355}; 356#else // SANITIZER_SUPPORTS_WEAK_HOOKS 357 358class InternalSymbolizer { 359 public: 360 static InternalSymbolizer *get() { return 0; } 361 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 362 return 0; 363 } 364 void Flush() { } 365 const char *Demangle(const char *name) { return name; } 366}; 367 368#endif // SANITIZER_SUPPORTS_WEAK_HOOKS 369 370class Symbolizer : public SymbolizerInterface { 371 // This class has no constructor, as global constructors are forbidden in 372 // sanitizer_common. It should be linker initialized instead. 
373 public: 374 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) { 375 if (max_frames == 0) 376 return 0; 377 LoadedModule *module = FindModuleForAddress(addr); 378 if (module == 0) 379 return 0; 380 const char *module_name = module->full_name(); 381 uptr module_offset = addr - module->base_address(); 382 const char *str = SendCommand(false, module_name, module_offset); 383 if (str == 0) { 384 // External symbolizer was not initialized or failed. Fill only data 385 // about module name and offset. 386 AddressInfo *info = &frames[0]; 387 info->Clear(); 388 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 389 return 1; 390 } 391 uptr frame_id = 0; 392 for (frame_id = 0; frame_id < max_frames; frame_id++) { 393 AddressInfo *info = &frames[frame_id]; 394 char *function_name = 0; 395 str = ExtractToken(str, "\n", &function_name); 396 CHECK(function_name); 397 if (function_name[0] == '\0') { 398 // There are no more frames. 399 break; 400 } 401 info->Clear(); 402 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 403 info->function = function_name; 404 // Parse <file>:<line>:<column> buffer. 405 char *file_line_info = 0; 406 str = ExtractToken(str, "\n", &file_line_info); 407 CHECK(file_line_info); 408 const char *line_info = ExtractToken(file_line_info, ":", &info->file); 409 line_info = ExtractInt(line_info, ":", &info->line); 410 line_info = ExtractInt(line_info, "", &info->column); 411 InternalFree(file_line_info); 412 413 // Functions and filenames can be "??", in which case we write 0 414 // to address info to mark that names are unknown. 415 if (0 == internal_strcmp(info->function, "??")) { 416 InternalFree(info->function); 417 info->function = 0; 418 } 419 if (0 == internal_strcmp(info->file, "??")) { 420 InternalFree(info->file); 421 info->file = 0; 422 } 423 } 424 if (frame_id == 0) { 425 // Make sure we return at least one frame. 
426 AddressInfo *info = &frames[0]; 427 info->Clear(); 428 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 429 frame_id = 1; 430 } 431 return frame_id; 432 } 433 434 bool SymbolizeData(uptr addr, DataInfo *info) { 435 LoadedModule *module = FindModuleForAddress(addr); 436 if (module == 0) 437 return false; 438 const char *module_name = module->full_name(); 439 uptr module_offset = addr - module->base_address(); 440 internal_memset(info, 0, sizeof(*info)); 441 info->address = addr; 442 info->module = internal_strdup(module_name); 443 info->module_offset = module_offset; 444 const char *str = SendCommand(true, module_name, module_offset); 445 if (str == 0) 446 return true; 447 str = ExtractToken(str, "\n", &info->name); 448 str = ExtractUptr(str, " ", &info->start); 449 str = ExtractUptr(str, "\n", &info->size); 450 info->start += module->base_address(); 451 return true; 452 } 453 454 bool InitializeExternal(const char *path_to_symbolizer) { 455 if (!path_to_symbolizer || path_to_symbolizer[0] == '\0') { 456 path_to_symbolizer = FindPathToBinary("llvm-symbolizer"); 457 if (!path_to_symbolizer) 458 return false; 459 } 460 int input_fd, output_fd; 461 if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd)) 462 return false; 463 void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer)); 464 external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer, 465 input_fd, output_fd); 466 return true; 467 } 468 469 bool IsAvailable() { 470 if (internal_symbolizer_ == 0) 471 internal_symbolizer_ = InternalSymbolizer::get(); 472 return internal_symbolizer_ || external_symbolizer_; 473 } 474 475 void Flush() { 476 if (internal_symbolizer_) 477 internal_symbolizer_->Flush(); 478 if (external_symbolizer_) 479 external_symbolizer_->Flush(); 480 } 481 482 const char *Demangle(const char *name) { 483 if (IsAvailable() && internal_symbolizer_ != 0) 484 return internal_symbolizer_->Demangle(name); 485 return DemangleCXXABI(name); 
486 } 487 488 void PrepareForSandboxing() { 489#if SANITIZER_LINUX && !SANITIZER_ANDROID 490 // Cache /proc/self/exe on Linux. 491 CacheBinaryName(); 492#endif 493 } 494 495 private: 496 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 497 // First, try to use internal symbolizer. 498 if (!IsAvailable()) { 499 return 0; 500 } 501 if (internal_symbolizer_) { 502 return internal_symbolizer_->SendCommand(is_data, module_name, 503 module_offset); 504 } 505 // Otherwise, fall back to external symbolizer. 506 if (external_symbolizer_ == 0) { 507 ReportExternalSymbolizerError( 508 "WARNING: Trying to symbolize code, but external " 509 "symbolizer is not initialized!\n"); 510 return 0; 511 } 512 for (;;) { 513 char *reply = external_symbolizer_->SendCommand(is_data, module_name, 514 module_offset); 515 if (reply) 516 return reply; 517 // Try to restart symbolizer subprocess. If we don't succeed, forget 518 // about it and don't try to use it later. 519 if (!external_symbolizer_->Restart()) { 520 ReportExternalSymbolizerError( 521 "WARNING: Failed to use and restart external symbolizer!\n"); 522 external_symbolizer_ = 0; 523 return 0; 524 } 525 } 526 } 527 528 LoadedModule *FindModuleForAddress(uptr address) { 529 bool modules_were_reloaded = false; 530 if (modules_ == 0 || !modules_fresh_) { 531 modules_ = (LoadedModule*)(symbolizer_allocator.Allocate( 532 kMaxNumberOfModuleContexts * sizeof(LoadedModule))); 533 CHECK(modules_); 534 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts, 535 /* filter */ 0); 536 // FIXME: Return this check when GetListOfModules is implemented on Mac. 537 // CHECK_GT(n_modules_, 0); 538 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts); 539 modules_fresh_ = true; 540 modules_were_reloaded = true; 541 } 542 for (uptr i = 0; i < n_modules_; i++) { 543 if (modules_[i].containsAddress(address)) { 544 return &modules_[i]; 545 } 546 } 547 // Reload the modules and look up again, if we haven't tried it yet. 
548 if (!modules_were_reloaded) { 549 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors. 550 // It's too aggressive to reload the list of modules each time we fail 551 // to find a module for a given address. 552 modules_fresh_ = false; 553 return FindModuleForAddress(address); 554 } 555 return 0; 556 } 557 558 void ReportExternalSymbolizerError(const char *msg) { 559 // Don't use atomics here for now, as SymbolizeCode can't be called 560 // from multiple threads anyway. 561 static bool reported; 562 if (!reported) { 563 Report(msg); 564 reported = true; 565 } 566 } 567 568 // 16K loaded modules should be enough for everyone. 569 static const uptr kMaxNumberOfModuleContexts = 1 << 14; 570 LoadedModule *modules_; // Array of module descriptions is leaked. 571 uptr n_modules_; 572 // If stale, need to reload the modules before looking up addresses. 573 bool modules_fresh_; 574 575 ExternalSymbolizer *external_symbolizer_; // Leaked. 576 InternalSymbolizer *internal_symbolizer_; // Leaked. 577}; 578 579static ALIGNED(64) char symbolizer_placeholder[sizeof(Symbolizer)]; 580static Symbolizer *symbolizer; 581 582SymbolizerInterface *getSymbolizer() { 583 static atomic_uint8_t initialized; 584 static StaticSpinMutex init_mu; 585 if (atomic_load(&initialized, memory_order_acquire) == 0) { 586 SpinMutexLock l(&init_mu); 587 if (atomic_load(&initialized, memory_order_relaxed) == 0) { 588 symbolizer = new(symbolizer_placeholder) Symbolizer(); 589 atomic_store(&initialized, 1, memory_order_release); 590 } 591 } 592 return symbolizer; 593} 594 595} // namespace __sanitizer 596 597#endif // SANITIZER_POSIX 598