sanitizer_symbolizer_libcdep.cc revision 6b30cf07c92eafecf5d06ae6cafa7b60f4ed7512
1//===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file is shared between AddressSanitizer and ThreadSanitizer 11// run-time libraries. See sanitizer_symbolizer.h for details. 12//===----------------------------------------------------------------------===// 13 14#include "sanitizer_allocator_internal.h" 15#include "sanitizer_common.h" 16#include "sanitizer_placement_new.h" 17#include "sanitizer_procmaps.h" 18#include "sanitizer_symbolizer.h" 19 20namespace __sanitizer { 21 22void AddressInfo::Clear() { 23 InternalFree(module); 24 InternalFree(function); 25 InternalFree(file); 26 internal_memset(this, 0, sizeof(AddressInfo)); 27} 28 29LoadedModule::LoadedModule(const char *module_name, uptr base_address) { 30 full_name_ = internal_strdup(module_name); 31 base_address_ = base_address; 32 n_ranges_ = 0; 33} 34 35void LoadedModule::addAddressRange(uptr beg, uptr end) { 36 CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges); 37 ranges_[n_ranges_].beg = beg; 38 ranges_[n_ranges_].end = end; 39 n_ranges_++; 40} 41 42bool LoadedModule::containsAddress(uptr address) const { 43 for (uptr i = 0; i < n_ranges_; i++) { 44 if (ranges_[i].beg <= address && address < ranges_[i].end) 45 return true; 46 } 47 return false; 48} 49 50// Extracts the prefix of "str" that consists of any characters not 51// present in "delims" string, and copies this prefix to "result", allocating 52// space for it. 53// Returns a pointer to "str" after skipping extracted prefix and first 54// delimiter char. 55static const char *ExtractToken(const char *str, const char *delims, 56 char **result) { 57 uptr prefix_len = internal_strcspn(str, delims); 58 *result = (char*)InternalAlloc(prefix_len + 1); 59 internal_memcpy(*result, str, prefix_len); 60 (*result)[prefix_len] = '\0'; 61 const char *prefix_end = str + prefix_len; 62 if (*prefix_end != '\0') prefix_end++; 63 return prefix_end; 64} 65 66// Same as ExtractToken, but converts extracted token to integer. 67static const char *ExtractInt(const char *str, const char *delims, 68 int *result) { 69 char *buff; 70 const char *ret = ExtractToken(str, delims, &buff); 71 if (buff != 0) { 72 *result = (int)internal_atoll(buff); 73 } 74 InternalFree(buff); 75 return ret; 76} 77 78static const char *ExtractUptr(const char *str, const char *delims, 79 uptr *result) { 80 char *buff; 81 const char *ret = ExtractToken(str, delims, &buff); 82 if (buff != 0) { 83 *result = (uptr)internal_atoll(buff); 84 } 85 InternalFree(buff); 86 return ret; 87} 88 89// ExternalSymbolizer encapsulates communication between the tool and 90// external symbolizer program, running in a different subprocess, 91// For now we assume the following protocol: 92// For each request of the form 93// <module_name> <module_offset> 94// passed to STDIN, external symbolizer prints to STDOUT response: 95// <function_name> 96// <file_name>:<line_number>:<column_number> 97// <function_name> 98// <file_name>:<line_number>:<column_number> 99// ... 100// <empty line> 101class ExternalSymbolizer { 102 public: 103 ExternalSymbolizer(const char *path, int input_fd, int output_fd) 104 : path_(path), 105 input_fd_(input_fd), 106 output_fd_(output_fd), 107 times_restarted_(0) { 108 CHECK(path_); 109 CHECK_NE(input_fd_, kInvalidFd); 110 CHECK_NE(output_fd_, kInvalidFd); 111 } 112 113 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 114 CHECK(module_name); 115 internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", 116 is_data ? "DATA " : "", module_name, module_offset); 117 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_))) 118 return 0; 119 if (!readFromSymbolizer(buffer_, kBufferSize)) 120 return 0; 121 return buffer_; 122 } 123 124 bool Restart() { 125 if (times_restarted_ >= kMaxTimesRestarted) return false; 126 times_restarted_++; 127 internal_close(input_fd_); 128 internal_close(output_fd_); 129 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_); 130 } 131 132 void Flush() { 133 } 134 135 private: 136 bool readFromSymbolizer(char *buffer, uptr max_length) { 137 if (max_length == 0) 138 return true; 139 uptr read_len = 0; 140 while (true) { 141 uptr just_read = internal_read(input_fd_, buffer + read_len, 142 max_length - read_len); 143 // We can't read 0 bytes, as we don't expect external symbolizer to close 144 // its stdout. 145 if (just_read == 0 || just_read == (uptr)-1) { 146 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 147 return false; 148 } 149 read_len += just_read; 150 // Empty line marks the end of symbolizer output. 151 if (read_len >= 2 && buffer[read_len - 1] == '\n' && 152 buffer[read_len - 2] == '\n') { 153 break; 154 } 155 } 156 return true; 157 } 158 159 bool writeToSymbolizer(const char *buffer, uptr length) { 160 if (length == 0) 161 return true; 162 uptr write_len = internal_write(output_fd_, buffer, length); 163 if (write_len == 0 || write_len == (uptr)-1) { 164 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 165 return false; 166 } 167 return true; 168 } 169 170 const char *path_; 171 int input_fd_; 172 int output_fd_; 173 174 static const uptr kBufferSize = 16 * 1024; 175 char buffer_[kBufferSize]; 176 177 static const uptr kMaxTimesRestarted = 5; 178 uptr times_restarted_; 179}; 180 181static LowLevelAllocator symbolizer_allocator; // Linker initialized. 182 183#if SANITIZER_SUPPORTS_WEAK_HOOKS 184extern "C" { 185SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 186bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset, 187 char *Buffer, int MaxLength); 188SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 189bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, 190 char *Buffer, int MaxLength); 191SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 192void __sanitizer_symbolize_flush(); 193SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 194void __sanitizer_symbolize_demangle(const char *Name, char *Buffer, 195 int MaxLength); 196} // extern "C" 197 198class InternalSymbolizer { 199 public: 200 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int); 201 202 static InternalSymbolizer *get() { 203 if (__sanitizer_symbolize_code != 0 && 204 __sanitizer_symbolize_data != 0) { 205 void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer)); 206 return new(mem) InternalSymbolizer(); 207 } 208 return 0; 209 } 210 211 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 212 SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data 213 : __sanitizer_symbolize_code; 214 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize)) 215 return buffer_; 216 return 0; 217 } 218 219 void Flush() { 220 if (__sanitizer_symbolize_flush) 221 __sanitizer_symbolize_flush(); 222 } 223 224 const char *Demangle(const char *name) { 225 if (__sanitizer_symbolize_demangle) { 226 char *res = static_cast<char*>(InternalAlloc(kMaxDemangledNameSize)); 227 internal_memset(res, 0, kMaxDemangledNameSize); 228 __sanitizer_symbolize_demangle(name, res, kMaxDemangledNameSize); 229 return res; 230 } 231 return name; 232 } 233 234 private: 235 InternalSymbolizer() { } 236 237 static const int kBufferSize = 16 * 1024; 238 static const int kMaxDemangledNameSize = 1024; 239 char buffer_[kBufferSize]; 240}; 241#else // SANITIZER_SUPPORTS_WEAK_HOOKS 242 243class InternalSymbolizer { 244 public: 245 static InternalSymbolizer *get() { return 0; } 246 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 247 return 0; 248 } 249 void Flush() { } 250 const char *Demangle(const char *name) { return name; } 251}; 252 253#endif // SANITIZER_SUPPORTS_WEAK_HOOKS 254 255class Symbolizer { 256 // This class has no constructor, as global constructors are forbidden in 257 // sanitizer_common. It should be linker initialized instead. 258 public: 259 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) { 260 if (max_frames == 0) 261 return 0; 262 LoadedModule *module = FindModuleForAddress(addr); 263 if (module == 0) 264 return 0; 265 const char *module_name = module->full_name(); 266 uptr module_offset = addr - module->base_address(); 267 const char *str = SendCommand(false, module_name, module_offset); 268 if (str == 0) { 269 // External symbolizer was not initialized or failed. Fill only data 270 // about module name and offset. 271 AddressInfo *info = &frames[0]; 272 info->Clear(); 273 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 274 return 1; 275 } 276 uptr frame_id = 0; 277 for (frame_id = 0; frame_id < max_frames; frame_id++) { 278 AddressInfo *info = &frames[frame_id]; 279 char *function_name = 0; 280 str = ExtractToken(str, "\n", &function_name); 281 CHECK(function_name); 282 if (function_name[0] == '\0') { 283 // There are no more frames. 284 break; 285 } 286 info->Clear(); 287 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 288 info->function = function_name; 289 // Parse <file>:<line>:<column> buffer. 290 char *file_line_info = 0; 291 str = ExtractToken(str, "\n", &file_line_info); 292 CHECK(file_line_info); 293 const char *line_info = ExtractToken(file_line_info, ":", &info->file); 294 line_info = ExtractInt(line_info, ":", &info->line); 295 line_info = ExtractInt(line_info, "", &info->column); 296 InternalFree(file_line_info); 297 298 // Functions and filenames can be "??", in which case we write 0 299 // to address info to mark that names are unknown. 300 if (0 == internal_strcmp(info->function, "??")) { 301 InternalFree(info->function); 302 info->function = 0; 303 } 304 if (0 == internal_strcmp(info->file, "??")) { 305 InternalFree(info->file); 306 info->file = 0; 307 } 308 } 309 if (frame_id == 0) { 310 // Make sure we return at least one frame. 311 AddressInfo *info = &frames[0]; 312 info->Clear(); 313 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 314 frame_id = 1; 315 } 316 return frame_id; 317 } 318 319 bool SymbolizeData(uptr addr, DataInfo *info) { 320 LoadedModule *module = FindModuleForAddress(addr); 321 if (module == 0) 322 return false; 323 const char *module_name = module->full_name(); 324 uptr module_offset = addr - module->base_address(); 325 internal_memset(info, 0, sizeof(*info)); 326 info->address = addr; 327 info->module = internal_strdup(module_name); 328 info->module_offset = module_offset; 329 const char *str = SendCommand(true, module_name, module_offset); 330 if (str == 0) 331 return true; 332 str = ExtractToken(str, "\n", &info->name); 333 str = ExtractUptr(str, " ", &info->start); 334 str = ExtractUptr(str, "\n", &info->size); 335 info->start += module->base_address(); 336 return true; 337 } 338 339 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { 340 int input_fd, output_fd; 341 if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd)) 342 return false; 343 void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer)); 344 external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer, 345 input_fd, output_fd); 346 return true; 347 } 348 349 bool IsSymbolizerAvailable() { 350 if (internal_symbolizer_ == 0) 351 internal_symbolizer_ = InternalSymbolizer::get(); 352 return internal_symbolizer_ || external_symbolizer_; 353 } 354 355 void Flush() { 356 if (internal_symbolizer_) 357 internal_symbolizer_->Flush(); 358 if (external_symbolizer_) 359 external_symbolizer_->Flush(); 360 } 361 362 const char *Demangle(const char *name) { 363 if (IsSymbolizerAvailable() && internal_symbolizer_ != 0) 364 return internal_symbolizer_->Demangle(name); 365 return DemangleCXXABI(name); 366 } 367 368 private: 369 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 370 // First, try to use internal symbolizer. 371 if (!IsSymbolizerAvailable()) { 372 return 0; 373 } 374 if (internal_symbolizer_) { 375 return internal_symbolizer_->SendCommand(is_data, module_name, 376 module_offset); 377 } 378 // Otherwise, fall back to external symbolizer. 379 if (external_symbolizer_ == 0) { 380 ReportExternalSymbolizerError( 381 "WARNING: Trying to symbolize code, but external " 382 "symbolizer is not initialized!\n"); 383 return 0; 384 } 385 for (;;) { 386 char *reply = external_symbolizer_->SendCommand(is_data, module_name, 387 module_offset); 388 if (reply) 389 return reply; 390 // Try to restart symbolizer subprocess. If we don't succeed, forget 391 // about it and don't try to use it later. 392 if (!external_symbolizer_->Restart()) { 393 ReportExternalSymbolizerError( 394 "WARNING: Failed to use and restart external symbolizer!\n"); 395 external_symbolizer_ = 0; 396 return 0; 397 } 398 } 399 } 400 401 LoadedModule *FindModuleForAddress(uptr address) { 402 bool modules_were_reloaded = false; 403 if (modules_ == 0 || !modules_fresh_) { 404 modules_ = (LoadedModule*)(symbolizer_allocator.Allocate( 405 kMaxNumberOfModuleContexts * sizeof(LoadedModule))); 406 CHECK(modules_); 407 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts, 408 /* filter */ 0); 409 // FIXME: Return this check when GetListOfModules is implemented on Mac. 410 // CHECK_GT(n_modules_, 0); 411 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts); 412 modules_fresh_ = true; 413 modules_were_reloaded = true; 414 } 415 for (uptr i = 0; i < n_modules_; i++) { 416 if (modules_[i].containsAddress(address)) { 417 return &modules_[i]; 418 } 419 } 420 // Reload the modules and look up again, if we haven't tried it yet. 421 if (!modules_were_reloaded) { 422 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors. 423 // It's too aggressive to reload the list of modules each time we fail 424 // to find a module for a given address. 425 modules_fresh_ = false; 426 return FindModuleForAddress(address); 427 } 428 return 0; 429 } 430 431 void ReportExternalSymbolizerError(const char *msg) { 432 // Don't use atomics here for now, as SymbolizeCode can't be called 433 // from multiple threads anyway. 434 static bool reported; 435 if (!reported) { 436 Report(msg); 437 reported = true; 438 } 439 } 440 441 // 16K loaded modules should be enough for everyone. 442 static const uptr kMaxNumberOfModuleContexts = 1 << 14; 443 LoadedModule *modules_; // Array of module descriptions is leaked. 444 uptr n_modules_; 445 // If stale, need to reload the modules before looking up addresses. 446 bool modules_fresh_; 447 448 ExternalSymbolizer *external_symbolizer_; // Leaked. 449 InternalSymbolizer *internal_symbolizer_; // Leaked. 450}; 451 452static Symbolizer symbolizer; // Linker initialized. 453 454uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) { 455 return symbolizer.SymbolizeCode(address, frames, max_frames); 456} 457 458bool SymbolizeData(uptr address, DataInfo *info) { 459 return symbolizer.SymbolizeData(address, info); 460} 461 462bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { 463 return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer); 464} 465 466bool IsSymbolizerAvailable() { 467 return symbolizer.IsSymbolizerAvailable(); 468} 469 470void FlushSymbolizer() { 471 symbolizer.Flush(); 472} 473 474const char *Demangle(const char *name) { 475 return symbolizer.Demangle(name); 476} 477 478} // namespace __sanitizer 479