heap_find.cpp revision 4c5c429b0cd53a4df9f0fab3482e73aa71fc5b91
//===-- heap_find.cpp ------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file compiles into a dylib and can be used on darwin to find data that
// is contained in active malloc blocks. To use this, make the project, then
// load the shared library in a debug session while you are stopped:
//
//     (lldb) process load /path/to/libheap.dylib
//
// Now you can use the "find_pointer_in_heap" and "find_cstring_in_heap"
// functions in the expression parser.
//
// This will grep everything in all active allocation blocks and print any
// malloc blocks that contain the pointer 0x112233000000:
//
//     (lldb) expression find_pointer_in_heap (0x112233000000)
//
// This will grep everything in all active allocation blocks and print any
// malloc blocks that contain the C string "hello" (as a substring, no
// NULL termination included):
//
//     (lldb) expression find_cstring_in_heap ("hello")
//
// The results will be printed to the STDOUT of the inferior program. The
// return value of the "find_pointer_in_heap" function is the number of
// pointer references that were found. A quick example shows
//
//     (lldb) expr find_pointer_in_heap(0x0000000104000410)
//     (uint32_t) $5 = 0x00000002
//     0x104000740: 0x0000000104000410 found in malloc block 0x104000730 + 16 (malloc_size = 48)
//     0x100820060: 0x0000000104000410 found in malloc block 0x100820000 + 96 (malloc_size = 4096)
//
// From the above output we see that 0x104000410 was found in the malloc blocks
// at 0x104000730 and 0x100820000. If we want to see what these blocks are, we
// can display the memory for each block using the "address" ("A" for short)
// format. The address format shows pointers, and if those pointers point to
// objects that have symbols or known data contents, it will display
// information about the pointers:
//
//     (lldb) memory read --format address --count 1 0x104000730
//     0x104000730: 0x0000000100002460 (void *)0x0000000100002488: MyString
//
// We can see that the first block is a "MyString" object that contains our
// pointer value at offset 16.
//
// Looking at the next pointer is a bit more tricky:
//
//     (lldb) memory read -fA 0x100820000 -c1
//     0x100820000: 0x4f545541a1a1a1a1
//     (lldb) memory read 0x100820000
//     0x100820000: a1 a1 a1 a1 41 55 54 4f 52 45 4c 45 41 53 45 21 ....AUTORELEASE!
//     0x100820010: 78 00 82 00 01 00 00 00 60 f9 e8 75 ff 7f 00 00 x.......`..u....
//
// This is an Objective-C autorelease pool object that contains our pointer.
// C++ classes will show up, if they are virtual, as something like:
//
//     (lldb) memory read --format address --count 1 0x104008000
//     0x104008000: 0x109008000 vtable for lldb_private::Process
//
// This is a clue that 0x104008000 is a "lldb_private::Process *".
65//===----------------------------------------------------------------------===// 66 67#include <assert.h> 68#include <ctype.h> 69#include <mach/mach.h> 70#include <malloc/malloc.h> 71#include <stdio.h> 72#include <stdlib.h> 73#include <vector> 74 75//---------------------------------------------------------------------- 76// Redefine private types from "/usr/local/include/stack_logging.h" 77//---------------------------------------------------------------------- 78typedef struct { 79 uint32_t type_flags; 80 uint64_t stack_identifier; 81 uint64_t argument; 82 mach_vm_address_t address; 83} mach_stack_logging_record_t; 84 85//---------------------------------------------------------------------- 86// Redefine private defines from "/usr/local/include/stack_logging.h" 87//---------------------------------------------------------------------- 88#define stack_logging_type_free 0 89#define stack_logging_type_generic 1 90#define stack_logging_type_alloc 2 91#define stack_logging_type_dealloc 4 92 93//---------------------------------------------------------------------- 94// Redefine private function prototypes from 95// "/usr/local/include/stack_logging.h" 96//---------------------------------------------------------------------- 97extern "C" kern_return_t 98__mach_stack_logging_set_file_path ( 99 task_t task, 100 char* file_path 101); 102 103extern "C" kern_return_t 104__mach_stack_logging_get_frames ( 105 task_t task, 106 mach_vm_address_t address, 107 mach_vm_address_t *stack_frames_buffer, 108 uint32_t max_stack_frames, 109 uint32_t *count 110); 111 112extern "C" kern_return_t 113__mach_stack_logging_enumerate_records ( 114 task_t task, 115 mach_vm_address_t address, 116 void enumerator(mach_stack_logging_record_t, void *), 117 void *context 118); 119 120extern "C" kern_return_t 121__mach_stack_logging_frames_for_uniqued_stack ( 122 task_t task, 123 uint64_t stack_identifier, 124 mach_vm_address_t *stack_frames_buffer, 125 uint32_t max_stack_frames, 126 uint32_t 
*count 127); 128 129//---------------------------------------------------------------------- 130// Redefine private gloval variables prototypes from 131// "/usr/local/include/stack_logging.h" 132//---------------------------------------------------------------------- 133 134extern "C" int stack_logging_enable_logging; 135extern "C" int stack_logging_dontcompact; 136 137//---------------------------------------------------------------------- 138// Local defines 139//---------------------------------------------------------------------- 140#define MAX_FRAMES 1024 141 142//---------------------------------------------------------------------- 143// Local Typedefs and Types 144//---------------------------------------------------------------------- 145typedef void range_callback_t (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size); 146typedef void zone_callback_t (void *info, const malloc_zone_t *zone); 147 148struct range_callback_info_t 149{ 150 zone_callback_t *zone_callback; 151 range_callback_t *range_callback; 152 void *baton; 153}; 154 155enum data_type_t 156{ 157 eDataTypeAddress, 158 eDataTypeContainsData 159}; 160 161struct aligned_data_t 162{ 163 const uint8_t *buffer; 164 uint32_t size; 165 uint32_t align; 166}; 167 168struct range_contains_data_callback_info_t 169{ 170 data_type_t type; 171 const void *lookup_addr; 172 union 173 { 174 uintptr_t addr; 175 aligned_data_t data; 176 }; 177 uint32_t match_count; 178 bool done; 179}; 180 181struct malloc_match 182{ 183 void *addr; 184 intptr_t size; 185 intptr_t offset; 186}; 187 188struct malloc_stack_entry 189{ 190 const void *address; 191 uint64_t argument; 192 uint32_t type_flags; 193 std::vector<uintptr_t> frames; 194}; 195 196//---------------------------------------------------------------------- 197// Local global variables 198//---------------------------------------------------------------------- 199std::vector<malloc_match> g_matches; 200const void *g_lookup_addr = 0; 
201std::vector<malloc_stack_entry> g_malloc_stack_history; 202mach_vm_address_t g_stack_frames[MAX_FRAMES]; 203char g_error_string[PATH_MAX]; 204 205//---------------------------------------------------------------------- 206// task_peek 207// 208// Reads memory from this tasks address space. This callback is needed 209// by the code that iterates through all of the malloc blocks to read 210// the memory in this process. 211//---------------------------------------------------------------------- 212static kern_return_t 213task_peek (task_t task, vm_address_t remote_address, vm_size_t size, void **local_memory) 214{ 215 *local_memory = (void*) remote_address; 216 return KERN_SUCCESS; 217} 218 219 220static const void 221foreach_zone_in_this_process (range_callback_info_t *info) 222{ 223 if (info == NULL || info->zone_callback == NULL) 224 return; 225 226 vm_address_t *zones = NULL; 227 unsigned int num_zones = 0; 228 229 kern_return_t err = malloc_get_all_zones (0, task_peek, &zones, &num_zones); 230 if (KERN_SUCCESS == err) 231 { 232 for (unsigned int i=0; i<num_zones; ++i) 233 { 234 info->zone_callback (info, (const malloc_zone_t *)zones[i]); 235 } 236 } 237} 238 239//---------------------------------------------------------------------- 240// dump_malloc_block_callback 241// 242// A simple callback that will dump each malloc block and all available 243// info from the enumeration callback perpective. 
244//---------------------------------------------------------------------- 245static void 246dump_malloc_block_callback (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size) 247{ 248 printf ("task = 0x%4.4x: baton = %p, type = %u, ptr_addr = 0x%llx + 0x%llu\n", task, baton, type, ptr_addr, ptr_size); 249} 250 251static void 252ranges_callback (task_t task, void *baton, unsigned type, vm_range_t *ptrs, unsigned count) 253{ 254 range_callback_info_t *info = (range_callback_info_t *)baton; 255 while(count--) { 256 info->range_callback (task, info->baton, type, ptrs->address, ptrs->size); 257 ptrs++; 258 } 259} 260 261static void 262enumerate_range_in_zone (void *baton, const malloc_zone_t *zone) 263{ 264 range_callback_info_t *info = (range_callback_info_t *)baton; 265 266 if (zone && zone->introspect) 267 zone->introspect->enumerator (mach_task_self(), 268 info, 269 MALLOC_PTR_IN_USE_RANGE_TYPE, 270 (vm_address_t)zone, 271 task_peek, 272 ranges_callback); 273} 274 275static void 276range_info_callback (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size) 277{ 278 const uint64_t end_addr = ptr_addr + ptr_size; 279 280 range_contains_data_callback_info_t *info = (range_contains_data_callback_info_t *)baton; 281 switch (info->type) 282 { 283 case eDataTypeAddress: 284 if (ptr_addr <= info->addr && info->addr < end_addr) 285 { 286 ++info->match_count; 287 malloc_match match = { (void *)ptr_addr, ptr_size, info->addr - ptr_addr }; 288 g_matches.push_back(match); 289 } 290 break; 291 292 case eDataTypeContainsData: 293 { 294 const uint32_t size = info->data.size; 295 if (size < ptr_size) // Make sure this block can contain this data 296 { 297 uint8_t *ptr_data = NULL; 298 if (task_peek (task, ptr_addr, ptr_size, (void **)&ptr_data) == KERN_SUCCESS) 299 { 300 const void *buffer = info->data.buffer; 301 assert (ptr_data); 302 const uint32_t align = info->data.align; 303 for (uint64_t addr = ptr_addr; 304 addr < end_addr && 
((end_addr - addr) >= size); 305 addr += align, ptr_data += align) 306 { 307 if (memcmp (buffer, ptr_data, size) == 0) 308 { 309 ++info->match_count; 310 malloc_match match = { (void *)ptr_addr, ptr_size, addr - ptr_addr }; 311 g_matches.push_back(match); 312 } 313 } 314 } 315 else 316 { 317 printf ("0x%llx: error: couldn't read %llu bytes\n", ptr_addr, ptr_size); 318 } 319 } 320 } 321 break; 322 } 323} 324 325static void 326get_stack_for_address_enumerator(mach_stack_logging_record_t stack_record, void *task_ptr) 327{ 328 uint32_t num_frames = 0; 329 kern_return_t err = __mach_stack_logging_frames_for_uniqued_stack (*(task_t *)task_ptr, 330 stack_record.stack_identifier, 331 g_stack_frames, 332 MAX_FRAMES, 333 &num_frames); 334 g_malloc_stack_history.resize(g_malloc_stack_history.size() + 1); 335 g_malloc_stack_history.back().address = (void *)stack_record.address; 336 g_malloc_stack_history.back().type_flags = stack_record.type_flags; 337 g_malloc_stack_history.back().argument = stack_record.argument; 338 if (num_frames > 0) 339 g_malloc_stack_history.back().frames.assign(g_stack_frames, g_stack_frames + num_frames); 340 g_malloc_stack_history.back().frames.push_back(0); // Terminate the frames with zero 341} 342 343malloc_stack_entry * 344get_stack_history_for_address (const void * addr, int history) 345{ 346 std::vector<malloc_stack_entry> empty; 347 g_malloc_stack_history.swap(empty); 348 if (!stack_logging_enable_logging || (history && !stack_logging_dontcompact)) 349 { 350 if (history) 351 strncpy(g_error_string, "error: stack history logging is not enabled, set MallocStackLoggingNoCompact=1 in the environment when launching to enable stack history logging.", sizeof(g_error_string)); 352 else 353 strncpy(g_error_string, "error: stack logging is not enabled, set MallocStackLogging=1 in the environment when launching to enable stack logging.", sizeof(g_error_string)); 354 return NULL; 355 } 356 kern_return_t err; 357 task_t task = mach_task_self(); 358 if 
(history) 359 { 360 err = __mach_stack_logging_enumerate_records (task, 361 (mach_vm_address_t)addr, 362 get_stack_for_address_enumerator, 363 &task); 364 } 365 else 366 { 367 uint32_t num_frames = 0; 368 err = __mach_stack_logging_get_frames(task, (mach_vm_address_t)addr, g_stack_frames, MAX_FRAMES, &num_frames); 369 if (err == 0 && num_frames > 0) 370 { 371 g_malloc_stack_history.resize(1); 372 g_malloc_stack_history.back().address = addr; 373 g_malloc_stack_history.back().type_flags = stack_logging_type_alloc; 374 g_malloc_stack_history.back().argument = 0; 375 if (num_frames > 0) 376 g_malloc_stack_history.back().frames.assign(g_stack_frames, g_stack_frames + num_frames); 377 g_malloc_stack_history.back().frames.push_back(0); // Terminate the frames with zero 378 } 379 } 380 // Append an empty entry 381 if (g_malloc_stack_history.empty()) 382 return NULL; 383 g_malloc_stack_history.resize(g_malloc_stack_history.size() + 1); 384 g_malloc_stack_history.back().address = 0; 385 g_malloc_stack_history.back().type_flags = 0; 386 g_malloc_stack_history.back().argument = 0; 387 return g_malloc_stack_history.data(); 388} 389 390//---------------------------------------------------------------------- 391// find_pointer_in_heap 392// 393// Finds a pointer value inside one or more currently valid malloc 394// blocks. 395//---------------------------------------------------------------------- 396malloc_match * 397find_pointer_in_heap (const void * addr) 398{ 399 g_matches.clear(); 400 // Setup "info" to look for a malloc block that contains data 401 // that is the a pointer 402 range_contains_data_callback_info_t data_info; 403 data_info.type = eDataTypeContainsData; // Check each block for data 404 g_lookup_addr = addr; 405 data_info.data.buffer = (uint8_t *)&addr; // What data? The pointer value passed in 406 data_info.data.size = sizeof(addr); // How many bytes? 
The byte size of a pointer 407 data_info.data.align = sizeof(addr); // Align to a pointer byte size 408 data_info.match_count = 0; // Initialize the match count to zero 409 data_info.done = false; // Set done to false so searching doesn't stop 410 range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info }; 411 foreach_zone_in_this_process (&info); 412 if (g_matches.empty()) 413 return NULL; 414 malloc_match match = { NULL, 0, 0 }; 415 g_matches.push_back(match); 416 return g_matches.data(); 417} 418 419//---------------------------------------------------------------------- 420// find_pointer_in_memory 421// 422// Finds a pointer value inside one or more currently valid malloc 423// blocks. 424//---------------------------------------------------------------------- 425malloc_match * 426find_pointer_in_memory (uint64_t memory_addr, uint64_t memory_size, const void * addr) 427{ 428 g_matches.clear(); 429 // Setup "info" to look for a malloc block that contains data 430 // that is the a pointer 431 range_contains_data_callback_info_t data_info; 432 data_info.type = eDataTypeContainsData; // Check each block for data 433 g_lookup_addr = addr; 434 data_info.data.buffer = (uint8_t *)&addr; // What data? The pointer value passed in 435 data_info.data.size = sizeof(addr); // How many bytes? 
The byte size of a pointer 436 data_info.data.align = sizeof(addr); // Align to a pointer byte size 437 data_info.match_count = 0; // Initialize the match count to zero 438 data_info.done = false; // Set done to false so searching doesn't stop 439 range_info_callback (mach_task_self(), &data_info, stack_logging_type_generic, memory_addr, memory_size); 440 if (g_matches.empty()) 441 return NULL; 442 malloc_match match = { NULL, 0, 0 }; 443 g_matches.push_back(match); 444 return g_matches.data(); 445} 446 447//---------------------------------------------------------------------- 448// find_cstring_in_heap 449// 450// Finds a C string inside one or more currently valid malloc blocks. 451//---------------------------------------------------------------------- 452malloc_match * 453find_cstring_in_heap (const char *s) 454{ 455 g_matches.clear(); 456 if (s == NULL || s[0] == '\0') 457 { 458 printf ("error: invalid argument (empty cstring)\n"); 459 return NULL; 460 } 461 // Setup "info" to look for a malloc block that contains data 462 // that is the C string passed in aligned on a 1 byte boundary 463 range_contains_data_callback_info_t data_info; 464 data_info.type = eDataTypeContainsData; // Check each block for data 465 g_lookup_addr = s; // If an expression was used, then fill in the resolved address we are looking up 466 data_info.data.buffer = (uint8_t *)s; // What data? The C string passed in 467 data_info.data.size = strlen(s); // How many bytes? 
The length of the C string 468 data_info.data.align = 1; // Data doesn't need to be aligned, so set the alignment to 1 469 data_info.match_count = 0; // Initialize the match count to zero 470 data_info.done = false; // Set done to false so searching doesn't stop 471 range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info }; 472 foreach_zone_in_this_process (&info); 473 if (g_matches.empty()) 474 return NULL; 475 malloc_match match = { NULL, 0, 0 }; 476 g_matches.push_back(match); 477 return g_matches.data(); 478} 479 480//---------------------------------------------------------------------- 481// find_block_for_address 482// 483// Find the malloc block that whose address range contains "addr". 484//---------------------------------------------------------------------- 485malloc_match * 486find_block_for_address (const void *addr) 487{ 488 g_matches.clear(); 489 // Setup "info" to look for a malloc block that contains data 490 // that is the C string passed in aligned on a 1 byte boundary 491 range_contains_data_callback_info_t data_info; 492 g_lookup_addr = addr; // If an expression was used, then fill in the resolved address we are looking up 493 data_info.type = eDataTypeAddress; // Check each block to see if the block contains the address passed in 494 data_info.addr = (uintptr_t)addr; // What data? The C string passed in 495 data_info.match_count = 0; // Initialize the match count to zero 496 data_info.done = false; // Set done to false so searching doesn't stop 497 range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info }; 498 foreach_zone_in_this_process (&info); 499 if (g_matches.empty()) 500 return NULL; 501 malloc_match match = { NULL, 0, 0 }; 502 g_matches.push_back(match); 503 return g_matches.data(); 504} 505