heap_find.cpp revision 4c5c429b0cd53a4df9f0fab3482e73aa71fc5b91
1//===-- heap_find.cpp -------------------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file compiles into a dylib and can be used on darwin to find data that
11// is contained in active malloc blocks. To use this make the project, then
12// load the shared library in a debug session while you are stopped:
13//
14// (lldb) process load /path/to/libheap.dylib
15//
16// Now you can use the "find_pointer_in_heap" and "find_cstring_in_heap"
17// functions in the expression parser.
18//
19// This will grep everything in all active allocation blocks and print any
20// malloc blocks that contain the pointer 0x112233000000:
21//
22// (lldb) expression find_pointer_in_heap (0x112233000000)
23//
24// This will grep everything in all active allocation blocks and print any
25// malloc blocks that contain the C string "hello" (as a substring, no
26// NULL termination included):
27//
28// (lldb) expression find_cstring_in_heap ("hello")
29//
30// The results will be printed to the STDOUT of the inferior program. The
31// return value of the "find_pointer_in_heap" function is the number of
32// pointer references that were found. A quick example shows
33//
34// (lldb) expr find_pointer_in_heap(0x0000000104000410)
35// (uint32_t) $5 = 0x00000002
36// 0x104000740: 0x0000000104000410 found in malloc block 0x104000730 + 16 (malloc_size = 48)
37// 0x100820060: 0x0000000104000410 found in malloc block 0x100820000 + 96 (malloc_size = 4096)
38//
39// From the above output we see that 0x104000410 was found in the malloc block
40// at 0x104000730 and 0x100820000. If we want to see what these blocks are, we
41// can display the memory for this block using the "address" ("A" for short)
42// format. The address format shows pointers, and if those pointers point to
43// objects that have symbols or known data contents, it will display information
44// about the pointers:
45//
46// (lldb) memory read --format address --count 1 0x104000730
47// 0x104000730: 0x0000000100002460 (void *)0x0000000100002488: MyString
48//
49// We can see that the first block is a "MyString" object that contains our
50// pointer value at offset 16.
51//
52// Looking at the next pointer is a bit more tricky:
53// (lldb) memory read -fA 0x100820000 -c1
54// 0x100820000: 0x4f545541a1a1a1a1
55// (lldb) memory read 0x100820000
56// 0x100820000: a1 a1 a1 a1 41 55 54 4f 52 45 4c 45 41 53 45 21  ....AUTORELEASE!
57// 0x100820010: 78 00 82 00 01 00 00 00 60 f9 e8 75 ff 7f 00 00  x.......`..u....
58//
59// This is an objective C auto release pool object that contains our pointer.
60// C++ classes will show up if they are virtual as something like:
61// (lldb) memory read --format address --count 1 0x104008000
62// 0x104008000: 0x109008000 vtable for lldb_private::Process
63//
64// This is a clue that the 0x104008000 is a "lldb_private::Process *".
65//===----------------------------------------------------------------------===//
66
67#include <assert.h>
68#include <ctype.h>
69#include <mach/mach.h>
70#include <malloc/malloc.h>
71#include <stdio.h>
72#include <stdlib.h>
73#include <vector>
74
75//----------------------------------------------------------------------
76// Redefine private types from "/usr/local/include/stack_logging.h"
77//----------------------------------------------------------------------
78typedef struct {
79	uint32_t		type_flags;
80	uint64_t		stack_identifier;
81	uint64_t		argument;
82	mach_vm_address_t	address;
83} mach_stack_logging_record_t;
84
//----------------------------------------------------------------------
// Private event-type constants, re-declared from
// "/usr/local/include/stack_logging.h"
//----------------------------------------------------------------------
#define stack_logging_type_free     0
#define stack_logging_type_generic  1
#define stack_logging_type_alloc    2
#define stack_logging_type_dealloc  4
92
93//----------------------------------------------------------------------
94// Redefine private function prototypes from
95// "/usr/local/include/stack_logging.h"
96//----------------------------------------------------------------------
97extern "C" kern_return_t
98__mach_stack_logging_set_file_path (
99    task_t task,
100    char* file_path
101);
102
103extern "C" kern_return_t
104__mach_stack_logging_get_frames (
105    task_t task,
106    mach_vm_address_t address,
107    mach_vm_address_t *stack_frames_buffer,
108    uint32_t max_stack_frames,
109    uint32_t *count
110);
111
112extern "C" kern_return_t
113__mach_stack_logging_enumerate_records (
114    task_t task,
115    mach_vm_address_t address,
116    void enumerator(mach_stack_logging_record_t, void *),
117    void *context
118);
119
120extern "C" kern_return_t
121__mach_stack_logging_frames_for_uniqued_stack (
122    task_t task,
123    uint64_t stack_identifier,
124    mach_vm_address_t *stack_frames_buffer,
125    uint32_t max_stack_frames,
126    uint32_t *count
127);
128
//----------------------------------------------------------------------
// Private global variables, re-declared from
// "/usr/local/include/stack_logging.h"
//----------------------------------------------------------------------
extern "C"
{
    // get_stack_history_for_address() refuses to run when this is zero.
    extern int stack_logging_enable_logging;
    // Additionally required to be non-zero when full history is requested.
    extern int stack_logging_dontcompact;
}
136
//----------------------------------------------------------------------
// Local defines
//----------------------------------------------------------------------
// Capacity, in frames, of the g_stack_frames scratch buffer and the limit
// passed to the stack logging calls.
#define MAX_FRAMES 1024
141
142//----------------------------------------------------------------------
143// Local Typedefs and Types
144//----------------------------------------------------------------------
145typedef void range_callback_t (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size);
146typedef void zone_callback_t (void *info, const malloc_zone_t *zone);
147
148struct range_callback_info_t
149{
150    zone_callback_t *zone_callback;
151    range_callback_t *range_callback;
152    void *baton;
153};
154
// Selects what range_info_callback() looks for in each range.
enum data_type_t
{
    eDataTypeAddress      = 0,  // Does the range contain a given address?
    eDataTypeContainsData = 1   // Does the range contain a given byte pattern?
};
160
// A byte pattern to search for, plus the stride used while scanning.
struct aligned_data_t
{
    const uint8_t *buffer;  // First byte of the pattern
    uint32_t       size;    // Pattern length in bytes
    uint32_t       align;   // Step, in bytes, between candidate offsets in a range
};
167
168struct range_contains_data_callback_info_t
169{
170    data_type_t type;
171    const void *lookup_addr;
172    union
173    {
174        uintptr_t addr;
175        aligned_data_t data;
176    };
177    uint32_t match_count;
178    bool done;
179};
180
// One search hit. Arrays of these are returned terminated by an entry
// whose members are all zero/NULL.
struct malloc_match
{
    void     *addr;    // Start address of the range that matched
    intptr_t  size;    // Size of that range in bytes
    intptr_t  offset;  // Byte offset of the hit from "addr"
};
187
// One stack logging event together with its backtrace.
struct malloc_stack_entry
{
    const void            *address;     // Address the event refers to
    uint64_t               argument;    // Copied verbatim from the logging record
    uint32_t               type_flags;  // Copied verbatim from the logging record
    std::vector<uintptr_t> frames;      // Return addresses, terminated by a zero entry
};
195
196//----------------------------------------------------------------------
197// Local global variables
198//----------------------------------------------------------------------
199std::vector<malloc_match> g_matches;
200const void *g_lookup_addr = 0;
201std::vector<malloc_stack_entry> g_malloc_stack_history;
202mach_vm_address_t g_stack_frames[MAX_FRAMES];
203char g_error_string[PATH_MAX];
204
205//----------------------------------------------------------------------
206// task_peek
207//
208// Reads memory from this tasks address space. This callback is needed
209// by the code that iterates through all of the malloc blocks to read
210// the memory in this process.
211//----------------------------------------------------------------------
212static kern_return_t
213task_peek (task_t task, vm_address_t remote_address, vm_size_t size, void **local_memory)
214{
215    *local_memory = (void*) remote_address;
216    return KERN_SUCCESS;
217}
218
219
220static const void
221foreach_zone_in_this_process (range_callback_info_t *info)
222{
223    if (info == NULL || info->zone_callback == NULL)
224        return;
225
226    vm_address_t *zones = NULL;
227    unsigned int num_zones = 0;
228
229    kern_return_t err = malloc_get_all_zones (0, task_peek, &zones, &num_zones);
230    if (KERN_SUCCESS == err)
231    {
232        for (unsigned int i=0; i<num_zones; ++i)
233        {
234            info->zone_callback (info, (const malloc_zone_t *)zones[i]);
235        }
236    }
237}
238
239//----------------------------------------------------------------------
240// dump_malloc_block_callback
241//
242// A simple callback that will dump each malloc block and all available
243// info from the enumeration callback perpective.
244//----------------------------------------------------------------------
245static void
246dump_malloc_block_callback (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size)
247{
248    printf ("task = 0x%4.4x: baton = %p, type = %u, ptr_addr = 0x%llx + 0x%llu\n", task, baton, type, ptr_addr, ptr_size);
249}
250
251static void
252ranges_callback (task_t task, void *baton, unsigned type, vm_range_t *ptrs, unsigned count)
253{
254    range_callback_info_t *info = (range_callback_info_t *)baton;
255    while(count--) {
256        info->range_callback (task, info->baton, type, ptrs->address, ptrs->size);
257        ptrs++;
258    }
259}
260
261static void
262enumerate_range_in_zone (void *baton, const malloc_zone_t *zone)
263{
264    range_callback_info_t *info = (range_callback_info_t *)baton;
265
266    if (zone && zone->introspect)
267        zone->introspect->enumerator (mach_task_self(),
268                                      info,
269                                      MALLOC_PTR_IN_USE_RANGE_TYPE,
270                                      (vm_address_t)zone,
271                                      task_peek,
272                                      ranges_callback);
273}
274
275static void
276range_info_callback (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size)
277{
278    const uint64_t end_addr = ptr_addr + ptr_size;
279
280    range_contains_data_callback_info_t *info = (range_contains_data_callback_info_t *)baton;
281    switch (info->type)
282    {
283    case eDataTypeAddress:
284        if (ptr_addr <= info->addr && info->addr < end_addr)
285        {
286            ++info->match_count;
287            malloc_match match = { (void *)ptr_addr, ptr_size, info->addr - ptr_addr };
288            g_matches.push_back(match);
289        }
290        break;
291
292    case eDataTypeContainsData:
293        {
294            const uint32_t size = info->data.size;
295            if (size < ptr_size) // Make sure this block can contain this data
296            {
297                uint8_t *ptr_data = NULL;
298                if (task_peek (task, ptr_addr, ptr_size, (void **)&ptr_data) == KERN_SUCCESS)
299                {
300                    const void *buffer = info->data.buffer;
301                    assert (ptr_data);
302                    const uint32_t align = info->data.align;
303                    for (uint64_t addr = ptr_addr;
304                         addr < end_addr && ((end_addr - addr) >= size);
305                         addr += align, ptr_data += align)
306                    {
307                        if (memcmp (buffer, ptr_data, size) == 0)
308                        {
309                            ++info->match_count;
310                            malloc_match match = { (void *)ptr_addr, ptr_size, addr - ptr_addr };
311                            g_matches.push_back(match);
312                        }
313                    }
314                }
315                else
316                {
317                    printf ("0x%llx: error: couldn't read %llu bytes\n", ptr_addr, ptr_size);
318                }
319            }
320        }
321        break;
322    }
323}
324
325static void
326get_stack_for_address_enumerator(mach_stack_logging_record_t stack_record, void *task_ptr)
327{
328    uint32_t num_frames = 0;
329    kern_return_t err = __mach_stack_logging_frames_for_uniqued_stack (*(task_t *)task_ptr,
330                                                                       stack_record.stack_identifier,
331                                                                       g_stack_frames,
332                                                                       MAX_FRAMES,
333                                                                       &num_frames);
334    g_malloc_stack_history.resize(g_malloc_stack_history.size() + 1);
335    g_malloc_stack_history.back().address = (void *)stack_record.address;
336    g_malloc_stack_history.back().type_flags = stack_record.type_flags;
337    g_malloc_stack_history.back().argument = stack_record.argument;
338    if (num_frames > 0)
339        g_malloc_stack_history.back().frames.assign(g_stack_frames, g_stack_frames + num_frames);
340    g_malloc_stack_history.back().frames.push_back(0); // Terminate the frames with zero
341}
342
343malloc_stack_entry *
344get_stack_history_for_address (const void * addr, int history)
345{
346    std::vector<malloc_stack_entry> empty;
347    g_malloc_stack_history.swap(empty);
348    if (!stack_logging_enable_logging || (history && !stack_logging_dontcompact))
349    {
350        if (history)
351            strncpy(g_error_string, "error: stack history logging is not enabled, set MallocStackLoggingNoCompact=1 in the environment when launching to enable stack history logging.", sizeof(g_error_string));
352        else
353            strncpy(g_error_string, "error: stack logging is not enabled, set MallocStackLogging=1 in the environment when launching to enable stack logging.", sizeof(g_error_string));
354        return NULL;
355    }
356    kern_return_t err;
357    task_t task = mach_task_self();
358    if (history)
359    {
360        err = __mach_stack_logging_enumerate_records (task,
361                                                      (mach_vm_address_t)addr,
362                                                      get_stack_for_address_enumerator,
363                                                      &task);
364    }
365    else
366    {
367        uint32_t num_frames = 0;
368        err = __mach_stack_logging_get_frames(task, (mach_vm_address_t)addr, g_stack_frames, MAX_FRAMES, &num_frames);
369        if (err == 0 && num_frames > 0)
370        {
371            g_malloc_stack_history.resize(1);
372            g_malloc_stack_history.back().address = addr;
373            g_malloc_stack_history.back().type_flags = stack_logging_type_alloc;
374            g_malloc_stack_history.back().argument = 0;
375            if (num_frames > 0)
376                g_malloc_stack_history.back().frames.assign(g_stack_frames, g_stack_frames + num_frames);
377            g_malloc_stack_history.back().frames.push_back(0); // Terminate the frames with zero
378        }
379    }
380    // Append an empty entry
381    if (g_malloc_stack_history.empty())
382        return NULL;
383    g_malloc_stack_history.resize(g_malloc_stack_history.size() + 1);
384    g_malloc_stack_history.back().address = 0;
385    g_malloc_stack_history.back().type_flags = 0;
386    g_malloc_stack_history.back().argument = 0;
387    return g_malloc_stack_history.data();
388}
389
390//----------------------------------------------------------------------
391// find_pointer_in_heap
392//
393// Finds a pointer value inside one or more currently valid malloc
394// blocks.
395//----------------------------------------------------------------------
396malloc_match *
397find_pointer_in_heap (const void * addr)
398{
399    g_matches.clear();
400    // Setup "info" to look for a malloc block that contains data
401    // that is the a pointer
402    range_contains_data_callback_info_t data_info;
403    data_info.type = eDataTypeContainsData;      // Check each block for data
404    g_lookup_addr = addr;
405    data_info.data.buffer = (uint8_t *)&addr;    // What data? The pointer value passed in
406    data_info.data.size = sizeof(addr);          // How many bytes? The byte size of a pointer
407    data_info.data.align = sizeof(addr);         // Align to a pointer byte size
408    data_info.match_count = 0;                   // Initialize the match count to zero
409    data_info.done = false;                      // Set done to false so searching doesn't stop
410    range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info };
411    foreach_zone_in_this_process (&info);
412    if (g_matches.empty())
413        return NULL;
414    malloc_match match = { NULL, 0, 0 };
415    g_matches.push_back(match);
416    return g_matches.data();
417}
418
419//----------------------------------------------------------------------
420// find_pointer_in_memory
421//
422// Finds a pointer value inside one or more currently valid malloc
423// blocks.
424//----------------------------------------------------------------------
425malloc_match *
426find_pointer_in_memory (uint64_t memory_addr, uint64_t memory_size, const void * addr)
427{
428    g_matches.clear();
429    // Setup "info" to look for a malloc block that contains data
430    // that is the a pointer
431    range_contains_data_callback_info_t data_info;
432    data_info.type = eDataTypeContainsData;      // Check each block for data
433    g_lookup_addr = addr;
434    data_info.data.buffer = (uint8_t *)&addr;    // What data? The pointer value passed in
435    data_info.data.size = sizeof(addr);          // How many bytes? The byte size of a pointer
436    data_info.data.align = sizeof(addr);         // Align to a pointer byte size
437    data_info.match_count = 0;                   // Initialize the match count to zero
438    data_info.done = false;                      // Set done to false so searching doesn't stop
439    range_info_callback (mach_task_self(), &data_info, stack_logging_type_generic, memory_addr, memory_size);
440    if (g_matches.empty())
441        return NULL;
442    malloc_match match = { NULL, 0, 0 };
443    g_matches.push_back(match);
444    return g_matches.data();
445}
446
447//----------------------------------------------------------------------
448// find_cstring_in_heap
449//
450// Finds a C string inside one or more currently valid malloc blocks.
451//----------------------------------------------------------------------
452malloc_match *
453find_cstring_in_heap (const char *s)
454{
455    g_matches.clear();
456    if (s == NULL || s[0] == '\0')
457    {
458        printf ("error: invalid argument (empty cstring)\n");
459        return NULL;
460    }
461    // Setup "info" to look for a malloc block that contains data
462    // that is the C string passed in aligned on a 1 byte boundary
463    range_contains_data_callback_info_t data_info;
464    data_info.type = eDataTypeContainsData;  // Check each block for data
465    g_lookup_addr = s;               // If an expression was used, then fill in the resolved address we are looking up
466    data_info.data.buffer = (uint8_t *)s;    // What data? The C string passed in
467    data_info.data.size = strlen(s);         // How many bytes? The length of the C string
468    data_info.data.align = 1;                // Data doesn't need to be aligned, so set the alignment to 1
469    data_info.match_count = 0;               // Initialize the match count to zero
470    data_info.done = false;                  // Set done to false so searching doesn't stop
471    range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info };
472    foreach_zone_in_this_process (&info);
473    if (g_matches.empty())
474        return NULL;
475    malloc_match match = { NULL, 0, 0 };
476    g_matches.push_back(match);
477    return g_matches.data();
478}
479
480//----------------------------------------------------------------------
481// find_block_for_address
482//
483// Find the malloc block that whose address range contains "addr".
484//----------------------------------------------------------------------
485malloc_match *
486find_block_for_address (const void *addr)
487{
488    g_matches.clear();
489    // Setup "info" to look for a malloc block that contains data
490    // that is the C string passed in aligned on a 1 byte boundary
491    range_contains_data_callback_info_t data_info;
492    g_lookup_addr = addr;               // If an expression was used, then fill in the resolved address we are looking up
493    data_info.type = eDataTypeAddress;  // Check each block to see if the block contains the address passed in
494    data_info.addr = (uintptr_t)addr;   // What data? The C string passed in
495    data_info.match_count = 0;          // Initialize the match count to zero
496    data_info.done = false;             // Set done to false so searching doesn't stop
497    range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info };
498    foreach_zone_in_this_process (&info);
499    if (g_matches.empty())
500        return NULL;
501    malloc_match match = { NULL, 0, 0 };
502    g_matches.push_back(match);
503    return g_matches.data();
504}
505