dynamic_images.cc revision 32441cc0608ddaf81885d23acf63f4b53cb73744
1// Copyright (c) 2007, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30extern "C" { // needed to compile on Leopard
31  #include <mach-o/nlist.h>
32  #include <stdlib.h>
33  #include <stdio.h>
34}
35
36#include "breakpad_nlist_64.h"
37#include <dlfcn.h>
38#include <mach/mach_vm.h>
39#include <algorithm>
40#include "client/mac/handler/dynamic_images.h"
41
42namespace google_breakpad {
43
44//==============================================================================
45// Returns the size of the memory region containing |address| and the
46// number of bytes from |address| to the end of the region.
47// We potentially, will extend the size of the original
48// region by the size of the following region if it's contiguous with the
49// first in order to handle cases when we're reading strings and they
50// straddle two vm regions.
51//
52static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task,
53                                          const void* address,
54                                          mach_vm_size_t *size_to_end) {
55  mach_vm_address_t region_base = (mach_vm_address_t)address;
56  mach_vm_size_t region_size;
57  natural_t nesting_level = 0;
58  vm_region_submap_info_64 submap_info;
59  mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64;
60
61  // Get information about the vm region containing |address|
62  vm_region_recurse_info_t region_info;
63  region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info);
64
65  kern_return_t result =
66    mach_vm_region_recurse(target_task,
67                           &region_base,
68                           &region_size,
69                           &nesting_level,
70                           region_info,
71                           &info_count);
72
73  if (result == KERN_SUCCESS) {
74    // Get distance from |address| to the end of this region
75    *size_to_end = region_base + region_size -(mach_vm_address_t)address;
76
77    // If we want to handle strings as long as 4096 characters we may need
78    // to check if there's a vm region immediately following the first one.
79    // If so, we need to extend |*size_to_end| to go all the way to the end
80    // of the second region.
81    if (*size_to_end < 4096) {
82      // Second region starts where the first one ends
83      mach_vm_address_t region_base2 =
84        (mach_vm_address_t)(region_base + region_size);
85      mach_vm_size_t region_size2;
86
87      // Get information about the following vm region
88      result =
89        mach_vm_region_recurse(target_task,
90                               &region_base2,
91                               &region_size2,
92                               &nesting_level,
93                               region_info,
94                               &info_count);
95
96      // Extend region_size to go all the way to the end of the 2nd region
97      if (result == KERN_SUCCESS
98          && region_base2 == region_base + region_size) {
99        region_size += region_size2;
100      }
101    }
102
103    *size_to_end = region_base + region_size -(mach_vm_address_t)address;
104  } else {
105    region_size = 0;
106    *size_to_end = 0;
107  }
108
109  return region_size;
110}
111
112#define kMaxStringLength 8192
113//==============================================================================
114// Reads a NULL-terminated string from another task.
115//
116// Warning!  This will not read any strings longer than kMaxStringLength-1
117//
118static void* ReadTaskString(task_port_t target_task,
119                            const void* address) {
120  // The problem is we don't know how much to read until we know how long
121  // the string is. And we don't know how long the string is, until we've read
122  // the memory!  So, we'll try to read kMaxStringLength bytes
123  // (or as many bytes as we can until we reach the end of the vm region).
124  mach_vm_size_t size_to_end;
125  GetMemoryRegionSize(target_task, address, &size_to_end);
126
127  if (size_to_end > 0) {
128    mach_vm_size_t size_to_read =
129      size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end;
130
131    kern_return_t kr;
132    return ReadTaskMemory(target_task, address, size_to_read, &kr);
133  }
134
135  return NULL;
136}
137
138//==============================================================================
139// Reads an address range from another task.  A block of memory is malloced
140// and should be freed by the caller.
141void* ReadTaskMemory(task_port_t target_task,
142                     const void* address,
143                     size_t length,
144                     kern_return_t *kr) {
145  void* result = NULL;
146  int systemPageSize = getpagesize();
147
148  // use the negative of the page size for the mask to find the page address
149  mach_vm_address_t page_address =
150      reinterpret_cast<mach_vm_address_t>(address) & (-systemPageSize);
151
152  mach_vm_address_t last_page_address =
153      (reinterpret_cast<mach_vm_address_t>(address) + length +
154       (systemPageSize - 1)) & (-systemPageSize);
155
156  mach_vm_size_t page_size = last_page_address - page_address;
157  uint8_t* local_start;
158  uint32_t local_length;
159
160  kern_return_t r;
161
162  r = mach_vm_read(target_task,
163                   page_address,
164                   page_size,
165                   reinterpret_cast<vm_offset_t*>(&local_start),
166                   &local_length);
167
168
169  if (kr != NULL) {
170    *kr = r;
171  }
172
173  if (r == KERN_SUCCESS) {
174    result = malloc(length);
175    if (result != NULL) {
176      memcpy(result,
177             &local_start[(mach_vm_address_t)address - page_address],
178             length);
179    }
180    mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length);
181  }
182
183  return result;
184}
185
186#pragma mark -
187
188//==============================================================================
189// Initializes vmaddr_, vmsize_, and slide_
190void DynamicImage::CalculateMemoryAndVersionInfo() {
191  breakpad_mach_header *header = GetMachHeader();
192
193  // unless we can process the header, ensure that calls to
194  // IsValid() will return false
195  vmaddr_ = 0;
196  vmsize_ = 0;
197  slide_ = 0;
198  version_ = 0;
199
200  bool foundTextSection = false;
201  bool foundDylibIDCommand = false;
202
203#if __LP64__
204  if(header->magic != MH_MAGIC_64) {
205    return;
206  }
207#else
208  if(header->magic != MH_MAGIC) {
209    return;
210  }
211#endif
212
213#ifdef __LP64__
214  const uint32_t segmentLoadCommand = LC_SEGMENT_64;
215#else
216  const uint32_t segmentLoadCommand = LC_SEGMENT;
217#endif
218
219  const struct load_command *cmd =
220    reinterpret_cast<const struct load_command *>(header + 1);
221
222  for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) {
223    if (!foundTextSection) {
224      if (cmd->cmd == segmentLoadCommand) {
225        const breakpad_mach_segment_command *seg =
226            reinterpret_cast<const breakpad_mach_segment_command *>(cmd);
227
228        if (!strcmp(seg->segname, "__TEXT")) {
229          vmaddr_ = seg->vmaddr;
230          vmsize_ = seg->vmsize;
231          slide_ = 0;
232
233          if (seg->fileoff == 0  &&  seg->filesize != 0) {
234            slide_ = (uintptr_t)GetLoadAddress() - (uintptr_t)seg->vmaddr;
235          }
236          foundTextSection = true;
237        }
238      }
239    }
240
241    if (!foundDylibIDCommand) {
242      if (cmd->cmd == LC_ID_DYLIB) {
243        const struct dylib_command *dc =
244            reinterpret_cast<const struct dylib_command *>(cmd);
245
246        version_ = dc->dylib.current_version;
247        foundDylibIDCommand = true;
248      }
249    }
250
251    if (foundDylibIDCommand && foundTextSection) {
252      return;
253    }
254
255    cmd = reinterpret_cast<const struct load_command *>
256      (reinterpret_cast<const char *>(cmd) + cmd->cmdsize);
257  }
258
259}
260
261void DynamicImage::Print() {
262  const char *path = GetFilePath();
263  if (!path) {
264    path = "(unknown)";
265  }
266  printf("%p: %s\n", GetLoadAddress(), path);
267  breakpad_mach_header *header = GetMachHeader();
268  MachHeader(*header).Print();
269  printf("vmaddr\t\t: %p\n", reinterpret_cast<void*>(GetVMAddr()));
270  printf("vmsize\t\t: %llu\n", GetVMSize());
271  printf("slide\t\t: %td\n", GetVMAddrSlide());
272}
273
274#pragma mark -
275
276//==============================================================================
277// Loads information about dynamically loaded code in the given task.
278DynamicImages::DynamicImages(mach_port_t task)
279  : task_(task) {
280  ReadImageInfoForTask();
281}
282
283void* DynamicImages::GetDyldAllImageInfosPointer()
284{
285
286  const char *imageSymbolName = "_dyld_all_image_infos";
287  const char *dyldPath = "/usr/lib/dyld";
288#ifndef __LP64__
289  struct nlist l[8];
290  memset(l, 0, sizeof(l) );
291
292  // First we lookup the address of the "_dyld_all_image_infos" struct
293  // which lives in "dyld".  This structure contains information about all
294  // of the loaded dynamic images.
295  struct nlist &list = l[0];
296  list.n_un.n_name = const_cast<char *>(imageSymbolName);
297  nlist(dyldPath,&list);
298  if(list.n_value) {
299    return reinterpret_cast<void*>(list.n_value);
300  }
301
302  return NULL;
303#else
304  struct nlist_64 l[8];
305  struct nlist_64 &list = l[0];
306
307  memset(l, 0, sizeof(l) );
308
309  const char *symbolNames[2] = { imageSymbolName, "\0" };
310
311  int invalidEntriesCount = breakpad_nlist_64(dyldPath,&list,symbolNames);
312
313  if(invalidEntriesCount != 0) {
314    return NULL;
315  }
316  assert(list.n_value);
317  return reinterpret_cast<void*>(list.n_value);
318#endif
319
320}
321//==============================================================================
322// This code was written using dyld_debug.c (from Darwin) as a guide.
323void DynamicImages::ReadImageInfoForTask() {
324  void *imageList = GetDyldAllImageInfosPointer();
325
326  if (imageList) {
327    kern_return_t kr;
328    // Read the structure inside of dyld that contains information about
329    // loaded images.  We're reading from the desired task's address space.
330
331    // Here we make the assumption that dyld loaded at the same address in
332    // the crashed process vs. this one.  This is an assumption made in
333    // "dyld_debug.c" and is said to be nearly always valid.
334    dyld_all_image_infos *dyldInfo = reinterpret_cast<dyld_all_image_infos*>
335      (ReadTaskMemory(task_,
336                      reinterpret_cast<void*>(imageList),
337                      sizeof(dyld_all_image_infos), &kr));
338
339    if (dyldInfo) {
340      // number of loaded images
341      int count = dyldInfo->infoArrayCount;
342
343      // Read an array of dyld_image_info structures each containing
344      // information about a loaded image.
345      dyld_image_info *infoArray = reinterpret_cast<dyld_image_info*>
346        (ReadTaskMemory(task_,
347                        dyldInfo->infoArray,
348                        count*sizeof(dyld_image_info), &kr));
349
350      image_list_.reserve(count);
351
352      for (int i = 0; i < count; ++i) {
353        dyld_image_info &info = infoArray[i];
354
355        // First read just the mach_header from the image in the task.
356        breakpad_mach_header *header = reinterpret_cast<breakpad_mach_header*>
357          (ReadTaskMemory(task_,
358                          info.load_address_,
359                          sizeof(breakpad_mach_header), &kr));
360
361        if (!header)
362          break;   // bail on this dynamic image
363
364        // Now determine the total amount we really want to read based on the
365        // size of the load commands.  We need the header plus all of the
366        // load commands.
367        unsigned int header_size =
368            sizeof(breakpad_mach_header) + header->sizeofcmds;
369
370        free(header);
371
372        header = reinterpret_cast<breakpad_mach_header*>
373          (ReadTaskMemory(task_, info.load_address_, header_size, &kr));
374
375        // Read the file name from the task's memory space.
376        char *file_path = NULL;
377        if (info.file_path_) {
378          // Although we're reading kMaxStringLength bytes, it's copied in the
379          // the DynamicImage constructor below with the correct string length,
380          // so it's not really wasting memory.
381          file_path = reinterpret_cast<char*>
382            (ReadTaskString(task_, info.file_path_));
383        }
384
385        // Create an object representing this image and add it to our list.
386        DynamicImage *new_image;
387        new_image = new DynamicImage(header,
388                                     header_size,
389                                     (breakpad_mach_header*)info.load_address_,
390                                     file_path,
391                                     info.file_mod_date_,
392                                     task_);
393
394        if (new_image->IsValid()) {
395          image_list_.push_back(DynamicImageRef(new_image));
396        } else {
397          delete new_image;
398        }
399
400        if (file_path) {
401          free(file_path);
402        }
403      }
404
405      free(dyldInfo);
406      free(infoArray);
407
408      // sorts based on loading address
409      sort(image_list_.begin(), image_list_.end() );
410      // remove duplicates - this happens in certain strange cases
411      // You can see it in DashboardClient when Google Gadgets plugin
412      // is installed.  Apple's crash reporter log and gdb "info shared"
413      // both show the same library multiple times at the same address
414
415      vector<DynamicImageRef>::iterator it = unique(image_list_.begin(),
416                                                    image_list_.end() );
417      image_list_.erase(it, image_list_.end());
418    }
419  }
420}
421
422//==============================================================================
423DynamicImage  *DynamicImages::GetExecutableImage() {
424  int executable_index = GetExecutableImageIndex();
425
426  if (executable_index >= 0) {
427    return GetImage(executable_index);
428  }
429
430  return NULL;
431}
432
433//==============================================================================
434// returns -1 if failure to find executable
435int DynamicImages::GetExecutableImageIndex() {
436  int image_count = GetImageCount();
437
438  for (int i = 0; i < image_count; ++i) {
439    DynamicImage  *image = GetImage(i);
440    if (image->GetMachHeader()->filetype == MH_EXECUTE) {
441      return i;
442    }
443  }
444
445  return -1;
446}
447
448}  // namespace google_breakpad
449