1// Copyright (c) 2007, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30#include "client/mac/handler/dynamic_images.h"
31
32extern "C" { // needed to compile on Leopard
33  #include <mach-o/nlist.h>
34  #include <stdlib.h>
35  #include <stdio.h>
36}
37
38#include <assert.h>
39#include <AvailabilityMacros.h>
40#include <dlfcn.h>
41#include <mach/task_info.h>
42#include <sys/sysctl.h>
43#include <TargetConditionals.h>
44#include <unistd.h>
45
46#include <algorithm>
47#include <string>
48#include <vector>
49
50#include "breakpad_nlist_64.h"
51
52#if !TARGET_OS_IPHONE
53#include <CoreServices/CoreServices.h>
54
55#ifndef MAC_OS_X_VERSION_10_6
56#define MAC_OS_X_VERSION_10_6 1060
57#endif
58
59#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6
60
61// Fallback declarations for TASK_DYLD_INFO and friends, introduced in
62// <mach/task_info.h> in the Mac OS X 10.6 SDK.
63#define TASK_DYLD_INFO 17
64struct task_dyld_info {
65  mach_vm_address_t all_image_info_addr;
66  mach_vm_size_t all_image_info_size;
67};
68typedef struct task_dyld_info task_dyld_info_data_t;
69typedef struct task_dyld_info *task_dyld_info_t;
70#define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t))
71
72#endif
73
74#endif  // !TARGET_OS_IPHONE
75
76namespace google_breakpad {
77
78using std::string;
79using std::vector;
80
81//==============================================================================
82// Returns the size of the memory region containing |address| and the
83// number of bytes from |address| to the end of the region.
84// We potentially, will extend the size of the original
85// region by the size of the following region if it's contiguous with the
86// first in order to handle cases when we're reading strings and they
87// straddle two vm regions.
88//
89static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task,
90                                          const uint64_t address,
91                                          mach_vm_size_t *size_to_end) {
92  mach_vm_address_t region_base = (mach_vm_address_t)address;
93  mach_vm_size_t region_size;
94  natural_t nesting_level = 0;
95  vm_region_submap_info_64 submap_info;
96  mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64;
97
98  // Get information about the vm region containing |address|
99  vm_region_recurse_info_t region_info;
100  region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info);
101
102  kern_return_t result =
103    mach_vm_region_recurse(target_task,
104                           &region_base,
105                           &region_size,
106                           &nesting_level,
107                           region_info,
108                           &info_count);
109
110  if (result == KERN_SUCCESS) {
111    // Get distance from |address| to the end of this region
112    *size_to_end = region_base + region_size -(mach_vm_address_t)address;
113
114    // If we want to handle strings as long as 4096 characters we may need
115    // to check if there's a vm region immediately following the first one.
116    // If so, we need to extend |*size_to_end| to go all the way to the end
117    // of the second region.
118    if (*size_to_end < 4096) {
119      // Second region starts where the first one ends
120      mach_vm_address_t region_base2 =
121        (mach_vm_address_t)(region_base + region_size);
122      mach_vm_size_t region_size2;
123
124      // Get information about the following vm region
125      result =
126        mach_vm_region_recurse(target_task,
127                               &region_base2,
128                               &region_size2,
129                               &nesting_level,
130                               region_info,
131                               &info_count);
132
133      // Extend region_size to go all the way to the end of the 2nd region
134      if (result == KERN_SUCCESS
135          && region_base2 == region_base + region_size) {
136        region_size += region_size2;
137      }
138    }
139
140    *size_to_end = region_base + region_size -(mach_vm_address_t)address;
141  } else {
142    region_size = 0;
143    *size_to_end = 0;
144  }
145
146  return region_size;
147}
148
149#define kMaxStringLength 8192
150//==============================================================================
151// Reads a NULL-terminated string from another task.
152//
153// Warning!  This will not read any strings longer than kMaxStringLength-1
154//
155static string ReadTaskString(task_port_t target_task,
156                             const uint64_t address) {
157  // The problem is we don't know how much to read until we know how long
158  // the string is. And we don't know how long the string is, until we've read
159  // the memory!  So, we'll try to read kMaxStringLength bytes
160  // (or as many bytes as we can until we reach the end of the vm region).
161  mach_vm_size_t size_to_end;
162  GetMemoryRegionSize(target_task, address, &size_to_end);
163
164  if (size_to_end > 0) {
165    mach_vm_size_t size_to_read =
166      size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end;
167
168    vector<uint8_t> bytes;
169    if (ReadTaskMemory(target_task, address, (size_t)size_to_read, bytes) !=
170        KERN_SUCCESS)
171      return string();
172
173    return string(reinterpret_cast<const char*>(&bytes[0]));
174  }
175
176  return string();
177}
178
179//==============================================================================
180// Reads an address range from another task. The bytes read will be returned
181// in bytes, which will be resized as necessary.
182kern_return_t ReadTaskMemory(task_port_t target_task,
183                             const uint64_t address,
184                             size_t length,
185                             vector<uint8_t> &bytes) {
186  int systemPageSize = getpagesize();
187
188  // use the negative of the page size for the mask to find the page address
189  mach_vm_address_t page_address = address & (-systemPageSize);
190
191  mach_vm_address_t last_page_address =
192      (address + length + (systemPageSize - 1)) & (-systemPageSize);
193
194  mach_vm_size_t page_size = last_page_address - page_address;
195  uint8_t* local_start;
196  uint32_t local_length;
197
198  kern_return_t r = mach_vm_read(target_task,
199                                 page_address,
200                                 page_size,
201                                 reinterpret_cast<vm_offset_t*>(&local_start),
202                                 &local_length);
203
204  if (r != KERN_SUCCESS)
205    return r;
206
207  bytes.resize(length);
208  memcpy(&bytes[0],
209         &local_start[(mach_vm_address_t)address - page_address],
210         length);
211  mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length);
212  return KERN_SUCCESS;
213}
214
215#pragma mark -
216
217//==============================================================================
218// Traits structs for specializing function templates to handle
219// 32-bit/64-bit Mach-O files.
220struct MachO32 {
221  typedef mach_header mach_header_type;
222  typedef segment_command mach_segment_command_type;
223  typedef dyld_image_info32 dyld_image_info;
224  typedef dyld_all_image_infos32 dyld_all_image_infos;
225  typedef struct nlist nlist_type;
226  static const uint32_t magic = MH_MAGIC;
227  static const uint32_t segment_load_command = LC_SEGMENT;
228};
229
230struct MachO64 {
231  typedef mach_header_64 mach_header_type;
232  typedef segment_command_64 mach_segment_command_type;
233  typedef dyld_image_info64 dyld_image_info;
234  typedef dyld_all_image_infos64 dyld_all_image_infos;
235  typedef struct nlist_64 nlist_type;
236  static const uint32_t magic = MH_MAGIC_64;
237  static const uint32_t segment_load_command = LC_SEGMENT_64;
238};
239
240template<typename MachBits>
241bool FindTextSection(DynamicImage& image) {
242  typedef typename MachBits::mach_header_type mach_header_type;
243  typedef typename MachBits::mach_segment_command_type
244      mach_segment_command_type;
245
246  const mach_header_type* header =
247      reinterpret_cast<const mach_header_type*>(&image.header_[0]);
248
249  if(header->magic != MachBits::magic) {
250    return false;
251  }
252
253  const struct load_command *cmd =
254      reinterpret_cast<const struct load_command *>(header + 1);
255
256  bool found_text_section = false;
257  bool found_dylib_id_command = false;
258  for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) {
259    if (!found_text_section) {
260      if (cmd->cmd == MachBits::segment_load_command) {
261        const mach_segment_command_type *seg =
262            reinterpret_cast<const mach_segment_command_type *>(cmd);
263
264        if (!strcmp(seg->segname, "__TEXT")) {
265          image.vmaddr_ = static_cast<mach_vm_address_t>(seg->vmaddr);
266          image.vmsize_ = static_cast<mach_vm_size_t>(seg->vmsize);
267          image.slide_ = 0;
268
269          if (seg->fileoff == 0 && seg->filesize != 0) {
270            image.slide_ =
271                (uintptr_t)image.GetLoadAddress() - (uintptr_t)seg->vmaddr;
272          }
273          found_text_section = true;
274        }
275      }
276    }
277
278    if (!found_dylib_id_command) {
279      if (cmd->cmd == LC_ID_DYLIB) {
280        const struct dylib_command *dc =
281            reinterpret_cast<const struct dylib_command *>(cmd);
282
283        image.version_ = dc->dylib.current_version;
284        found_dylib_id_command = true;
285      }
286    }
287
288    if (found_dylib_id_command && found_text_section) {
289      return true;
290    }
291
292    cmd = reinterpret_cast<const struct load_command *>
293        (reinterpret_cast<const char *>(cmd) + cmd->cmdsize);
294  }
295
296  return false;
297}
298
299//==============================================================================
300// Initializes vmaddr_, vmsize_, and slide_
301void DynamicImage::CalculateMemoryAndVersionInfo() {
302  // unless we can process the header, ensure that calls to
303  // IsValid() will return false
304  vmaddr_ = 0;
305  vmsize_ = 0;
306  slide_ = 0;
307  version_ = 0;
308
309  // The function template above does all the real work.
310  if (Is64Bit())
311    FindTextSection<MachO64>(*this);
312  else
313    FindTextSection<MachO32>(*this);
314}
315
316//==============================================================================
317// The helper function template abstracts the 32/64-bit differences.
318template<typename MachBits>
319uint32_t GetFileTypeFromHeader(DynamicImage& image) {
320  typedef typename MachBits::mach_header_type mach_header_type;
321
322  const mach_header_type* header =
323      reinterpret_cast<const mach_header_type*>(&image.header_[0]);
324  return header->filetype;
325}
326
327uint32_t DynamicImage::GetFileType() {
328  if (Is64Bit())
329    return GetFileTypeFromHeader<MachO64>(*this);
330
331  return GetFileTypeFromHeader<MachO32>(*this);
332}
333
334#pragma mark -
335
336//==============================================================================
337// Loads information about dynamically loaded code in the given task.
338DynamicImages::DynamicImages(mach_port_t task)
339    : task_(task),
340      cpu_type_(DetermineTaskCPUType(task)),
341      image_list_() {
342  ReadImageInfoForTask();
343}
344
345template<typename MachBits>
346static uint64_t LookupSymbol(const char* symbol_name,
347                             const char* filename,
348                             cpu_type_t cpu_type) {
349  typedef typename MachBits::nlist_type nlist_type;
350
351  nlist_type symbol_info[8] = {};
352  const char *symbolNames[2] = { symbol_name, "\0" };
353  nlist_type &list = symbol_info[0];
354  int invalidEntriesCount = breakpad_nlist(filename,
355                                           &list,
356                                           symbolNames,
357                                           cpu_type);
358
359  if(invalidEntriesCount != 0) {
360    return 0;
361  }
362
363  assert(list.n_value);
364  return list.n_value;
365}
366
367#if TARGET_OS_IPHONE
// iPhone builds always report TASK_DYLD_INFO as available.
static bool HasTaskDyldInfo() { return true; }
371#else
372static SInt32 GetOSVersionInternal() {
373  SInt32 os_version = 0;
374  Gestalt(gestaltSystemVersion, &os_version);
375  return os_version;
376}
377
378static SInt32 GetOSVersion() {
379  static SInt32 os_version = GetOSVersionInternal();
380  return os_version;
381}
382
// Reports whether TASK_DYLD_INFO can be used.  Builds whose minimum required
// OS is 10.6 or later answer true at compile time; older deployment targets
// compare the running OS version against 0x1060.
static bool HasTaskDyldInfo() {
#if MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_6
  return GetOSVersion() >= 0x1060;
#else
  return true;
#endif
}
390#endif  // TARGET_OS_IPHONE
391
392uint64_t DynamicImages::GetDyldAllImageInfosPointer() {
393  if (HasTaskDyldInfo()) {
394    task_dyld_info_data_t task_dyld_info;
395    mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
396    if (task_info(task_, TASK_DYLD_INFO, (task_info_t)&task_dyld_info,
397                  &count) != KERN_SUCCESS) {
398      return 0;
399    }
400
401    return (uint64_t)task_dyld_info.all_image_info_addr;
402  } else {
403    const char *imageSymbolName = "_dyld_all_image_infos";
404    const char *dyldPath = "/usr/lib/dyld";
405
406    if (Is64Bit())
407      return LookupSymbol<MachO64>(imageSymbolName, dyldPath, cpu_type_);
408    return LookupSymbol<MachO32>(imageSymbolName, dyldPath, cpu_type_);
409  }
410}
411
412//==============================================================================
413// This code was written using dyld_debug.c (from Darwin) as a guide.
414
415template<typename MachBits>
416void ReadImageInfo(DynamicImages& images,
417                   uint64_t image_list_address) {
418  typedef typename MachBits::dyld_image_info dyld_image_info;
419  typedef typename MachBits::dyld_all_image_infos dyld_all_image_infos;
420  typedef typename MachBits::mach_header_type mach_header_type;
421
422  // Read the structure inside of dyld that contains information about
423  // loaded images.  We're reading from the desired task's address space.
424
425  // Here we make the assumption that dyld loaded at the same address in
426  // the crashed process vs. this one.  This is an assumption made in
427  // "dyld_debug.c" and is said to be nearly always valid.
428  vector<uint8_t> dyld_all_info_bytes;
429  if (ReadTaskMemory(images.task_,
430                     image_list_address,
431                     sizeof(dyld_all_image_infos),
432                     dyld_all_info_bytes) != KERN_SUCCESS)
433    return;
434
435  dyld_all_image_infos *dyldInfo =
436    reinterpret_cast<dyld_all_image_infos*>(&dyld_all_info_bytes[0]);
437
438  // number of loaded images
439  int count = dyldInfo->infoArrayCount;
440
441  // Read an array of dyld_image_info structures each containing
442  // information about a loaded image.
443  vector<uint8_t> dyld_info_array_bytes;
444    if (ReadTaskMemory(images.task_,
445                       dyldInfo->infoArray,
446                       count * sizeof(dyld_image_info),
447                       dyld_info_array_bytes) != KERN_SUCCESS)
448      return;
449
450    dyld_image_info *infoArray =
451        reinterpret_cast<dyld_image_info*>(&dyld_info_array_bytes[0]);
452    images.image_list_.reserve(count);
453
454    for (int i = 0; i < count; ++i) {
455      dyld_image_info &info = infoArray[i];
456
457      // First read just the mach_header from the image in the task.
458      vector<uint8_t> mach_header_bytes;
459      if (ReadTaskMemory(images.task_,
460                         info.load_address_,
461                         sizeof(mach_header_type),
462                         mach_header_bytes) != KERN_SUCCESS)
463        continue;  // bail on this dynamic image
464
465      mach_header_type *header =
466          reinterpret_cast<mach_header_type*>(&mach_header_bytes[0]);
467
468      // Now determine the total amount necessary to read the header
469      // plus all of the load commands.
470      size_t header_size =
471          sizeof(mach_header_type) + header->sizeofcmds;
472
473      if (ReadTaskMemory(images.task_,
474                         info.load_address_,
475                         header_size,
476                         mach_header_bytes) != KERN_SUCCESS)
477        continue;
478
479      // Read the file name from the task's memory space.
480      string file_path;
481      if (info.file_path_) {
482        // Although we're reading kMaxStringLength bytes, it's copied in the
483        // the DynamicImage constructor below with the correct string length,
484        // so it's not really wasting memory.
485        file_path = ReadTaskString(images.task_, info.file_path_);
486      }
487
488      // Create an object representing this image and add it to our list.
489      DynamicImage *new_image;
490      new_image = new DynamicImage(&mach_header_bytes[0],
491                                   header_size,
492                                   info.load_address_,
493                                   file_path,
494                                   static_cast<uintptr_t>(info.file_mod_date_),
495                                   images.task_,
496                                   images.cpu_type_);
497
498      if (new_image->IsValid()) {
499        images.image_list_.push_back(DynamicImageRef(new_image));
500      } else {
501        delete new_image;
502      }
503    }
504
505    // sorts based on loading address
506    sort(images.image_list_.begin(), images.image_list_.end());
507    // remove duplicates - this happens in certain strange cases
508    // You can see it in DashboardClient when Google Gadgets plugin
509    // is installed.  Apple's crash reporter log and gdb "info shared"
510    // both show the same library multiple times at the same address
511
512    vector<DynamicImageRef>::iterator it = unique(images.image_list_.begin(),
513                                                  images.image_list_.end());
514    images.image_list_.erase(it, images.image_list_.end());
515}
516
517void DynamicImages::ReadImageInfoForTask() {
518  uint64_t imageList = GetDyldAllImageInfosPointer();
519
520  if (imageList) {
521    if (Is64Bit())
522      ReadImageInfo<MachO64>(*this, imageList);
523    else
524      ReadImageInfo<MachO32>(*this, imageList);
525  }
526}
527
528//==============================================================================
529DynamicImage  *DynamicImages::GetExecutableImage() {
530  int executable_index = GetExecutableImageIndex();
531
532  if (executable_index >= 0) {
533    return GetImage(executable_index);
534  }
535
536  return NULL;
537}
538
539//==============================================================================
540// returns -1 if failure to find executable
541int DynamicImages::GetExecutableImageIndex() {
542  int image_count = GetImageCount();
543
544  for (int i = 0; i < image_count; ++i) {
545    DynamicImage  *image = GetImage(i);
546    if (image->GetFileType() == MH_EXECUTE) {
547      return i;
548    }
549  }
550
551  return -1;
552}
553
554//==============================================================================
555// static
556cpu_type_t DynamicImages::DetermineTaskCPUType(task_t task) {
557  if (task == mach_task_self())
558    return GetNativeCPUType();
559
560  int mib[CTL_MAXNAME];
561  size_t mibLen = CTL_MAXNAME;
562  int err = sysctlnametomib("sysctl.proc_cputype", mib, &mibLen);
563  if (err == 0) {
564    assert(mibLen < CTL_MAXNAME);
565    pid_for_task(task, &mib[mibLen]);
566    mibLen += 1;
567
568    cpu_type_t cpu_type;
569    size_t cpuTypeSize = sizeof(cpu_type);
570    sysctl(mib, static_cast<u_int>(mibLen), &cpu_type, &cpuTypeSize, 0, 0);
571    return cpu_type;
572  }
573
574  return GetNativeCPUType();
575}
576
577}  // namespace google_breakpad
578