1// Copyright (c) 2007, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30//  dynamic_images.h
31//
//    Implements most of the functionality of the dyld API, but allows an
//    arbitrary task to be introspected, unlike the dyld API, which only
//    allows operation on the current task.  The implementation was
//    originally documented as limited to 32-bit tasks; note that this header
//    also declares 64-bit variants of the dyld structures.
36
37#ifndef CLIENT_MAC_HANDLER_DYNAMIC_IMAGES_H__
38#define CLIENT_MAC_HANDLER_DYNAMIC_IMAGES_H__
39
40#include <mach/mach.h>
41#include <mach-o/dyld.h>
42#include <mach-o/loader.h>
43#include <sys/types.h>
44
45#include <string>
46#include <vector>
47
48#include "mach_vm_compat.h"
49
50namespace google_breakpad {
51
52using std::string;
53using std::vector;
54
55//==============================================================================
56// The memory layout of this struct matches the dyld_image_info struct
57// defined in "dyld_gdb.h" in the darwin source.
// Per-image record with every member stored as a 32-bit integer; the
// pointer-valued members are kept as raw 32-bit addresses rather than as
// real pointers.
struct dyld_image_info32 {
  uint32_t load_address_;   // struct mach_header*
  uint32_t file_path_;      // char*
  uint32_t file_mod_date_;
};
63
// Per-image record with every member stored as a 64-bit integer; the
// pointer-valued members are kept as raw 64-bit addresses rather than as
// real pointers.
struct dyld_image_info64 {
  uint64_t load_address_;   // struct mach_header*
  uint64_t file_path_;      // char*
  uint64_t file_mod_date_;
};
69
70//==============================================================================
71// This is as defined in "dyld_gdb.h" in the darwin source.
72// _dyld_all_image_infos (in dyld) is a structure of this type
73// which will be used to determine which dynamic code has been loaded.
// Image-list descriptor whose pointer-valued members (infoArray,
// notification) are stored as raw 32-bit addresses.
struct dyld_all_image_infos32 {
  uint32_t version;          // == 1 in Mac OS X 10.4
  uint32_t infoArrayCount;
  uint32_t infoArray;        // const struct dyld_image_info*
  uint32_t notification;
  bool     processDetachedFromSharedRegion;
};
81
// Image-list descriptor whose pointer-valued members (infoArray,
// notification) are stored as raw 64-bit addresses.  version and
// infoArrayCount stay 32 bits wide.
struct dyld_all_image_infos64 {
  uint32_t version;          // == 1 in Mac OS X 10.4
  uint32_t infoArrayCount;
  uint64_t infoArray;        // const struct dyld_image_info*
  uint64_t notification;
  bool     processDetachedFromSharedRegion;
};
89
90// some typedefs to isolate 64/32 bit differences
91#ifdef __LP64__
92typedef mach_header_64 breakpad_mach_header;
93typedef segment_command_64 breakpad_mach_segment_command;
94#else
95typedef mach_header breakpad_mach_header;
96typedef segment_command breakpad_mach_segment_command;
97#endif
98
// Helpers for dealing with 32-bit/64-bit Mach-O differences.  Both are
// templated on MachBits and only declared here; DynamicImage names them as
// friends.
class DynamicImage;

template <class MachBits>
uint32_t GetFileTypeFromHeader(DynamicImage& image);

template <class MachBits>
bool FindTextSection(DynamicImage& image);
106
107//==============================================================================
108// Represents a single dynamically loaded mach-o image
109class DynamicImage {
110 public:
111  DynamicImage(uint8_t *header,     // data is copied
112               size_t header_size,  // includes load commands
113               uint64_t load_address,
114               string file_path,
115               uintptr_t image_mod_date,
116               mach_port_t task,
117               cpu_type_t cpu_type)
118    : header_(header, header + header_size),
119      header_size_(header_size),
120      load_address_(load_address),
121      vmaddr_(0),
122      vmsize_(0),
123      slide_(0),
124      version_(0),
125      file_path_(file_path),
126      file_mod_date_(image_mod_date),
127      task_(task),
128      cpu_type_(cpu_type) {
129    CalculateMemoryAndVersionInfo();
130  }
131
132  // Size of mach_header plus load commands
133  size_t GetHeaderSize() const {return header_.size();}
134
135  // Full path to mach-o binary
136  string GetFilePath() {return file_path_;}
137
138  uint64_t GetModDate() const {return file_mod_date_;}
139
140  // Actual address where the image was loaded
141  uint64_t GetLoadAddress() const {return load_address_;}
142
143  // Address where the image should be loaded
144  mach_vm_address_t GetVMAddr() const {return vmaddr_;}
145
146  // Difference between GetLoadAddress() and GetVMAddr()
147  ptrdiff_t GetVMAddrSlide() const {return slide_;}
148
149  // Size of the image
150  mach_vm_size_t GetVMSize() const {return vmsize_;}
151
152  // Task owning this loaded image
153  mach_port_t GetTask() {return task_;}
154
155  // CPU type of the task
156  cpu_type_t GetCPUType() {return cpu_type_;}
157
158  // filetype from the Mach-O header.
159  uint32_t GetFileType();
160
161  // Return true if the task is a 64-bit architecture.
162  bool Is64Bit() { return (GetCPUType() & CPU_ARCH_ABI64) == CPU_ARCH_ABI64; }
163
164  uint32_t GetVersion() {return version_;}
165  // For sorting
166  bool operator<(const DynamicImage &inInfo) {
167    return GetLoadAddress() < inInfo.GetLoadAddress();
168  }
169
170  // Sanity checking
171  bool IsValid() {return GetVMSize() != 0;}
172
173 private:
174  DynamicImage(const DynamicImage &);
175  DynamicImage &operator=(const DynamicImage &);
176
177  friend class DynamicImages;
178  template<typename MachBits>
179  friend bool FindTextSection(DynamicImage& image);
180  template<typename MachBits>
181  friend uint32_t GetFileTypeFromHeader(DynamicImage& image);
182
183  // Initializes vmaddr_, vmsize_, and slide_
184  void CalculateMemoryAndVersionInfo();
185
186  const vector<uint8_t>   header_;        // our local copy of the header
187  size_t                  header_size_;    // mach_header plus load commands
188  uint64_t                load_address_;   // base address image is mapped into
189  mach_vm_address_t       vmaddr_;
190  mach_vm_size_t          vmsize_;
191  ptrdiff_t               slide_;
192  uint32_t                version_;        // Dylib version
193  string                  file_path_;     // path dyld used to load the image
194  uintptr_t               file_mod_date_;  // time_t of image file
195
196  mach_port_t             task_;
197  cpu_type_t              cpu_type_;        // CPU type of task_
198};
199
200//==============================================================================
// DynamicImageRef is just a simple wrapper for a pointer to
// DynamicImage.  The reason we use it instead of a simple typedef is so
// that we can use std::sort() on a vector of DynamicImageRefs,
// since plain class pointers can't implement operator<().
205//
206class DynamicImageRef {
207 public:
208  explicit DynamicImageRef(DynamicImage *inP) : p(inP) {}
209  // The copy constructor is required by STL
210  DynamicImageRef(const DynamicImageRef &inRef) : p(inRef.p) {}
211
212  bool operator<(const DynamicImageRef &inRef) const {
213    return (*const_cast<DynamicImageRef*>(this)->p)
214      < (*const_cast<DynamicImageRef&>(inRef).p);
215  }
216
217  bool operator==(const DynamicImageRef &inInfo) const {
218    return (*const_cast<DynamicImageRef*>(this)->p).GetLoadAddress() ==
219        (*const_cast<DynamicImageRef&>(inInfo)).GetLoadAddress();
220  }
221
222  // Be just like DynamicImage*
223  DynamicImage  *operator->() {return p;}
224  operator DynamicImage*() {return p;}
225
226 private:
227  DynamicImage  *p;
228};
229
// Helper, templated on MachBits, for dealing with 32-bit/64-bit Mach-O
// differences.  Only declared here; DynamicImages names it as a friend.
class DynamicImages;

template <class MachBits>
void ReadImageInfo(DynamicImages& images, uint64_t image_list_address);
234
235//==============================================================================
236// An object of type DynamicImages may be created to allow introspection of
237// an arbitrary task's dynamically loaded mach-o binaries.  This makes the
238// assumption that the current task has send rights to the target task.
239class DynamicImages {
240 public:
241  explicit DynamicImages(mach_port_t task);
242
243  ~DynamicImages() {
244    for (int i = 0; i < GetImageCount(); ++i) {
245      delete image_list_[i];
246    }
247  }
248
249  // Returns the number of dynamically loaded mach-o images.
250  int GetImageCount() const {return static_cast<int>(image_list_.size());}
251
252  // Returns an individual image.
253  DynamicImage *GetImage(int i) {
254    if (i < (int)image_list_.size()) {
255      return image_list_[i];
256    }
257    return NULL;
258  }
259
260  // Returns the image corresponding to the main executable.
261  DynamicImage *GetExecutableImage();
262  int GetExecutableImageIndex();
263
264  // Returns the task which we're looking at.
265  mach_port_t GetTask() const {return task_;}
266
267  // CPU type of the task
268  cpu_type_t GetCPUType() {return cpu_type_;}
269
270  // Return true if the task is a 64-bit architecture.
271  bool Is64Bit() { return (GetCPUType() & CPU_ARCH_ABI64) == CPU_ARCH_ABI64; }
272
273  // Determine the CPU type of the task being dumped.
274  static cpu_type_t DetermineTaskCPUType(task_t task);
275
276  // Get the native CPU type of this task.
277  static cpu_type_t GetNativeCPUType() {
278#if defined(__i386__)
279    return CPU_TYPE_I386;
280#elif defined(__x86_64__)
281    return CPU_TYPE_X86_64;
282#elif defined(__ppc__)
283    return CPU_TYPE_POWERPC;
284#elif defined(__ppc64__)
285    return CPU_TYPE_POWERPC64;
286#elif defined(__arm__)
287    return CPU_TYPE_ARM;
288#elif defined(__aarch64__)
289    return CPU_TYPE_ARM64;
290#else
291#error "GetNativeCPUType not implemented for this architecture"
292#endif
293  }
294
295 private:
296  template<typename MachBits>
297  friend void ReadImageInfo(DynamicImages& images, uint64_t image_list_address);
298
299  bool IsOurTask() {return task_ == mach_task_self();}
300
301  // Initialization
302  void ReadImageInfoForTask();
303  uint64_t GetDyldAllImageInfosPointer();
304
305  mach_port_t              task_;
306  cpu_type_t               cpu_type_;  // CPU type of task_
307  vector<DynamicImageRef>  image_list_;
308};
309
310// Fill bytes with the contents of memory at a particular
311// location in another task.
312kern_return_t ReadTaskMemory(task_port_t target_task,
313                             const uint64_t address,
314                             size_t length,
315                             vector<uint8_t> &bytes);
316
317}   // namespace google_breakpad
318
319#endif // CLIENT_MAC_HANDLER_DYNAMIC_IMAGES_H__
320