dynamic_images.cc revision 32441cc0608ddaf81885d23acf63f4b53cb73744
1// Copyright (c) 2007, Google Inc. 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are 6// met: 7// 8// * Redistributions of source code must retain the above copyright 9// notice, this list of conditions and the following disclaimer. 10// * Redistributions in binary form must reproduce the above 11// copyright notice, this list of conditions and the following disclaimer 12// in the documentation and/or other materials provided with the 13// distribution. 14// * Neither the name of Google Inc. nor the names of its 15// contributors may be used to endorse or promote products derived from 16// this software without specific prior written permission. 17// 18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30extern "C" { // needed to compile on Leopard 31 #include <mach-o/nlist.h> 32 #include <stdlib.h> 33 #include <stdio.h> 34} 35 36#include "breakpad_nlist_64.h" 37#include <dlfcn.h> 38#include <mach/mach_vm.h> 39#include <algorithm> 40#include "client/mac/handler/dynamic_images.h" 41 42namespace google_breakpad { 43 44//============================================================================== 45// Returns the size of the memory region containing |address| and the 46// number of bytes from |address| to the end of the region. 47// We potentially, will extend the size of the original 48// region by the size of the following region if it's contiguous with the 49// first in order to handle cases when we're reading strings and they 50// straddle two vm regions. 51// 52static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task, 53 const void* address, 54 mach_vm_size_t *size_to_end) { 55 mach_vm_address_t region_base = (mach_vm_address_t)address; 56 mach_vm_size_t region_size; 57 natural_t nesting_level = 0; 58 vm_region_submap_info_64 submap_info; 59 mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64; 60 61 // Get information about the vm region containing |address| 62 vm_region_recurse_info_t region_info; 63 region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info); 64 65 kern_return_t result = 66 mach_vm_region_recurse(target_task, 67 ®ion_base, 68 ®ion_size, 69 &nesting_level, 70 region_info, 71 &info_count); 72 73 if (result == KERN_SUCCESS) { 74 // Get distance from |address| to the end of this region 75 *size_to_end = region_base + region_size -(mach_vm_address_t)address; 76 77 // If we want to handle strings as long as 4096 characters we may need 78 // to check if there's a vm region immediately following the first one. 79 // If so, we need to extend |*size_to_end| to go all the way to the end 80 // of the second region. 81 if (*size_to_end < 4096) { 82 // Second region starts where the first one ends 83 mach_vm_address_t region_base2 = 84 (mach_vm_address_t)(region_base + region_size); 85 mach_vm_size_t region_size2; 86 87 // Get information about the following vm region 88 result = 89 mach_vm_region_recurse(target_task, 90 ®ion_base2, 91 ®ion_size2, 92 &nesting_level, 93 region_info, 94 &info_count); 95 96 // Extend region_size to go all the way to the end of the 2nd region 97 if (result == KERN_SUCCESS 98 && region_base2 == region_base + region_size) { 99 region_size += region_size2; 100 } 101 } 102 103 *size_to_end = region_base + region_size -(mach_vm_address_t)address; 104 } else { 105 region_size = 0; 106 *size_to_end = 0; 107 } 108 109 return region_size; 110} 111 112#define kMaxStringLength 8192 113//============================================================================== 114// Reads a NULL-terminated string from another task. 115// 116// Warning! This will not read any strings longer than kMaxStringLength-1 117// 118static void* ReadTaskString(task_port_t target_task, 119 const void* address) { 120 // The problem is we don't know how much to read until we know how long 121 // the string is. And we don't know how long the string is, until we've read 122 // the memory! So, we'll try to read kMaxStringLength bytes 123 // (or as many bytes as we can until we reach the end of the vm region). 124 mach_vm_size_t size_to_end; 125 GetMemoryRegionSize(target_task, address, &size_to_end); 126 127 if (size_to_end > 0) { 128 mach_vm_size_t size_to_read = 129 size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end; 130 131 kern_return_t kr; 132 return ReadTaskMemory(target_task, address, size_to_read, &kr); 133 } 134 135 return NULL; 136} 137 138//============================================================================== 139// Reads an address range from another task. A block of memory is malloced 140// and should be freed by the caller. 141void* ReadTaskMemory(task_port_t target_task, 142 const void* address, 143 size_t length, 144 kern_return_t *kr) { 145 void* result = NULL; 146 int systemPageSize = getpagesize(); 147 148 // use the negative of the page size for the mask to find the page address 149 mach_vm_address_t page_address = 150 reinterpret_cast<mach_vm_address_t>(address) & (-systemPageSize); 151 152 mach_vm_address_t last_page_address = 153 (reinterpret_cast<mach_vm_address_t>(address) + length + 154 (systemPageSize - 1)) & (-systemPageSize); 155 156 mach_vm_size_t page_size = last_page_address - page_address; 157 uint8_t* local_start; 158 uint32_t local_length; 159 160 kern_return_t r; 161 162 r = mach_vm_read(target_task, 163 page_address, 164 page_size, 165 reinterpret_cast<vm_offset_t*>(&local_start), 166 &local_length); 167 168 169 if (kr != NULL) { 170 *kr = r; 171 } 172 173 if (r == KERN_SUCCESS) { 174 result = malloc(length); 175 if (result != NULL) { 176 memcpy(result, 177 &local_start[(mach_vm_address_t)address - page_address], 178 length); 179 } 180 mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length); 181 } 182 183 return result; 184} 185 186#pragma mark - 187 188//============================================================================== 189// Initializes vmaddr_, vmsize_, and slide_ 190void DynamicImage::CalculateMemoryAndVersionInfo() { 191 breakpad_mach_header *header = GetMachHeader(); 192 193 // unless we can process the header, ensure that calls to 194 // IsValid() will return false 195 vmaddr_ = 0; 196 vmsize_ = 0; 197 slide_ = 0; 198 version_ = 0; 199 200 bool foundTextSection = false; 201 bool foundDylibIDCommand = false; 202 203#if __LP64__ 204 if(header->magic != MH_MAGIC_64) { 205 return; 206 } 207#else 208 if(header->magic != MH_MAGIC) { 209 return; 210 } 211#endif 212 213#ifdef __LP64__ 214 const uint32_t segmentLoadCommand = LC_SEGMENT_64; 215#else 216 const uint32_t segmentLoadCommand = LC_SEGMENT; 217#endif 218 219 const struct load_command *cmd = 220 reinterpret_cast<const struct load_command *>(header + 1); 221 222 for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) { 223 if (!foundTextSection) { 224 if (cmd->cmd == segmentLoadCommand) { 225 const breakpad_mach_segment_command *seg = 226 reinterpret_cast<const breakpad_mach_segment_command *>(cmd); 227 228 if (!strcmp(seg->segname, "__TEXT")) { 229 vmaddr_ = seg->vmaddr; 230 vmsize_ = seg->vmsize; 231 slide_ = 0; 232 233 if (seg->fileoff == 0 && seg->filesize != 0) { 234 slide_ = (uintptr_t)GetLoadAddress() - (uintptr_t)seg->vmaddr; 235 } 236 foundTextSection = true; 237 } 238 } 239 } 240 241 if (!foundDylibIDCommand) { 242 if (cmd->cmd == LC_ID_DYLIB) { 243 const struct dylib_command *dc = 244 reinterpret_cast<const struct dylib_command *>(cmd); 245 246 version_ = dc->dylib.current_version; 247 foundDylibIDCommand = true; 248 } 249 } 250 251 if (foundDylibIDCommand && foundTextSection) { 252 return; 253 } 254 255 cmd = reinterpret_cast<const struct load_command *> 256 (reinterpret_cast<const char *>(cmd) + cmd->cmdsize); 257 } 258 259} 260 261void DynamicImage::Print() { 262 const char *path = GetFilePath(); 263 if (!path) { 264 path = "(unknown)"; 265 } 266 printf("%p: %s\n", GetLoadAddress(), path); 267 breakpad_mach_header *header = GetMachHeader(); 268 MachHeader(*header).Print(); 269 printf("vmaddr\t\t: %p\n", reinterpret_cast<void*>(GetVMAddr())); 270 printf("vmsize\t\t: %llu\n", GetVMSize()); 271 printf("slide\t\t: %td\n", GetVMAddrSlide()); 272} 273 274#pragma mark - 275 276//============================================================================== 277// Loads information about dynamically loaded code in the given task. 278DynamicImages::DynamicImages(mach_port_t task) 279 : task_(task) { 280 ReadImageInfoForTask(); 281} 282 283void* DynamicImages::GetDyldAllImageInfosPointer() 284{ 285 286 const char *imageSymbolName = "_dyld_all_image_infos"; 287 const char *dyldPath = "/usr/lib/dyld"; 288#ifndef __LP64__ 289 struct nlist l[8]; 290 memset(l, 0, sizeof(l) ); 291 292 // First we lookup the address of the "_dyld_all_image_infos" struct 293 // which lives in "dyld". This structure contains information about all 294 // of the loaded dynamic images. 295 struct nlist &list = l[0]; 296 list.n_un.n_name = const_cast<char *>(imageSymbolName); 297 nlist(dyldPath,&list); 298 if(list.n_value) { 299 return reinterpret_cast<void*>(list.n_value); 300 } 301 302 return NULL; 303#else 304 struct nlist_64 l[8]; 305 struct nlist_64 &list = l[0]; 306 307 memset(l, 0, sizeof(l) ); 308 309 const char *symbolNames[2] = { imageSymbolName, "\0" }; 310 311 int invalidEntriesCount = breakpad_nlist_64(dyldPath,&list,symbolNames); 312 313 if(invalidEntriesCount != 0) { 314 return NULL; 315 } 316 assert(list.n_value); 317 return reinterpret_cast<void*>(list.n_value); 318#endif 319 320} 321//============================================================================== 322// This code was written using dyld_debug.c (from Darwin) as a guide. 323void DynamicImages::ReadImageInfoForTask() { 324 void *imageList = GetDyldAllImageInfosPointer(); 325 326 if (imageList) { 327 kern_return_t kr; 328 // Read the structure inside of dyld that contains information about 329 // loaded images. We're reading from the desired task's address space. 330 331 // Here we make the assumption that dyld loaded at the same address in 332 // the crashed process vs. this one. This is an assumption made in 333 // "dyld_debug.c" and is said to be nearly always valid. 334 dyld_all_image_infos *dyldInfo = reinterpret_cast<dyld_all_image_infos*> 335 (ReadTaskMemory(task_, 336 reinterpret_cast<void*>(imageList), 337 sizeof(dyld_all_image_infos), &kr)); 338 339 if (dyldInfo) { 340 // number of loaded images 341 int count = dyldInfo->infoArrayCount; 342 343 // Read an array of dyld_image_info structures each containing 344 // information about a loaded image. 345 dyld_image_info *infoArray = reinterpret_cast<dyld_image_info*> 346 (ReadTaskMemory(task_, 347 dyldInfo->infoArray, 348 count*sizeof(dyld_image_info), &kr)); 349 350 image_list_.reserve(count); 351 352 for (int i = 0; i < count; ++i) { 353 dyld_image_info &info = infoArray[i]; 354 355 // First read just the mach_header from the image in the task. 356 breakpad_mach_header *header = reinterpret_cast<breakpad_mach_header*> 357 (ReadTaskMemory(task_, 358 info.load_address_, 359 sizeof(breakpad_mach_header), &kr)); 360 361 if (!header) 362 break; // bail on this dynamic image 363 364 // Now determine the total amount we really want to read based on the 365 // size of the load commands. We need the header plus all of the 366 // load commands. 367 unsigned int header_size = 368 sizeof(breakpad_mach_header) + header->sizeofcmds; 369 370 free(header); 371 372 header = reinterpret_cast<breakpad_mach_header*> 373 (ReadTaskMemory(task_, info.load_address_, header_size, &kr)); 374 375 // Read the file name from the task's memory space. 376 char *file_path = NULL; 377 if (info.file_path_) { 378 // Although we're reading kMaxStringLength bytes, it's copied in the 379 // the DynamicImage constructor below with the correct string length, 380 // so it's not really wasting memory. 381 file_path = reinterpret_cast<char*> 382 (ReadTaskString(task_, info.file_path_)); 383 } 384 385 // Create an object representing this image and add it to our list. 386 DynamicImage *new_image; 387 new_image = new DynamicImage(header, 388 header_size, 389 (breakpad_mach_header*)info.load_address_, 390 file_path, 391 info.file_mod_date_, 392 task_); 393 394 if (new_image->IsValid()) { 395 image_list_.push_back(DynamicImageRef(new_image)); 396 } else { 397 delete new_image; 398 } 399 400 if (file_path) { 401 free(file_path); 402 } 403 } 404 405 free(dyldInfo); 406 free(infoArray); 407 408 // sorts based on loading address 409 sort(image_list_.begin(), image_list_.end() ); 410 // remove duplicates - this happens in certain strange cases 411 // You can see it in DashboardClient when Google Gadgets plugin 412 // is installed. Apple's crash reporter log and gdb "info shared" 413 // both show the same library multiple times at the same address 414 415 vector<DynamicImageRef>::iterator it = unique(image_list_.begin(), 416 image_list_.end() ); 417 image_list_.erase(it, image_list_.end()); 418 } 419 } 420} 421 422//============================================================================== 423DynamicImage *DynamicImages::GetExecutableImage() { 424 int executable_index = GetExecutableImageIndex(); 425 426 if (executable_index >= 0) { 427 return GetImage(executable_index); 428 } 429 430 return NULL; 431} 432 433//============================================================================== 434// returns -1 if failure to find executable 435int DynamicImages::GetExecutableImageIndex() { 436 int image_count = GetImageCount(); 437 438 for (int i = 0; i < image_count; ++i) { 439 DynamicImage *image = GetImage(i); 440 if (image->GetMachHeader()->filetype == MH_EXECUTE) { 441 return i; 442 } 443 } 444 445 return -1; 446} 447 448} // namespace google_breakpad 449