// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and
// google_breakpad::Mach_O::Reader. See macho_reader.h for details.

#include "common/mac/macho_reader.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

// Unfortunately, CPU_TYPE_ARM is not defined for 10.4.
42#if !defined(CPU_TYPE_ARM) 43#define CPU_TYPE_ARM 12 44#endif 45 46#if !defined(CPU_TYPE_ARM_64) 47#define CPU_TYPE_ARM_64 16777228 48#endif 49 50namespace google_breakpad { 51namespace mach_o { 52 53// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its 54// arguments, so you can't place expressions that do necessary work in 55// the argument of an assert. Nor can you assign the result of the 56// expression to a variable and assert that the variable's value is 57// true: you'll get unused variable warnings when NDEBUG is #defined. 58// 59// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that 60// the result is true if NDEBUG is not #defined. 61#if defined(NDEBUG) 62#define ASSERT_ALWAYS_EVAL(x) (x) 63#else 64#define ASSERT_ALWAYS_EVAL(x) assert(x) 65#endif 66 67void FatReader::Reporter::BadHeader() { 68 fprintf(stderr, "%s: file is neither a fat binary file" 69 " nor a Mach-O object file\n", filename_.c_str()); 70} 71 72void FatReader::Reporter::TooShort() { 73 fprintf(stderr, "%s: file too short for the data it claims to contain\n", 74 filename_.c_str()); 75} 76 77void FatReader::Reporter::MisplacedObjectFile() { 78 fprintf(stderr, "%s: file too short for the object files it claims" 79 " to contain\n", filename_.c_str()); 80} 81 82bool FatReader::Read(const uint8_t *buffer, size_t size) { 83 buffer_.start = buffer; 84 buffer_.end = buffer + size; 85 ByteCursor cursor(&buffer_); 86 87 // Fat binaries always use big-endian, so read the magic number in 88 // that endianness. To recognize Mach-O magic numbers, which can use 89 // either endianness, check for both the proper and reversed forms 90 // of the magic numbers. 91 cursor.set_big_endian(true); 92 if (cursor >> magic_) { 93 if (magic_ == FAT_MAGIC) { 94 // How many object files does this fat binary contain? 
95 uint32_t object_files_count; 96 if (!(cursor >> object_files_count)) { // nfat_arch 97 reporter_->TooShort(); 98 return false; 99 } 100 101 // Read the list of object files. 102 object_files_.resize(object_files_count); 103 for (size_t i = 0; i < object_files_count; i++) { 104 struct fat_arch *objfile = &object_files_[i]; 105 106 // Read this object file entry, byte-swapping as appropriate. 107 cursor >> objfile->cputype 108 >> objfile->cpusubtype 109 >> objfile->offset 110 >> objfile->size 111 >> objfile->align; 112 if (!cursor) { 113 reporter_->TooShort(); 114 return false; 115 } 116 // Does the file actually have the bytes this entry refers to? 117 size_t fat_size = buffer_.Size(); 118 if (objfile->offset > fat_size || 119 objfile->size > fat_size - objfile->offset) { 120 reporter_->MisplacedObjectFile(); 121 return false; 122 } 123 } 124 125 return true; 126 } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 || 127 magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) { 128 // If this is a little-endian Mach-O file, fix the cursor's endianness. 129 if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) 130 cursor.set_big_endian(false); 131 // Record the entire file as a single entry in the object file list. 132 object_files_.resize(1); 133 134 // Get the cpu type and subtype from the Mach-O header. 135 if (!(cursor >> object_files_[0].cputype 136 >> object_files_[0].cpusubtype)) { 137 reporter_->TooShort(); 138 return false; 139 } 140 141 object_files_[0].offset = 0; 142 object_files_[0].size = static_cast<uint32_t>(buffer_.Size()); 143 // This alignment is correct for 32 and 64-bit x86 and ppc. 
144 // See get_align in the lipo source for other architectures: 145 // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c 146 object_files_[0].align = 12; // 2^12 == 4096 147 148 return true; 149 } 150 } 151 152 reporter_->BadHeader(); 153 return false; 154} 155 156void Reader::Reporter::BadHeader() { 157 fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str()); 158} 159 160void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type, 161 cpu_subtype_t cpu_subtype, 162 cpu_type_t expected_cpu_type, 163 cpu_subtype_t expected_cpu_subtype) { 164 fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected" 165 " type %d, subtype %d\n", 166 filename_.c_str(), cpu_type, cpu_subtype, 167 expected_cpu_type, expected_cpu_subtype); 168} 169 170void Reader::Reporter::HeaderTruncated() { 171 fprintf(stderr, "%s: file does not contain a complete Mach-O header\n", 172 filename_.c_str()); 173} 174 175void Reader::Reporter::LoadCommandRegionTruncated() { 176 fprintf(stderr, "%s: file too short to hold load command region" 177 " given in Mach-O header\n", filename_.c_str()); 178} 179 180void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i, 181 LoadCommandType type) { 182 fprintf(stderr, "%s: file's header claims there are %ld" 183 " load commands, but load command #%ld", 184 filename_.c_str(), claimed, i); 185 if (type) fprintf(stderr, ", of type %d,", type); 186 fprintf(stderr, " extends beyond the end of the load command region\n"); 187} 188 189void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) { 190 fprintf(stderr, "%s: the contents of load command #%ld, of type %d," 191 " extend beyond the size given in the load command's header\n", 192 filename_.c_str(), i, type); 193} 194 195void Reader::Reporter::SectionsMissing(const string &name) { 196 fprintf(stderr, "%s: the load command for segment '%s'" 197 " is too short to hold the section headers it claims to have\n", 198 filename_.c_str(), 
name.c_str()); 199} 200 201void Reader::Reporter::MisplacedSegmentData(const string &name) { 202 fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond" 203 " the end of the file\n", filename_.c_str(), name.c_str()); 204} 205 206void Reader::Reporter::MisplacedSectionData(const string §ion, 207 const string &segment) { 208 fprintf(stderr, "%s: the section '%s' in segment '%s'" 209 " claims its contents lie outside the segment's contents\n", 210 filename_.c_str(), section.c_str(), segment.c_str()); 211} 212 213void Reader::Reporter::MisplacedSymbolTable() { 214 fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol" 215 " table's contents are located beyond the end of the file\n", 216 filename_.c_str()); 217} 218 219void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) { 220 fprintf(stderr, "%s: CPU type %d is not supported\n", 221 filename_.c_str(), cpu_type); 222} 223 224bool Reader::Read(const uint8_t *buffer, 225 size_t size, 226 cpu_type_t expected_cpu_type, 227 cpu_subtype_t expected_cpu_subtype) { 228 assert(!buffer_.start); 229 buffer_.start = buffer; 230 buffer_.end = buffer + size; 231 ByteCursor cursor(&buffer_, true); 232 uint32_t magic; 233 if (!(cursor >> magic)) { 234 reporter_->HeaderTruncated(); 235 return false; 236 } 237 238 if (expected_cpu_type != CPU_TYPE_ANY) { 239 uint32_t expected_magic; 240 // validate that magic matches the expected cpu type 241 switch (expected_cpu_type) { 242 case CPU_TYPE_ARM: 243 case CPU_TYPE_I386: 244 expected_magic = MH_CIGAM; 245 break; 246 case CPU_TYPE_POWERPC: 247 expected_magic = MH_MAGIC; 248 break; 249 case CPU_TYPE_ARM_64: 250 case CPU_TYPE_X86_64: 251 expected_magic = MH_CIGAM_64; 252 break; 253 case CPU_TYPE_POWERPC64: 254 expected_magic = MH_MAGIC_64; 255 break; 256 default: 257 reporter_->UnsupportedCPUType(expected_cpu_type); 258 return false; 259 } 260 261 if (expected_magic != magic) { 262 reporter_->BadHeader(); 263 return false; 264 } 265 } 266 267 // Since the 
byte cursor is in big-endian mode, a reversed magic number 268 // always indicates a little-endian file, regardless of our own endianness. 269 switch (magic) { 270 case MH_MAGIC: big_endian_ = true; bits_64_ = false; break; 271 case MH_CIGAM: big_endian_ = false; bits_64_ = false; break; 272 case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break; 273 case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break; 274 default: 275 reporter_->BadHeader(); 276 return false; 277 } 278 cursor.set_big_endian(big_endian_); 279 uint32_t commands_size, reserved; 280 cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_ 281 >> commands_size >> flags_; 282 if (bits_64_) 283 cursor >> reserved; 284 if (!cursor) { 285 reporter_->HeaderTruncated(); 286 return false; 287 } 288 289 if (expected_cpu_type != CPU_TYPE_ANY && 290 (expected_cpu_type != cpu_type_ || 291 expected_cpu_subtype != cpu_subtype_)) { 292 reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_, 293 expected_cpu_type, expected_cpu_subtype); 294 return false; 295 } 296 297 cursor 298 .PointTo(&load_commands_.start, commands_size) 299 .PointTo(&load_commands_.end, 0); 300 if (!cursor) { 301 reporter_->LoadCommandRegionTruncated(); 302 return false; 303 } 304 305 return true; 306} 307 308bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const { 309 ByteCursor list_cursor(&load_commands_, big_endian_); 310 311 for (size_t index = 0; index < load_command_count_; ++index) { 312 // command refers to this load command alone, so that cursor will 313 // refuse to read past the load command's end. But since we haven't 314 // read the size yet, let command initially refer to the entire 315 // remainder of the load command series. 316 ByteBuffer command(list_cursor.here(), list_cursor.Available()); 317 ByteCursor cursor(&command, big_endian_); 318 319 // Read the command type and size --- fields common to all commands. 
320 uint32_t type, size; 321 if (!(cursor >> type)) { 322 reporter_->LoadCommandsOverrun(load_command_count_, index, 0); 323 return false; 324 } 325 if (!(cursor >> size) || size > command.Size()) { 326 reporter_->LoadCommandsOverrun(load_command_count_, index, type); 327 return false; 328 } 329 330 // Now that we've read the length, restrict command's range to this 331 // load command only. 332 command.end = command.start + size; 333 334 switch (type) { 335 case LC_SEGMENT: 336 case LC_SEGMENT_64: { 337 Segment segment; 338 segment.bits_64 = (type == LC_SEGMENT_64); 339 size_t word_size = segment.bits_64 ? 8 : 4; 340 cursor.CString(&segment.name, 16); 341 size_t file_offset, file_size; 342 cursor 343 .Read(word_size, false, &segment.vmaddr) 344 .Read(word_size, false, &segment.vmsize) 345 .Read(word_size, false, &file_offset) 346 .Read(word_size, false, &file_size); 347 cursor >> segment.maxprot 348 >> segment.initprot 349 >> segment.nsects 350 >> segment.flags; 351 if (!cursor) { 352 reporter_->LoadCommandTooShort(index, type); 353 return false; 354 } 355 if (file_offset > buffer_.Size() || 356 file_size > buffer_.Size() - file_offset) { 357 reporter_->MisplacedSegmentData(segment.name); 358 return false; 359 } 360 // Mach-O files in .dSYM bundles have the contents of the loaded 361 // segments removed, and their file offsets and file sizes zeroed 362 // out. To help us handle this special case properly, give such 363 // segments' contents NULL starting and ending pointers. 364 if (file_offset == 0 && file_size == 0) { 365 segment.contents.start = segment.contents.end = NULL; 366 } else { 367 segment.contents.start = buffer_.start + file_offset; 368 segment.contents.end = segment.contents.start + file_size; 369 } 370 // The section list occupies the remainder of this load command's space. 
371 segment.section_list.start = cursor.here(); 372 segment.section_list.end = command.end; 373 374 if (!handler->SegmentCommand(segment)) 375 return false; 376 break; 377 } 378 379 case LC_SYMTAB: { 380 uint32_t symoff, nsyms, stroff, strsize; 381 cursor >> symoff >> nsyms >> stroff >> strsize; 382 if (!cursor) { 383 reporter_->LoadCommandTooShort(index, type); 384 return false; 385 } 386 // How big are the entries in the symbol table? 387 // sizeof(struct nlist_64) : sizeof(struct nlist), 388 // but be paranoid about alignment vs. target architecture. 389 size_t symbol_size = bits_64_ ? 16 : 12; 390 // How big is the entire symbol array? 391 size_t symbols_size = nsyms * symbol_size; 392 if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff || 393 stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) { 394 reporter_->MisplacedSymbolTable(); 395 return false; 396 } 397 ByteBuffer entries(buffer_.start + symoff, symbols_size); 398 ByteBuffer names(buffer_.start + stroff, strsize); 399 if (!handler->SymtabCommand(entries, names)) 400 return false; 401 break; 402 } 403 404 default: { 405 if (!handler->UnknownCommand(type, command)) 406 return false; 407 break; 408 } 409 } 410 411 list_cursor.set_here(command.end); 412 } 413 414 return true; 415} 416 417// A load command handler that looks for a segment of a given name. 418class Reader::SegmentFinder : public LoadCommandHandler { 419 public: 420 // Create a load command handler that looks for a segment named NAME, 421 // and sets SEGMENT to describe it if found. 422 SegmentFinder(const string &name, Segment *segment) 423 : name_(name), segment_(segment), found_() { } 424 425 // Return true if the traversal found the segment, false otherwise. 
426 bool found() const { return found_; } 427 428 bool SegmentCommand(const Segment &segment) { 429 if (segment.name == name_) { 430 *segment_ = segment; 431 found_ = true; 432 return false; 433 } 434 return true; 435 } 436 437 private: 438 // The name of the segment our creator is looking for. 439 const string &name_; 440 441 // Where we should store the segment if found. (WEAK) 442 Segment *segment_; 443 444 // True if we found the segment. 445 bool found_; 446}; 447 448bool Reader::FindSegment(const string &name, Segment *segment) const { 449 SegmentFinder finder(name, segment); 450 WalkLoadCommands(&finder); 451 return finder.found(); 452} 453 454bool Reader::WalkSegmentSections(const Segment &segment, 455 SectionHandler *handler) const { 456 size_t word_size = segment.bits_64 ? 8 : 4; 457 ByteCursor cursor(&segment.section_list, big_endian_); 458 459 for (size_t i = 0; i < segment.nsects; i++) { 460 Section section; 461 section.bits_64 = segment.bits_64; 462 uint64_t size; 463 uint32_t offset, dummy32; 464 cursor 465 .CString(§ion.section_name, 16) 466 .CString(§ion.segment_name, 16) 467 .Read(word_size, false, §ion.address) 468 .Read(word_size, false, &size) 469 >> offset 470 >> section.align 471 >> dummy32 472 >> dummy32 473 >> section.flags 474 >> dummy32 475 >> dummy32; 476 if (section.bits_64) 477 cursor >> dummy32; 478 if (!cursor) { 479 reporter_->SectionsMissing(segment.name); 480 return false; 481 } 482 if ((section.flags & SECTION_TYPE) == S_ZEROFILL) { 483 // Zero-fill sections have a size, but no contents. 484 section.contents.start = section.contents.end = NULL; 485 } else if (segment.contents.start == NULL && 486 segment.contents.end == NULL) { 487 // Mach-O files in .dSYM bundles have the contents of the loaded 488 // segments removed, and their file offsets and file sizes zeroed 489 // out. However, the sections within those segments still have 490 // non-zero sizes. 
There's no reason to call MisplacedSectionData in 491 // this case; the caller may just need the section's load 492 // address. But do set the contents' limits to NULL, for safety. 493 section.contents.start = section.contents.end = NULL; 494 } else { 495 if (offset < size_t(segment.contents.start - buffer_.start) || 496 offset > size_t(segment.contents.end - buffer_.start) || 497 size > size_t(segment.contents.end - buffer_.start - offset)) { 498 reporter_->MisplacedSectionData(section.section_name, 499 section.segment_name); 500 return false; 501 } 502 section.contents.start = buffer_.start + offset; 503 section.contents.end = section.contents.start + size; 504 } 505 if (!handler->HandleSection(section)) 506 return false; 507 } 508 return true; 509} 510 511// A SectionHandler that builds a SectionMap for the sections within a 512// given segment. 513class Reader::SectionMapper: public SectionHandler { 514 public: 515 // Create a SectionHandler that populates MAP with an entry for 516 // each section it is given. 517 SectionMapper(SectionMap *map) : map_(map) { } 518 bool HandleSection(const Section §ion) { 519 (*map_)[section.section_name] = section; 520 return true; 521 } 522 private: 523 // The map under construction. (WEAK) 524 SectionMap *map_; 525}; 526 527bool Reader::MapSegmentSections(const Segment &segment, 528 SectionMap *section_map) const { 529 section_map->clear(); 530 SectionMapper mapper(section_map); 531 return WalkSegmentSections(segment, &mapper); 532} 533 534} // namespace mach_o 535} // namespace google_breakpad 536