1// Copyright (c) 2010, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31
// macho_reader.cc: Implementation of google_breakpad::mach_o::FatReader and
// google_breakpad::mach_o::Reader. See macho_reader.h for details.
34
35#include "common/mac/macho_reader.h"
36
37#include <assert.h>
38#include <stdio.h>
39#include <stdlib.h>
40
// Unfortunately, CPU_TYPE_ARM is not defined for 10.4.
#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM 12
#endif

// Likewise, supply CPU_TYPE_ARM_64 only when no header has defined it.
// 16777228 == 0x0100000C, i.e. the value 12 used above with bit 24 set.
#ifndef CPU_TYPE_ARM_64
#define CPU_TYPE_ARM_64 16777228
#endif
49
50namespace google_breakpad {
51namespace mach_o {
52
// 'assert' discards its argument entirely when NDEBUG is #defined, so an
// expression that performs necessary work must not be written directly
// inside an assert. Storing the expression's result in a variable and
// asserting on that variable is no better: in NDEBUG builds the variable
// is unused, and the compiler warns about it.
//
// ASSERT_ALWAYS_EVAL(x) evaluates x in every build, and additionally
// asserts that x is true when NDEBUG is not #defined.
#ifndef NDEBUG
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#else
#define ASSERT_ALWAYS_EVAL(x) (x)
#endif
66
67void FatReader::Reporter::BadHeader() {
68  fprintf(stderr, "%s: file is neither a fat binary file"
69          " nor a Mach-O object file\n", filename_.c_str());
70}
71
72void FatReader::Reporter::TooShort() {
73  fprintf(stderr, "%s: file too short for the data it claims to contain\n",
74          filename_.c_str());
75}
76
77void FatReader::Reporter::MisplacedObjectFile() {
78  fprintf(stderr, "%s: file too short for the object files it claims"
79          " to contain\n", filename_.c_str());
80}
81
82bool FatReader::Read(const uint8_t *buffer, size_t size) {
83  buffer_.start = buffer;
84  buffer_.end = buffer + size;
85  ByteCursor cursor(&buffer_);
86
87  // Fat binaries always use big-endian, so read the magic number in
88  // that endianness. To recognize Mach-O magic numbers, which can use
89  // either endianness, check for both the proper and reversed forms
90  // of the magic numbers.
91  cursor.set_big_endian(true);
92  if (cursor >> magic_) {
93    if (magic_ == FAT_MAGIC) {
94      // How many object files does this fat binary contain?
95      uint32_t object_files_count;
96      if (!(cursor >> object_files_count)) {  // nfat_arch
97        reporter_->TooShort();
98        return false;
99      }
100
101      // Read the list of object files.
102      object_files_.resize(object_files_count);
103      for (size_t i = 0; i < object_files_count; i++) {
104        struct fat_arch *objfile = &object_files_[i];
105
106        // Read this object file entry, byte-swapping as appropriate.
107        cursor >> objfile->cputype
108               >> objfile->cpusubtype
109               >> objfile->offset
110               >> objfile->size
111               >> objfile->align;
112        if (!cursor) {
113          reporter_->TooShort();
114          return false;
115        }
116        // Does the file actually have the bytes this entry refers to?
117        size_t fat_size = buffer_.Size();
118        if (objfile->offset > fat_size ||
119            objfile->size > fat_size - objfile->offset) {
120          reporter_->MisplacedObjectFile();
121          return false;
122        }
123      }
124
125      return true;
126    } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
127               magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
128      // If this is a little-endian Mach-O file, fix the cursor's endianness.
129      if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
130        cursor.set_big_endian(false);
131      // Record the entire file as a single entry in the object file list.
132      object_files_.resize(1);
133
134      // Get the cpu type and subtype from the Mach-O header.
135      if (!(cursor >> object_files_[0].cputype
136                   >> object_files_[0].cpusubtype)) {
137        reporter_->TooShort();
138        return false;
139      }
140
141      object_files_[0].offset = 0;
142      object_files_[0].size = static_cast<uint32_t>(buffer_.Size());
143      // This alignment is correct for 32 and 64-bit x86 and ppc.
144      // See get_align in the lipo source for other architectures:
145      // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
146      object_files_[0].align = 12;  // 2^12 == 4096
147
148      return true;
149    }
150  }
151
152  reporter_->BadHeader();
153  return false;
154}
155
156void Reader::Reporter::BadHeader() {
157  fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
158}
159
160void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
161                                       cpu_subtype_t cpu_subtype,
162                                       cpu_type_t expected_cpu_type,
163                                       cpu_subtype_t expected_cpu_subtype) {
164  fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
165          " type %d, subtype %d\n",
166          filename_.c_str(), cpu_type, cpu_subtype,
167          expected_cpu_type, expected_cpu_subtype);
168}
169
170void Reader::Reporter::HeaderTruncated() {
171  fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
172          filename_.c_str());
173}
174
175void Reader::Reporter::LoadCommandRegionTruncated() {
176  fprintf(stderr, "%s: file too short to hold load command region"
177          " given in Mach-O header\n", filename_.c_str());
178}
179
180void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
181                                           LoadCommandType type) {
182  fprintf(stderr, "%s: file's header claims there are %ld"
183          " load commands, but load command #%ld",
184          filename_.c_str(), claimed, i);
185  if (type) fprintf(stderr, ", of type %d,", type);
186  fprintf(stderr, " extends beyond the end of the load command region\n");
187}
188
189void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
190  fprintf(stderr, "%s: the contents of load command #%ld, of type %d,"
191          " extend beyond the size given in the load command's header\n",
192          filename_.c_str(), i, type);
193}
194
195void Reader::Reporter::SectionsMissing(const string &name) {
196  fprintf(stderr, "%s: the load command for segment '%s'"
197          " is too short to hold the section headers it claims to have\n",
198          filename_.c_str(), name.c_str());
199}
200
201void Reader::Reporter::MisplacedSegmentData(const string &name) {
202  fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
203          " the end of the file\n", filename_.c_str(), name.c_str());
204}
205
206void Reader::Reporter::MisplacedSectionData(const string &section,
207                                            const string &segment) {
208  fprintf(stderr, "%s: the section '%s' in segment '%s'"
209          " claims its contents lie outside the segment's contents\n",
210          filename_.c_str(), section.c_str(), segment.c_str());
211}
212
213void Reader::Reporter::MisplacedSymbolTable() {
214  fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
215          " table's contents are located beyond the end of the file\n",
216          filename_.c_str());
217}
218
219void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
220  fprintf(stderr, "%s: CPU type %d is not supported\n",
221          filename_.c_str(), cpu_type);
222}
223
224bool Reader::Read(const uint8_t *buffer,
225                  size_t size,
226                  cpu_type_t expected_cpu_type,
227                  cpu_subtype_t expected_cpu_subtype) {
228  assert(!buffer_.start);
229  buffer_.start = buffer;
230  buffer_.end = buffer + size;
231  ByteCursor cursor(&buffer_, true);
232  uint32_t magic;
233  if (!(cursor >> magic)) {
234    reporter_->HeaderTruncated();
235    return false;
236  }
237
238  if (expected_cpu_type != CPU_TYPE_ANY) {
239    uint32_t expected_magic;
240    // validate that magic matches the expected cpu type
241    switch (expected_cpu_type) {
242      case CPU_TYPE_ARM:
243      case CPU_TYPE_I386:
244        expected_magic = MH_CIGAM;
245        break;
246      case CPU_TYPE_POWERPC:
247        expected_magic = MH_MAGIC;
248        break;
249      case CPU_TYPE_ARM_64:
250      case CPU_TYPE_X86_64:
251        expected_magic = MH_CIGAM_64;
252        break;
253      case CPU_TYPE_POWERPC64:
254        expected_magic = MH_MAGIC_64;
255        break;
256      default:
257        reporter_->UnsupportedCPUType(expected_cpu_type);
258        return false;
259    }
260
261    if (expected_magic != magic) {
262      reporter_->BadHeader();
263      return false;
264    }
265  }
266
267  // Since the byte cursor is in big-endian mode, a reversed magic number
268  // always indicates a little-endian file, regardless of our own endianness.
269  switch (magic) {
270    case MH_MAGIC:    big_endian_ = true;  bits_64_ = false; break;
271    case MH_CIGAM:    big_endian_ = false; bits_64_ = false; break;
272    case MH_MAGIC_64: big_endian_ = true;  bits_64_ = true;  break;
273    case MH_CIGAM_64: big_endian_ = false; bits_64_ = true;  break;
274    default:
275      reporter_->BadHeader();
276      return false;
277  }
278  cursor.set_big_endian(big_endian_);
279  uint32_t commands_size, reserved;
280  cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
281         >> commands_size >> flags_;
282  if (bits_64_)
283    cursor >> reserved;
284  if (!cursor) {
285    reporter_->HeaderTruncated();
286    return false;
287  }
288
289  if (expected_cpu_type != CPU_TYPE_ANY &&
290      (expected_cpu_type != cpu_type_ ||
291       expected_cpu_subtype != cpu_subtype_)) {
292    reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
293                              expected_cpu_type, expected_cpu_subtype);
294    return false;
295  }
296
297  cursor
298      .PointTo(&load_commands_.start, commands_size)
299      .PointTo(&load_commands_.end, 0);
300  if (!cursor) {
301    reporter_->LoadCommandRegionTruncated();
302    return false;
303  }
304
305  return true;
306}
307
308bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const {
309  ByteCursor list_cursor(&load_commands_, big_endian_);
310
311  for (size_t index = 0; index < load_command_count_; ++index) {
312    // command refers to this load command alone, so that cursor will
313    // refuse to read past the load command's end. But since we haven't
314    // read the size yet, let command initially refer to the entire
315    // remainder of the load command series.
316    ByteBuffer command(list_cursor.here(), list_cursor.Available());
317    ByteCursor cursor(&command, big_endian_);
318
319    // Read the command type and size --- fields common to all commands.
320    uint32_t type, size;
321    if (!(cursor >> type)) {
322      reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
323      return false;
324    }
325    if (!(cursor >> size) || size > command.Size()) {
326      reporter_->LoadCommandsOverrun(load_command_count_, index, type);
327      return false;
328    }
329
330    // Now that we've read the length, restrict command's range to this
331    // load command only.
332    command.end = command.start + size;
333
334    switch (type) {
335      case LC_SEGMENT:
336      case LC_SEGMENT_64: {
337        Segment segment;
338        segment.bits_64 = (type == LC_SEGMENT_64);
339        size_t word_size = segment.bits_64 ? 8 : 4;
340        cursor.CString(&segment.name, 16);
341        size_t file_offset, file_size;
342        cursor
343            .Read(word_size, false, &segment.vmaddr)
344            .Read(word_size, false, &segment.vmsize)
345            .Read(word_size, false, &file_offset)
346            .Read(word_size, false, &file_size);
347        cursor >> segment.maxprot
348               >> segment.initprot
349               >> segment.nsects
350               >> segment.flags;
351        if (!cursor) {
352          reporter_->LoadCommandTooShort(index, type);
353          return false;
354        }
355        if (file_offset > buffer_.Size() ||
356            file_size > buffer_.Size() - file_offset) {
357          reporter_->MisplacedSegmentData(segment.name);
358          return false;
359        }
360        // Mach-O files in .dSYM bundles have the contents of the loaded
361        // segments removed, and their file offsets and file sizes zeroed
362        // out. To help us handle this special case properly, give such
363        // segments' contents NULL starting and ending pointers.
364        if (file_offset == 0 && file_size == 0) {
365          segment.contents.start = segment.contents.end = NULL;
366        } else {
367          segment.contents.start = buffer_.start + file_offset;
368          segment.contents.end = segment.contents.start + file_size;
369        }
370        // The section list occupies the remainder of this load command's space.
371        segment.section_list.start = cursor.here();
372        segment.section_list.end = command.end;
373
374        if (!handler->SegmentCommand(segment))
375          return false;
376        break;
377      }
378
379      case LC_SYMTAB: {
380        uint32_t symoff, nsyms, stroff, strsize;
381        cursor >> symoff >> nsyms >> stroff >> strsize;
382        if (!cursor) {
383          reporter_->LoadCommandTooShort(index, type);
384          return false;
385        }
386        // How big are the entries in the symbol table?
387        // sizeof(struct nlist_64) : sizeof(struct nlist),
388        // but be paranoid about alignment vs. target architecture.
389        size_t symbol_size = bits_64_ ? 16 : 12;
390        // How big is the entire symbol array?
391        size_t symbols_size = nsyms * symbol_size;
392        if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
393            stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
394          reporter_->MisplacedSymbolTable();
395          return false;
396        }
397        ByteBuffer entries(buffer_.start + symoff, symbols_size);
398        ByteBuffer names(buffer_.start + stroff, strsize);
399        if (!handler->SymtabCommand(entries, names))
400          return false;
401        break;
402      }
403
404      default: {
405        if (!handler->UnknownCommand(type, command))
406          return false;
407        break;
408      }
409    }
410
411    list_cursor.set_here(command.end);
412  }
413
414  return true;
415}
416
417// A load command handler that looks for a segment of a given name.
418class Reader::SegmentFinder : public LoadCommandHandler {
419 public:
420  // Create a load command handler that looks for a segment named NAME,
421  // and sets SEGMENT to describe it if found.
422  SegmentFinder(const string &name, Segment *segment)
423      : name_(name), segment_(segment), found_() { }
424
425  // Return true if the traversal found the segment, false otherwise.
426  bool found() const { return found_; }
427
428  bool SegmentCommand(const Segment &segment) {
429    if (segment.name == name_) {
430      *segment_ = segment;
431      found_ = true;
432      return false;
433    }
434    return true;
435  }
436
437 private:
438  // The name of the segment our creator is looking for.
439  const string &name_;
440
441  // Where we should store the segment if found. (WEAK)
442  Segment *segment_;
443
444  // True if we found the segment.
445  bool found_;
446};
447
448bool Reader::FindSegment(const string &name, Segment *segment) const {
449  SegmentFinder finder(name, segment);
450  WalkLoadCommands(&finder);
451  return finder.found();
452}
453
454bool Reader::WalkSegmentSections(const Segment &segment,
455                                 SectionHandler *handler) const {
456  size_t word_size = segment.bits_64 ? 8 : 4;
457  ByteCursor cursor(&segment.section_list, big_endian_);
458
459  for (size_t i = 0; i < segment.nsects; i++) {
460    Section section;
461    section.bits_64 = segment.bits_64;
462    uint64_t size;
463    uint32_t offset, dummy32;
464    cursor
465        .CString(&section.section_name, 16)
466        .CString(&section.segment_name, 16)
467        .Read(word_size, false, &section.address)
468        .Read(word_size, false, &size)
469        >> offset
470        >> section.align
471        >> dummy32
472        >> dummy32
473        >> section.flags
474        >> dummy32
475        >> dummy32;
476    if (section.bits_64)
477      cursor >> dummy32;
478    if (!cursor) {
479      reporter_->SectionsMissing(segment.name);
480      return false;
481    }
482    if ((section.flags & SECTION_TYPE) == S_ZEROFILL) {
483      // Zero-fill sections have a size, but no contents.
484      section.contents.start = section.contents.end = NULL;
485    } else if (segment.contents.start == NULL &&
486               segment.contents.end == NULL) {
487      // Mach-O files in .dSYM bundles have the contents of the loaded
488      // segments removed, and their file offsets and file sizes zeroed
489      // out.  However, the sections within those segments still have
490      // non-zero sizes.  There's no reason to call MisplacedSectionData in
491      // this case; the caller may just need the section's load
492      // address. But do set the contents' limits to NULL, for safety.
493      section.contents.start = section.contents.end = NULL;
494    } else {
495      if (offset < size_t(segment.contents.start - buffer_.start) ||
496          offset > size_t(segment.contents.end - buffer_.start) ||
497          size > size_t(segment.contents.end - buffer_.start - offset)) {
498        reporter_->MisplacedSectionData(section.section_name,
499                                        section.segment_name);
500        return false;
501      }
502      section.contents.start = buffer_.start + offset;
503      section.contents.end = section.contents.start + size;
504    }
505    if (!handler->HandleSection(section))
506      return false;
507  }
508  return true;
509}
510
511// A SectionHandler that builds a SectionMap for the sections within a
512// given segment.
513class Reader::SectionMapper: public SectionHandler {
514 public:
515  // Create a SectionHandler that populates MAP with an entry for
516  // each section it is given.
517  SectionMapper(SectionMap *map) : map_(map) { }
518  bool HandleSection(const Section &section) {
519    (*map_)[section.section_name] = section;
520    return true;
521  }
522 private:
523  // The map under construction. (WEAK)
524  SectionMap *map_;
525};
526
527bool Reader::MapSegmentSections(const Segment &segment,
528                                SectionMap *section_map) const {
529  section_map->clear();
530  SectionMapper mapper(section_map);
531  return WalkSegmentSections(segment, &mapper);
532}
533
534}  // namespace mach_o
535}  // namespace google_breakpad
536