1// -*- mode: C++ -*-
2
3// Copyright (c) 2010, Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10//     * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12//     * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16//     * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
33
34// macho_reader.h: A class for parsing Mach-O files.
35
36#ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_
37#define BREAKPAD_COMMON_MAC_MACHO_READER_H_
38
39#include <mach-o/loader.h>
40#include <mach-o/fat.h>
41#include <stdint.h>
42#include <stdlib.h>
43#include <unistd.h>
44
45#include <map>
46#include <string>
47#include <vector>
48
49#include "common/byte_cursor.h"
50
51namespace google_breakpad {
52namespace mach_o {
53
54using std::map;
55using std::string;
56using std::vector;
57
// The Mac headers give no dedicated types to the following groups of
// constants. Naming them here serves as documentation, and each alias is
// exactly as wide as the on-disk field that carries it, which keeps them
// convenient to read through a ByteCursor.
using Magic = uint32_t;            // Magic number of a fat or Mach-O file.
using FileType = uint32_t;         // Mach-O header file type.
using FileFlags = uint32_t;        // Mach-O header flags.
using LoadCommandType = uint32_t;  // Type field of a load command.
using SegmentFlags = uint32_t;     // Flags field of a segment.
using SectionFlags = uint32_t;     // Flags field of a section.
68
69// A parser for fat binary files, used to store universal binaries.
70// When applied to a (non-fat) Mach-O file, this behaves as if the
71// file were a fat file containing a single object file.
72class FatReader {
73 public:
74
75  // A class for reporting errors found while parsing fat binary files. The
76  // default definitions of these methods print messages to stderr.
77  class Reporter {
78   public:
79    // Create a reporter that attributes problems to |filename|.
80    explicit Reporter(const string &filename) : filename_(filename) { }
81
82    virtual ~Reporter() { }
83
84    // The data does not begin with a fat binary or Mach-O magic number.
85    // This is a fatal error.
86    virtual void BadHeader();
87
88    // The Mach-O fat binary file ends abruptly, without enough space
89    // to contain an object file it claims is present.
90    virtual void MisplacedObjectFile();
91
92    // The file ends abruptly: either it is not large enough to hold a
93    // complete header, or the header implies that contents are present
94    // beyond the actual end of the file.
95    virtual void TooShort();
96
97   private:
98    // The filename to which the reader should attribute problems.
99    string filename_;
100  };
101
102  // Create a fat binary file reader that uses |reporter| to report problems.
103  explicit FatReader(Reporter *reporter) : reporter_(reporter) { }
104
105  // Read the |size| bytes at |buffer| as a fat binary file. On success,
106  // return true; on failure, report the problem to reporter_ and return
107  // false.
108  //
109  // If the data is a plain Mach-O file, rather than a fat binary file,
110  // then the reader behaves as if it had found a fat binary file whose
111  // single object file is the Mach-O file.
112  bool Read(const uint8_t *buffer, size_t size);
113
114  // Return an array of 'struct fat_arch' structures describing the
115  // object files present in this fat binary file. Set |size| to the
116  // number of elements in the array.
117  //
118  // Assuming Read returned true, the entries are validated: it is
119  // safe to assume that the offsets and sizes in each 'struct
120  // fat_arch' refer to subranges of the bytes passed to Read.
121  //
122  // If there are no object files in this fat binary, then this
123  // function can return NULL.
124  //
125  // The array is owned by this FatReader instance; it will be freed when
126  // this FatReader is destroyed.
127  //
128  // This function returns a C-style array instead of a vector to make it
129  // possible to use the result with OS X functions like NXFindBestFatArch,
130  // so that the symbol dumper will behave consistently with other OS X
131  // utilities that work with fat binaries.
132  const struct fat_arch *object_files(size_t *count) const {
133    *count = object_files_.size();
134    if (object_files_.size() > 0)
135      return &object_files_[0];
136    return NULL;
137  }
138
139 private:
140  // We use this to report problems parsing the file's contents. (WEAK)
141  Reporter *reporter_;
142
143  // The contents of the fat binary or Mach-O file we're parsing. We do not
144  // own the storage it refers to.
145  ByteBuffer buffer_;
146
147  // The magic number of this binary, in host byte order.
148  Magic magic_;
149
150  // The list of object files in this binary.
151  // object_files_.size() == fat_header.nfat_arch
152  vector<struct fat_arch> object_files_;
153};
154
155// A segment in a Mach-O file. All these fields have been byte-swapped as
156// appropriate for use by the executing architecture.
157struct Segment {
158  // The ByteBuffers below point into the bytes passed to the Reader that
159  // created this Segment.
160
161  ByteBuffer section_list;    // This segment's section list.
162  ByteBuffer contents;        // This segment's contents.
163
164  // This segment's name.
165  string name;
166
167  // The address at which this segment should be loaded in memory. If
168  // bits_64 is false, only the bottom 32 bits of this value are valid.
169  uint64_t vmaddr;
170
171  // The size of this segment when loaded into memory. This may be larger
172  // than contents.Size(), in which case the extra area will be
173  // initialized with zeros. If bits_64 is false, only the bottom 32 bits
174  // of this value are valid.
175  uint64_t vmsize;
176
177  // The maximum and initial VM protection of this segment's contents.
178  uint32_t maxprot;
179  uint32_t initprot;
180
181  // The number of sections in section_list.
182  uint32_t nsects;
183
184  // Flags describing this segment, from SegmentFlags.
185  uint32_t flags;
186
187  // True if this is a 64-bit section; false if it is a 32-bit section.
188  bool bits_64;
189};
190
191// A section in a Mach-O file. All these fields have been byte-swapped as
192// appropriate for use by the executing architecture.
193struct Section {
194  // This section's contents. This points into the bytes passed to the
195  // Reader that created this Section.
196  ByteBuffer contents;
197
198  // This section's name.
199  string section_name;  // section[_64].sectname
200  // The name of the segment this section belongs to.
201  string segment_name;  // section[_64].segname
202
203  // The address at which this section's contents should be loaded in
204  // memory. If bits_64 is false, only the bottom 32 bits of this value
205  // are valid.
206  uint64_t address;
207
208  // The contents of this section should be loaded into memory at an
209  // address which is a multiple of (two raised to this power).
210  uint32_t align;
211
212  // Flags from SectionFlags describing the section's contents.
213  uint32_t flags;
214
215  // We don't support reading relocations yet.
216
217  // True if this is a 64-bit section; false if it is a 32-bit section.
218  bool bits_64;
219};
220
221// A map from section names to Sections.
222typedef map<string, Section> SectionMap;
223
224// A reader for a Mach-O file.
225//
226// This does not handle fat binaries; see FatReader above. FatReader
227// provides a friendly interface for parsing data that could be either a
228// fat binary or a Mach-O file.
229class Reader {
230 public:
231
232  // A class for reporting errors found while parsing Mach-O files. The
233  // default definitions of these member functions print messages to
234  // stderr.
235  class Reporter {
236   public:
237    // Create a reporter that attributes problems to |filename|.
238    explicit Reporter(const string &filename) : filename_(filename) { }
239    virtual ~Reporter() { }
240
241    // Reporter functions for fatal errors return void; the reader will
242    // definitely return an error to its caller after calling them
243
244    // The data does not begin with a Mach-O magic number, or the magic
245    // number does not match the expected value for the cpu architecture.
246    // This is a fatal error.
247    virtual void BadHeader();
248
249    // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|)
250    // does not match the expected CPU architecture
251    // (|expected_cpu_type|, |expected_cpu_subtype|).
252    virtual void CPUTypeMismatch(cpu_type_t cpu_type,
253                                 cpu_subtype_t cpu_subtype,
254                                 cpu_type_t expected_cpu_type,
255                                 cpu_subtype_t expected_cpu_subtype);
256
257    // The file ends abruptly: either it is not large enough to hold a
258    // complete header, or the header implies that contents are present
259    // beyond the actual end of the file.
260    virtual void HeaderTruncated();
261
262    // The file's load command region, as given in the Mach-O header, is
263    // too large for the file.
264    virtual void LoadCommandRegionTruncated();
265
266    // The file's Mach-O header claims the file contains |claimed| load
267    // commands, but the I'th load command, of type |type|, extends beyond
268    // the end of the load command region, as given by the Mach-O header.
269    // If |type| is zero, the command's type was unreadable.
270    virtual void LoadCommandsOverrun(size_t claimed, size_t i,
271                                     LoadCommandType type);
272
273    // The contents of the |i|'th load command, of type |type|, extend beyond
274    // the size given in the load command's header.
275    virtual void LoadCommandTooShort(size_t i, LoadCommandType type);
276
277    // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named
278    // |name| is too short to hold the sections that its header says it does.
279    // (This more specific than LoadCommandTooShort.)
280    virtual void SectionsMissing(const string &name);
281
282    // The segment named |name| claims that its contents lie beyond the end
283    // of the file.
284    virtual void MisplacedSegmentData(const string &name);
285
286    // The section named |section| in the segment named |segment| claims that
287    // its contents do not lie entirely within the segment.
288    virtual void MisplacedSectionData(const string &section,
289                                      const string &segment);
290
291    // The LC_SYMTAB command claims that symbol table contents are located
292    // beyond the end of the file.
293    virtual void MisplacedSymbolTable();
294
295    // An attempt was made to read a Mach-O file of the unsupported
296    // CPU architecture |cpu_type|.
297    virtual void UnsupportedCPUType(cpu_type_t cpu_type);
298
299   private:
300    string filename_;
301  };
302
303  // A handler for sections parsed from a segment. The WalkSegmentSections
304  // member function accepts an instance of this class, and applies it to
305  // each section defined in a given segment.
306  class SectionHandler {
307   public:
308    virtual ~SectionHandler() { }
309
310    // Called to report that the segment's section list contains |section|.
311    // This should return true if the iteration should continue, or false
312    // if it should stop.
313    virtual bool HandleSection(const Section &section) = 0;
314  };
315
316  // A handler for the load commands in a Mach-O file.
317  class LoadCommandHandler {
318   public:
319    LoadCommandHandler() { }
320    virtual ~LoadCommandHandler() { }
321
322    // When called from WalkLoadCommands, the following handler functions
323    // should return true if they wish to continue iterating over the load
324    // command list, or false if they wish to stop iterating.
325    //
326    // When called from LoadCommandIterator::Handle or Reader::Handle,
327    // these functions' return values are simply passed through to Handle's
328    // caller.
329    //
330    // The definitions provided by this base class simply return true; the
331    // default is to silently ignore sections whose member functions the
332    // subclass doesn't override.
333
334    // COMMAND is load command we don't recognize. We provide only the
335    // command type and a ByteBuffer enclosing the command's data (If we
336    // cannot parse the command type or its size, we call
337    // reporter_->IncompleteLoadCommand instead.)
338    virtual bool UnknownCommand(LoadCommandType type,
339                                const ByteBuffer &contents) {
340      return true;
341    }
342
343    // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment
344    // with the properties given in |segment|.
345    virtual bool SegmentCommand(const Segment &segment) {
346      return true;
347    }
348
349    // The load command is LC_SYMTAB. |entries| holds the array of nlist
350    // entries, and |names| holds the strings the entries refer to.
351    virtual bool SymtabCommand(const ByteBuffer &entries,
352                               const ByteBuffer &names) {
353      return true;
354    }
355
356    // Add handler functions for more load commands here as needed.
357  };
358
359  // Create a Mach-O file reader that reports problems to |reporter|.
360  explicit Reader(Reporter *reporter)
361      : reporter_(reporter) { }
362
363  // Read the given data as a Mach-O file. The reader retains pointers
364  // into the data passed, so the data should live as long as the reader
365  // does. On success, return true; on failure, return false.
366  //
367  // At most one of these functions should be invoked once on each Reader
368  // instance.
369  bool Read(const uint8_t *buffer,
370            size_t size,
371            cpu_type_t expected_cpu_type,
372            cpu_subtype_t expected_cpu_subtype);
373  bool Read(const ByteBuffer &buffer,
374            cpu_type_t expected_cpu_type,
375            cpu_subtype_t expected_cpu_subtype) {
376    return Read(buffer.start,
377                buffer.Size(),
378                expected_cpu_type,
379                expected_cpu_subtype);
380  }
381
382  // Return this file's characteristics, as found in the Mach-O header.
383  cpu_type_t    cpu_type()    const { return cpu_type_; }
384  cpu_subtype_t cpu_subtype() const { return cpu_subtype_; }
385  FileType      file_type()   const { return file_type_; }
386  FileFlags     flags()       const { return flags_; }
387
388  // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit
389  // Mach-O file.
390  bool bits_64() const { return bits_64_; }
391
392  // Return true if this is a big-endian Mach-O file, false if it is
393  // little-endian.
394  bool big_endian() const { return big_endian_; }
395
396  // Apply |handler| to each load command in this Mach-O file, stopping when
397  // a handler function returns false. If we encounter a malformed load
398  // command, report it via reporter_ and return false. Return true if all
399  // load commands were parseable and all handlers returned true.
400  bool WalkLoadCommands(LoadCommandHandler *handler) const;
401
402  // Set |segment| to describe the segment named |name|, if present. If
403  // found, |segment|'s byte buffers refer to a subregion of the bytes
404  // passed to Read. If we find the section, return true; otherwise,
405  // return false.
406  bool FindSegment(const string &name, Segment *segment) const;
407
408  // Apply |handler| to each section defined in |segment|. If |handler| returns
409  // false, stop iterating and return false. If all calls to |handler| return
410  // true and we reach the end of the section list, return true.
411  bool WalkSegmentSections(const Segment &segment, SectionHandler *handler)
412    const;
413
414  // Clear |section_map| and then populate it with a map of the sections
415  // in |segment|, from section names to Section structures.
416  // Each Section's contents refer to bytes in |segment|'s contents.
417  // On success, return true; if a problem occurs, report it and return false.
418  bool MapSegmentSections(const Segment &segment, SectionMap *section_map)
419    const;
420
421 private:
422  // Used internally.
423  class SegmentFinder;
424  class SectionMapper;
425
426  // We use this to report problems parsing the file's contents. (WEAK)
427  Reporter *reporter_;
428
429  // The contents of the Mach-O file we're parsing. We do not own the
430  // storage it refers to.
431  ByteBuffer buffer_;
432
433  // True if this file is big-endian.
434  bool big_endian_;
435
436  // True if this file is a 64-bit Mach-O file.
437  bool bits_64_;
438
439  // This file's cpu type and subtype.
440  cpu_type_t cpu_type_;        // mach_header[_64].cputype
441  cpu_subtype_t cpu_subtype_;  // mach_header[_64].cpusubtype
442
443  // This file's type.
444  FileType file_type_;         // mach_header[_64].filetype
445
446  // The region of buffer_ occupied by load commands.
447  ByteBuffer load_commands_;
448
449  // The number of load commands in load_commands_.
450  uint32_t load_command_count_;  // mach_header[_64].ncmds
451
452  // This file's header flags.
453  FileFlags flags_;
454};
455
456}  // namespace mach_o
457}  // namespace google_breakpad
458
459#endif  // BREAKPAD_COMMON_MAC_MACHO_READER_H_
460