1// -*- mode: c++ -*-
2
3// Copyright (c) 2011, Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10//     * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12//     * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16//     * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
33
34// dump_syms.mm: Create a symbol file for use with minidumps
35
36#include "common/mac/dump_syms.h"
37
38#include <Foundation/Foundation.h>
39#include <mach-o/arch.h>
40#include <mach-o/fat.h>
41#include <stdio.h>
42
43#include <ostream>
44#include <string>
45#include <vector>
46
47#include "common/dwarf/bytereader-inl.h"
48#include "common/dwarf/dwarf2reader.h"
49#include "common/dwarf_cfi_to_module.h"
50#include "common/dwarf_cu_to_module.h"
51#include "common/dwarf_line_to_module.h"
52#include "common/mac/file_id.h"
53#include "common/mac/arch_utilities.h"
54#include "common/mac/macho_reader.h"
55#include "common/module.h"
56#include "common/scoped_ptr.h"
57#include "common/stabs_reader.h"
58#include "common/stabs_to_module.h"
59#include "common/symbol_data.h"
60
61#ifndef CPU_TYPE_ARM
62#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
63#endif //  CPU_TYPE_ARM
64
65#ifndef CPU_TYPE_ARM64
66#define CPU_TYPE_ARM64 (static_cast<cpu_type_t>(16777228))
67#endif  // CPU_TYPE_ARM64
68
69using dwarf2reader::ByteReader;
70using google_breakpad::DwarfCUToModule;
71using google_breakpad::DwarfLineToModule;
72using google_breakpad::FileID;
73using google_breakpad::mach_o::FatReader;
74using google_breakpad::mach_o::Section;
75using google_breakpad::mach_o::Segment;
76using google_breakpad::Module;
77using google_breakpad::StabsReader;
78using google_breakpad::StabsToModule;
79using google_breakpad::scoped_ptr;
80using std::make_pair;
81using std::pair;
82using std::string;
83using std::vector;
84
85namespace google_breakpad {
86
87bool DumpSymbols::Read(NSString *filename) {
88  if (![[NSFileManager defaultManager] fileExistsAtPath:filename]) {
89    fprintf(stderr, "Object file does not exist: %s\n",
90            [filename fileSystemRepresentation]);
91    return false;
92  }
93
94  input_pathname_ = [filename retain];
95
96  // Does this filename refer to a dSYM bundle?
97  NSBundle *bundle = [NSBundle bundleWithPath:input_pathname_];
98
99  if (bundle) {
100    // Filenames referring to bundles usually have names of the form
101    // "<basename>.dSYM"; however, if the user has specified a wrapper
102    // suffix (the WRAPPER_SUFFIX and WRAPPER_EXTENSION build settings),
103    // then the name may have the form "<basename>.<extension>.dSYM". In
104    // either case, the resource name for the file containing the DWARF
105    // info within the bundle is <basename>.
106    //
107    // Since there's no way to tell how much to strip off, remove one
108    // extension at a time, and use the first one that
109    // pathForResource:ofType:inDirectory likes.
110    NSString *base_name = [input_pathname_ lastPathComponent];
111    NSString *dwarf_resource;
112
113    do {
114      NSString *new_base_name = [base_name stringByDeletingPathExtension];
115
116      // If stringByDeletingPathExtension returned the name unchanged, then
117      // there's nothing more for us to strip off --- lose.
118      if ([new_base_name isEqualToString:base_name]) {
119        fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
120                [input_pathname_ fileSystemRepresentation]);
121        return false;
122      }
123
124      // Take the shortened result as our new base_name.
125      base_name = new_base_name;
126
127      // Try to find a DWARF resource in the bundle under the new base_name.
128      dwarf_resource = [bundle pathForResource:base_name
129                        ofType:nil inDirectory:@"DWARF"];
130    } while (!dwarf_resource);
131
132    object_filename_ = [dwarf_resource retain];
133  } else {
134    object_filename_ = [input_pathname_ retain];
135  }
136
137  // Read the file's contents into memory.
138  //
139  // The documentation for dataWithContentsOfMappedFile says:
140  //
141  //     Because of file mapping restrictions, this method should only be
142  //     used if the file is guaranteed to exist for the duration of the
143  //     data object’s existence. It is generally safer to use the
144  //     dataWithContentsOfFile: method.
145  //
146  // I gather this means that OS X doesn't have (or at least, that method
147  // doesn't use) a form of mapping like Linux's MAP_PRIVATE, where the
148  // process appears to get its own copy of the data, and changes to the
149  // file don't affect memory and vice versa).
150  NSError *error;
151  contents_ = [NSData dataWithContentsOfFile:object_filename_
152                                     options:0
153                                       error:&error];
154  if (!contents_) {
155    fprintf(stderr, "Error reading object file: %s: %s\n",
156            [object_filename_ fileSystemRepresentation],
157            [[error localizedDescription] UTF8String]);
158    return false;
159  }
160  [contents_ retain];
161
162  // Get the list of object files present in the file.
163  FatReader::Reporter fat_reporter([object_filename_
164                                    fileSystemRepresentation]);
165  FatReader fat_reader(&fat_reporter);
166  if (!fat_reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes]),
167                       [contents_ length])) {
168    return false;
169  }
170
171  // Get our own copy of fat_reader's object file list.
172  size_t object_files_count;
173  const struct fat_arch *object_files =
174    fat_reader.object_files(&object_files_count);
175  if (object_files_count == 0) {
176    fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
177            [object_filename_ fileSystemRepresentation]);
178    return false;
179  }
180  object_files_.resize(object_files_count);
181  memcpy(&object_files_[0], object_files,
182         sizeof(struct fat_arch) * object_files_count);
183
184  return true;
185}
186
187bool DumpSymbols::SetArchitecture(cpu_type_t cpu_type,
188                                  cpu_subtype_t cpu_subtype) {
189  // Find the best match for the architecture the user requested.
190  const struct fat_arch *best_match
191    = NXFindBestFatArch(cpu_type, cpu_subtype, &object_files_[0],
192                        static_cast<uint32_t>(object_files_.size()));
193  if (!best_match) return false;
194
195  // Record the selected object file.
196  selected_object_file_ = best_match;
197  return true;
198}
199
200bool DumpSymbols::SetArchitecture(const std::string &arch_name) {
201  bool arch_set = false;
202  const NXArchInfo *arch_info =
203      google_breakpad::BreakpadGetArchInfoFromName(arch_name.c_str());
204  if (arch_info) {
205    arch_set = SetArchitecture(arch_info->cputype, arch_info->cpusubtype);
206  }
207  return arch_set;
208}
209
210string DumpSymbols::Identifier() {
211  FileID file_id([object_filename_ fileSystemRepresentation]);
212  unsigned char identifier_bytes[16];
213  cpu_type_t cpu_type = selected_object_file_->cputype;
214  cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype;
215  if (!file_id.MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes)) {
216    fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
217            [object_filename_ fileSystemRepresentation]);
218    return "";
219  }
220
221  char identifier_string[40];
222  FileID::ConvertIdentifierToString(identifier_bytes, identifier_string,
223                                    sizeof(identifier_string));
224
225  string compacted(identifier_string);
226  for(size_t i = compacted.find('-'); i != string::npos;
227      i = compacted.find('-', i))
228    compacted.erase(i, 1);
229
230  return compacted;
231}
232
233// A line-to-module loader that accepts line number info parsed by
234// dwarf2reader::LineInfo and populates a Module and a line vector
235// with the results.
236class DumpSymbols::DumperLineToModule:
237      public DwarfCUToModule::LineToModuleHandler {
238 public:
239  // Create a line-to-module converter using BYTE_READER.
240  DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
241      : byte_reader_(byte_reader) { }
242
243  void StartCompilationUnit(const string& compilation_dir) {
244    compilation_dir_ = compilation_dir;
245  }
246
247  void ReadProgram(const char *program, uint64 length,
248                   Module *module, vector<Module::Line> *lines) {
249    DwarfLineToModule handler(module, compilation_dir_, lines);
250    dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
251    parser.Start();
252  }
253 private:
254  string compilation_dir_;
255  dwarf2reader::ByteReader *byte_reader_;  // WEAK
256};
257
258bool DumpSymbols::ReadDwarf(google_breakpad::Module *module,
259                            const mach_o::Reader &macho_reader,
260                            const mach_o::SectionMap &dwarf_sections,
261                            bool handle_inter_cu_refs) const {
262  // Build a byte reader of the appropriate endianness.
263  ByteReader byte_reader(macho_reader.big_endian()
264                         ? dwarf2reader::ENDIANNESS_BIG
265                         : dwarf2reader::ENDIANNESS_LITTLE);
266
267  // Construct a context for this file.
268  DwarfCUToModule::FileContext file_context(selected_object_name_,
269                                            module,
270                                            handle_inter_cu_refs);
271
272  // Build a dwarf2reader::SectionMap from our mach_o::SectionMap.
273  for (mach_o::SectionMap::const_iterator it = dwarf_sections.begin();
274       it != dwarf_sections.end(); ++it) {
275    file_context.AddSectionToSectionMap(
276        it->first,
277        reinterpret_cast<const char *>(it->second.contents.start),
278        it->second.contents.Size());
279  }
280
281  // Find the __debug_info section.
282  dwarf2reader::SectionMap::const_iterator debug_info_entry =
283      file_context.section_map().find("__debug_info");
284  assert(debug_info_entry != file_context.section_map().end());
285  const std::pair<const char*, uint64>& debug_info_section =
286      debug_info_entry->second;
287  // There had better be a __debug_info section!
288  if (!debug_info_section.first) {
289    fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
290            selected_object_name_.c_str());
291    return false;
292  }
293
294  // Build a line-to-module loader for the root handler to use.
295  DumperLineToModule line_to_module(&byte_reader);
296
297  // Walk the __debug_info section, one compilation unit at a time.
298  uint64 debug_info_length = debug_info_section.second;
299  for (uint64 offset = 0; offset < debug_info_length;) {
300    // Make a handler for the root DIE that populates MODULE with the
301    // debug info.
302    DwarfCUToModule::WarningReporter reporter(selected_object_name_,
303                                              offset);
304    DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
305    // Make a Dwarf2Handler that drives our DIEHandler.
306    dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
307    // Make a DWARF parser for the compilation unit at OFFSET.
308    dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map(),
309                                               offset,
310                                               &byte_reader,
311                                               &die_dispatcher);
312    // Process the entire compilation unit; get the offset of the next.
313    offset += dwarf_reader.Start();
314  }
315
316  return true;
317}
318
319bool DumpSymbols::ReadCFI(google_breakpad::Module *module,
320                          const mach_o::Reader &macho_reader,
321                          const mach_o::Section &section,
322                          bool eh_frame) const {
323  // Find the appropriate set of register names for this file's
324  // architecture.
325  vector<string> register_names;
326  switch (macho_reader.cpu_type()) {
327    case CPU_TYPE_X86:
328      register_names = DwarfCFIToModule::RegisterNames::I386();
329      break;
330    case CPU_TYPE_X86_64:
331      register_names = DwarfCFIToModule::RegisterNames::X86_64();
332      break;
333    case CPU_TYPE_ARM:
334      register_names = DwarfCFIToModule::RegisterNames::ARM();
335      break;
336    case CPU_TYPE_ARM64:
337      register_names = DwarfCFIToModule::RegisterNames::ARM64();
338      break;
339    default: {
340      const NXArchInfo *arch = google_breakpad::BreakpadGetArchInfoFromCpuType(
341          macho_reader.cpu_type(), macho_reader.cpu_subtype());
342      fprintf(stderr, "%s: cannot convert DWARF call frame information for ",
343              selected_object_name_.c_str());
344      if (arch)
345        fprintf(stderr, "architecture '%s'", arch->name);
346      else
347        fprintf(stderr, "architecture %d,%d",
348                macho_reader.cpu_type(), macho_reader.cpu_subtype());
349      fprintf(stderr, " to Breakpad symbol file: no register name table\n");
350      return false;
351    }
352  }
353
354  // Find the call frame information and its size.
355  const char *cfi = reinterpret_cast<const char *>(section.contents.start);
356  size_t cfi_size = section.contents.Size();
357
358  // Plug together the parser, handler, and their entourages.
359  DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
360                                             section.section_name);
361  DwarfCFIToModule handler(module, register_names, &module_reporter);
362  dwarf2reader::ByteReader byte_reader(macho_reader.big_endian() ?
363                                       dwarf2reader::ENDIANNESS_BIG :
364                                       dwarf2reader::ENDIANNESS_LITTLE);
365  byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);
366  // At the moment, according to folks at Apple and some cursory
367  // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so
368  // this is the only base address the CFI parser will need.
369  byte_reader.SetCFIDataBase(section.address, cfi);
370
371  dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
372                                                       section.section_name);
373  dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
374                                     &byte_reader, &handler, &dwarf_reporter,
375                                     eh_frame);
376  parser.Start();
377  return true;
378}
379
380// A LoadCommandHandler that loads whatever debugging data it finds into a
381// Module.
382class DumpSymbols::LoadCommandDumper:
383      public mach_o::Reader::LoadCommandHandler {
384 public:
385  // Create a load command dumper handling load commands from READER's
386  // file, and adding data to MODULE.
387  LoadCommandDumper(const DumpSymbols &dumper,
388                    google_breakpad::Module *module,
389                    const mach_o::Reader &reader,
390                    SymbolData symbol_data,
391                    bool handle_inter_cu_refs)
392      : dumper_(dumper),
393        module_(module),
394        reader_(reader),
395        symbol_data_(symbol_data),
396        handle_inter_cu_refs_(handle_inter_cu_refs) { }
397
398  bool SegmentCommand(const mach_o::Segment &segment);
399  bool SymtabCommand(const ByteBuffer &entries, const ByteBuffer &strings);
400
401 private:
402  const DumpSymbols &dumper_;
403  google_breakpad::Module *module_;  // WEAK
404  const mach_o::Reader &reader_;
405  const SymbolData symbol_data_;
406  const bool handle_inter_cu_refs_;
407};
408
409bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment &segment) {
410  mach_o::SectionMap section_map;
411  if (!reader_.MapSegmentSections(segment, &section_map))
412    return false;
413
414  if (segment.name == "__TEXT") {
415    module_->SetLoadAddress(segment.vmaddr);
416    if (symbol_data_ != NO_CFI) {
417      mach_o::SectionMap::const_iterator eh_frame =
418          section_map.find("__eh_frame");
419      if (eh_frame != section_map.end()) {
420        // If there is a problem reading this, don't treat it as a fatal error.
421        dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
422      }
423    }
424    return true;
425  }
426
427  if (segment.name == "__DWARF") {
428    if (symbol_data_ != ONLY_CFI) {
429      if (!dumper_.ReadDwarf(module_, reader_, section_map,
430                             handle_inter_cu_refs_)) {
431        return false;
432      }
433    }
434    if (symbol_data_ != NO_CFI) {
435      mach_o::SectionMap::const_iterator debug_frame
436          = section_map.find("__debug_frame");
437      if (debug_frame != section_map.end()) {
438        // If there is a problem reading this, don't treat it as a fatal error.
439        dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
440      }
441    }
442  }
443
444  return true;
445}
446
447bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer &entries,
448                                                   const ByteBuffer &strings) {
449  StabsToModule stabs_to_module(module_);
450  // Mac OS X STABS are never "unitized", and the size of the 'value' field
451  // matches the address size of the executable.
452  StabsReader stabs_reader(entries.start, entries.Size(),
453                           strings.start, strings.Size(),
454                           reader_.big_endian(),
455                           reader_.bits_64() ? 8 : 4,
456                           true,
457                           &stabs_to_module);
458  if (!stabs_reader.Process())
459    return false;
460  stabs_to_module.Finalize();
461  return true;
462}
463
464bool DumpSymbols::ReadSymbolData(Module** out_module) {
465  // Select an object file, if SetArchitecture hasn't been called to set one
466  // explicitly.
467  if (!selected_object_file_) {
468    // If there's only one architecture, that's the one.
469    if (object_files_.size() == 1)
470      selected_object_file_ = &object_files_[0];
471    else {
472      // Look for an object file whose architecture matches our own.
473      const NXArchInfo *local_arch = NXGetLocalArchInfo();
474      if (!SetArchitecture(local_arch->cputype, local_arch->cpusubtype)) {
475        fprintf(stderr, "%s: object file contains more than one"
476                " architecture, none of which match the current"
477                " architecture; specify an architecture explicitly"
478                " with '-a ARCH' to resolve the ambiguity\n",
479                [object_filename_ fileSystemRepresentation]);
480        return false;
481      }
482    }
483  }
484
485  assert(selected_object_file_);
486
487  // Find the name of the selected file's architecture, to appear in
488  // the MODULE record and in error messages.
489  const NXArchInfo *selected_arch_info =
490      google_breakpad::BreakpadGetArchInfoFromCpuType(
491          selected_object_file_->cputype, selected_object_file_->cpusubtype);
492
493  const char *selected_arch_name = selected_arch_info->name;
494  if (strcmp(selected_arch_name, "i386") == 0)
495    selected_arch_name = "x86";
496
497  // Produce a name to use in error messages that includes the
498  // filename, and the architecture, if there is more than one.
499  selected_object_name_ = [object_filename_ UTF8String];
500  if (object_files_.size() > 1) {
501    selected_object_name_ += ", architecture ";
502    selected_object_name_ + selected_arch_name;
503  }
504
505  // Compute a module name, to appear in the MODULE record.
506  NSString *module_name = [object_filename_ lastPathComponent];
507
508  // Choose an identifier string, to appear in the MODULE record.
509  string identifier = Identifier();
510  if (identifier.empty())
511    return false;
512  identifier += "0";
513
514  // Create a module to hold the debugging information.
515  scoped_ptr<Module> module(new Module([module_name UTF8String],
516                                       "mac",
517                                       selected_arch_name,
518                                       identifier));
519
520  // Parse the selected object file.
521  mach_o::Reader::Reporter reporter(selected_object_name_);
522  mach_o::Reader reader(&reporter);
523  if (!reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes])
524                   + selected_object_file_->offset,
525                   selected_object_file_->size,
526                   selected_object_file_->cputype,
527                   selected_object_file_->cpusubtype))
528    return false;
529
530  // Walk its load commands, and deal with whatever is there.
531  LoadCommandDumper load_command_dumper(*this, module.get(), reader,
532                                        symbol_data_, handle_inter_cu_refs_);
533  if (!reader.WalkLoadCommands(&load_command_dumper))
534    return false;
535
536  *out_module = module.release();
537
538  return true;
539}
540
541bool DumpSymbols::WriteSymbolFile(std::ostream &stream) {
542  Module* module = NULL;
543
544  if (ReadSymbolData(&module) && module) {
545    bool res = module->Write(stream, symbol_data_);
546    delete module;
547    return res;
548  }
549
550  return false;
551}
552
553}  // namespace google_breakpad
554