1
2/*--------------------------------------------------------------------*/
3/*--- Reading of syms & debug info from Mach-O files.              ---*/
4/*---                                                  readmacho.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2005-2013 Apple Inc.
12      Greg Parker gparker@apple.com
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#if defined(VGO_darwin)
33
34#include "pub_core_basics.h"
35#include "pub_core_vki.h"
36#include "pub_core_libcbase.h"
37#include "pub_core_libcprint.h"
38#include "pub_core_libcassert.h"
39#include "pub_core_libcfile.h"
40#include "pub_core_libcproc.h"
41#include "pub_core_aspacemgr.h"    /* for mmaping debuginfo files */
42#include "pub_core_machine.h"      /* VG_ELF_CLASS */
43#include "pub_core_options.h"
44#include "pub_core_oset.h"
45#include "pub_core_tooliface.h"    /* VG_(needs) */
46#include "pub_core_xarray.h"
47#include "pub_core_clientstate.h"
48#include "pub_core_debuginfo.h"
49
50#include "priv_misc.h"
51#include "priv_image.h"
52#include "priv_d3basics.h"
53#include "priv_tytypes.h"
54#include "priv_storage.h"
55#include "priv_readmacho.h"
56#include "priv_readdwarf.h"
57#include "priv_readdwarf3.h"
58#include "priv_readstabs.h"
59
60/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
61#include <mach-o/loader.h>
62#include <mach-o/nlist.h>
63#include <mach-o/fat.h>
64/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
65
66#if VG_WORDSIZE == 4
67# define MAGIC MH_MAGIC
68# define MACH_HEADER mach_header
69# define LC_SEGMENT_CMD LC_SEGMENT
70# define SEGMENT_COMMAND segment_command
71# define SECTION section
72# define NLIST nlist
73#else
74# define MAGIC MH_MAGIC_64
75# define MACH_HEADER mach_header_64
76# define LC_SEGMENT_CMD LC_SEGMENT_64
77# define SEGMENT_COMMAND segment_command_64
78# define SECTION section_64
79# define NLIST nlist_64
80#endif
81
82
83/*------------------------------------------------------------*/
84/*---                                                      ---*/
85/*--- Mach-O file mapping/unmapping helpers                ---*/
86/*---                                                      ---*/
87/*------------------------------------------------------------*/
88
89/* A DiSlice is used to handle the thin/fat distinction for MachO images.
90   (1) the entire mapped-in ("primary") image, fat headers, kitchen sink,
91       whatnot: the entire file.  This is the DiImage* that is the backing
92       for the DiSlice.
93   (2) the Mach-O object of interest, which is presumably somewhere inside
94       the primary image.  map_image_aboard() below, which generates this
95       info, will carefully check that the macho_ fields denote a section of
96       memory that falls entirely inside the primary image.
97*/
98
99Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
100{
101   /* (JRS: the Mach-O headers might not be in this mapped data,
102      because we only mapped a page for this initial check,
103      or at least not very much, and what's at the start of the file
104      is in general a so-called fat header.  The Mach-O object we're
105      interested in could be arbitrarily far along the image, and so
106      we can't assume its header will fall within this page.) */
107
108   /* But we can say that either it's a fat object, in which case it
109      begins with a fat header, or it's unadorned Mach-O, in which
110      case it starts with a normal header.  At least do what checks we
111      can to establish whether or not we're looking at something
112      sane. */
113
114   const struct fat_header*  fh_be = buf;
115   const struct MACH_HEADER* mh    = buf;
116
117   vg_assert(buf);
118   if (szB < sizeof(struct fat_header))
119      return False;
120   if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
121      return True;
122
123   if (szB < sizeof(struct MACH_HEADER))
124      return False;
125   if (mh->magic == MAGIC)
126      return True;
127
128   return False;
129}
130
131
132/* Unmap an image mapped in by map_image_aboard. */
133static void unmap_image ( /*MOD*/DiSlice* sli )
134{
135   vg_assert(sli);
136   if (ML_(sli_is_valid)(*sli)) {
137      ML_(img_done)(sli->img);
138      *sli = DiSlice_INVALID;
139   }
140}
141
142
143/* Open the given file, find the thin part if necessary, do some
144   checks, and return a DiSlice containing details of both the thin
145   part and (implicitly, via the contained DiImage*) the fat part.
146   returns DiSlice_INVALID if it fails.  If it succeeds, the returned
147   slice is guaranteed to refer to a valid(ish) Mach-O image. */
148static DiSlice map_image_aboard ( DebugInfo* di, /* only for err msgs */
149                                  const HChar* filename )
150{
151   DiSlice sli = DiSlice_INVALID;
152
153   /* First off, try to map the thing in. */
154   DiImage* mimg = ML_(img_from_local_file)(filename);
155   if (mimg == NULL) {
156      VG_(message)(Vg_UserMsg, "warning: connection to image %s failed\n",
157                               filename );
158      VG_(message)(Vg_UserMsg, "         no symbols or debug info loaded\n" );
159      return DiSlice_INVALID;
160   }
161
162   /* Now we have a viable DiImage* for it.  Look for the embedded
163      Mach-O object.  If not findable, close the image and fail. */
164   DiOffT            fh_be_ioff = 0;
165   struct fat_header fh_be;
166   struct fat_header fh;
167
168   // Assume initially that we have a thin image, and narrow
169   // the bounds if it turns out to be fat.  This stores |mimg| as
170   // |sli.img|, so NULL out |mimg| after this point, for the sake of
171   // clarity.
172   sli  = ML_(sli_from_img)(mimg);
173   mimg = NULL;
174
175   // Check for fat header.
176   if (ML_(img_size)(sli.img) < sizeof(struct fat_header)) {
177      ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
178      goto close_and_fail;
179   }
180
181   // Fat header is always BIG-ENDIAN
182   ML_(img_get)(&fh_be, sli.img, fh_be_ioff, sizeof(fh_be));
183   VG_(memset)(&fh, 0, sizeof(fh));
184   fh.magic     = VG_(ntohl)(fh_be.magic);
185   fh.nfat_arch = VG_(ntohl)(fh_be.nfat_arch);
186   if (fh.magic == FAT_MAGIC) {
187      // Look for a good architecture.
188      if (ML_(img_size)(sli.img) < sizeof(struct fat_header)
189                                   + fh.nfat_arch * sizeof(struct fat_arch)) {
190         ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
191         goto close_and_fail;
192      }
193      DiOffT arch_be_ioff;
194      Int    f;
195      for (f = 0, arch_be_ioff = sizeof(struct fat_header);
196           f < fh.nfat_arch;
197           f++, arch_be_ioff += sizeof(struct fat_arch)) {
198#        if defined(VGA_ppc)
199         Int cputype = CPU_TYPE_POWERPC;
200#        elif defined(VGA_ppc64)
201         Int cputype = CPU_TYPE_POWERPC64;
202#        elif defined(VGA_x86)
203         Int cputype = CPU_TYPE_X86;
204#        elif defined(VGA_amd64)
205         Int cputype = CPU_TYPE_X86_64;
206#        else
207#          error "unknown architecture"
208#        endif
209         struct fat_arch arch_be;
210         struct fat_arch arch;
211         ML_(img_get)(&arch_be, sli.img, arch_be_ioff, sizeof(arch_be));
212         VG_(memset)(&arch, 0, sizeof(arch));
213         arch.cputype    = VG_(ntohl)(arch_be.cputype);
214         arch.cpusubtype = VG_(ntohl)(arch_be.cpusubtype);
215         arch.offset     = VG_(ntohl)(arch_be.offset);
216         arch.size       = VG_(ntohl)(arch_be.size);
217         if (arch.cputype == cputype) {
218            if (ML_(img_size)(sli.img) < arch.offset + arch.size) {
219               ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
220               goto close_and_fail;
221            }
222            /* Found a suitable arch.  Narrow down the slice accordingly. */
223            sli.ioff = arch.offset;
224            sli.szB  = arch.size;
225            break;
226         }
227      }
228      if (f == fh.nfat_arch) {
229         ML_(symerr)(di, True,
230                     "No acceptable architecture found in fat file.");
231         goto close_and_fail;
232      }
233   }
234
235   /* Sanity check what we found. */
236
237   /* assured by logic above */
238   vg_assert(ML_(img_size)(sli.img) >= sizeof(struct fat_header));
239
240   if (sli.szB < sizeof(struct MACH_HEADER)) {
241      ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
242      goto close_and_fail;
243   }
244
245   if (sli.szB > ML_(img_size)(sli.img)) {
246      ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
247      goto close_and_fail;
248   }
249
250   if (sli.ioff >= 0 && sli.ioff + sli.szB <= ML_(img_size)(sli.img)) {
251      /* thin entirely within fat, as expected */
252   } else {
253      ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
254      goto close_and_fail;
255   }
256
257   /* Peer at the Mach header for the thin object, starting at the
258      beginning of the slice, to check it's at least marginally
259      sane. */
260   struct MACH_HEADER mh;
261   ML_(cur_read_get)(&mh, ML_(cur_from_sli)(sli), sizeof(mh));
262   if (mh.magic != MAGIC) {
263      ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
264      goto close_and_fail;
265   }
266
267   if (sli.szB < sizeof(struct MACH_HEADER) + mh.sizeofcmds) {
268      ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
269      goto close_and_fail;
270   }
271
272   /* "main image is plausible" */
273   vg_assert(sli.img);
274   vg_assert(ML_(img_size)(sli.img) > 0);
275   /* "thin image exists and is a sub-part (or all) of main image" */
276   vg_assert(sli.ioff >= 0);
277   vg_assert(sli.szB > 0);
278   vg_assert(sli.ioff + sli.szB <= ML_(img_size)(sli.img));
279   return sli;  /* success */
280   /*NOTREACHED*/
281
282  close_and_fail:
283   unmap_image(&sli);
284   return DiSlice_INVALID; /* bah! */
285}
286
287
288/*------------------------------------------------------------*/
289/*---                                                      ---*/
290/*--- Mach-O symbol table reading                          ---*/
291/*---                                                      ---*/
292/*------------------------------------------------------------*/
293
294/* Read a symbol table (nlist).  Add the resulting candidate symbols
295   to 'syms'; the caller will post-process them and hand them off to
296   ML_(addSym) itself. */
297static
298void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
299                  struct _DebugInfo* di,
300                  DiCursor symtab_cur, UInt symtab_count,
301                  DiCursor strtab_cur, UInt strtab_sz )
302{
303   Int    i;
304   DiSym  disym;
305
306   // "start_according_to_valgrind"
307   static HChar* s_a_t_v = NULL; /* do not make non-static */
308
309   for (i = 0; i < symtab_count; i++) {
310      struct NLIST nl;
311      ML_(cur_read_get)(&nl,
312                        ML_(cur_plus)(symtab_cur, i * sizeof(struct NLIST)),
313                        sizeof(nl));
314
315      Addr sym_addr = 0;
316      if ((nl.n_type & N_TYPE) == N_SECT) {
317         sym_addr = di->text_bias + nl.n_value;
318      /*} else if ((nl.n_type & N_TYPE) == N_ABS) {
319         GrP fixme don't ignore absolute symbols?
320         sym_addr = nl.n_value; */
321      } else {
322         continue;
323      }
324
325      if (di->trace_symtab) {
326         HChar* str = ML_(cur_read_strdup)(
327                         ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
328                         "di.read_symtab.1");
329         VG_(printf)("nlist raw: avma %010lx  %s\n", sym_addr, str );
330         ML_(dinfo_free)(str);
331      }
332
333      /* If no part of the symbol falls within the mapped range,
334         ignore it. */
335      if (sym_addr <= di->text_avma
336          || sym_addr >= di->text_avma+di->text_size) {
337         continue;
338      }
339
340      /* skip names which point outside the string table;
341         following these risks segfaulting Valgrind */
342      if (nl.n_un.n_strx < 0 || nl.n_un.n_strx >= strtab_sz) {
343         continue;
344      }
345
346      HChar* name
347         = ML_(cur_read_strdup)( ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
348                                 "di.read_symtab.2");
349
350      /* skip nameless symbols; these appear to be common, but
351         useless */
352      if (*name == 0) {
353         ML_(dinfo_free)(name);
354         continue;
355      }
356
357      disym.addr      = sym_addr;
358      disym.tocptr    = 0;
359      disym.pri_name  = ML_(addStr)(di, name, -1);
360      disym.sec_names = NULL;
361      disym.size      = // let canonicalize fix it
362                        di->text_avma+di->text_size - sym_addr;
363      disym.isText    = True;
364      disym.isIFunc   = False;
365      // Lots of user function names get prepended with an underscore.  Eg. the
366      // function 'f' becomes the symbol '_f'.  And the "below main"
367      // function is called "start".  So we skip the leading underscore, and
368      // if we see 'start' and --show-below-main=no, we rename it as
369      // "start_according_to_valgrind", which makes it easy to spot later
370      // and display as "(below main)".
371      if (disym.pri_name[0] == '_') {
372         disym.pri_name++;
373      }
374      else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) {
375         if (s_a_t_v == NULL)
376            s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
377         vg_assert(s_a_t_v);
378         disym.pri_name = s_a_t_v;
379      }
380
381      vg_assert(disym.pri_name);
382      VG_(addToXA)( syms, &disym );
383      ML_(dinfo_free)(name);
384   }
385}
386
387
388/* Compare DiSyms by their start address, and for equal addresses, use
389   the primary name as a secondary sort key. */
390static Int cmp_DiSym_by_start_then_name ( const void* v1, const void* v2 )
391{
392   const DiSym* s1 = (DiSym*)v1;
393   const DiSym* s2 = (DiSym*)v2;
394   if (s1->addr < s2->addr) return -1;
395   if (s1->addr > s2->addr) return 1;
396   return VG_(strcmp)(s1->pri_name, s2->pri_name);
397}
398
399/* 'cand' is a bunch of candidate symbols obtained by reading
400   nlist-style symbol table entries.  Their ends may overlap, so sort
401   them and truncate them accordingly.  The code in this routine is
402   copied almost verbatim from read_symbol_table() in readxcoff.c. */
403static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
404                                Bool trace_symtab )
405{
406   Word nsyms, i, j, k, m;
407
408   nsyms = VG_(sizeXA)(syms);
409
410   VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
411   VG_(sortXA)(syms);
412
413   /* We only know for sure the start addresses (actual VMAs) of
414      symbols, and an overestimation of their end addresses.  So sort
415      by start address, then clip each symbol so that its end address
416      does not overlap with the next one along.
417
418      There is a small refinement: if a group of symbols have the same
419      address, treat them as a group: find the next symbol along that
420      has a higher start address, and clip all of the group
421      accordingly.  This clips the group as a whole so as not to
422      overlap following symbols.  This leaves prefersym() in
423      storage.c, which is not nlist-specific, to later decide which of
424      the symbols in the group to keep.
425
426      Another refinement is that we need to get rid of symbols which,
427      after clipping, have identical starts, ends, and names.  So the
428      sorting uses the name as a secondary key.
429   */
430
431   for (i = 0; i < nsyms; i++) {
432      for (k = i+1;
433           k < nsyms
434             && ((DiSym*)VG_(indexXA)(syms,i))->addr
435                 == ((DiSym*)VG_(indexXA)(syms,k))->addr;
436           k++)
437         ;
438      /* So now [i .. k-1] is a group all with the same start address.
439         Clip their ending addresses so they don't overlap [k].  In
440         the normal case (no overlaps), k == i+1. */
441      if (k < nsyms) {
442         DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
443         for (m = i; m < k; m++) {
444            DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
445            vg_assert(here->addr < next->addr);
446            if (here->addr + here->size > next->addr)
447               here->size = next->addr - here->addr;
448         }
449      }
450      i = k-1;
451      vg_assert(i <= nsyms);
452   }
453
454   j = 0;
455   if (nsyms > 0) {
456      j = 1;
457      for (i = 1; i < nsyms; i++) {
458         DiSym *s_j1, *s_j, *s_i;
459         vg_assert(j <= i);
460         s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
461         s_j  = (DiSym*)VG_(indexXA)(syms, j);
462         s_i  = (DiSym*)VG_(indexXA)(syms, i);
463         if (s_i->addr != s_j1->addr
464             || s_i->size != s_j1->size
465             || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) {
466            *s_j = *s_i;
467            j++;
468         } else {
469            if (trace_symtab)
470               VG_(printf)("nlist cleanup: dump duplicate avma %010lx  %s\n",
471                           s_i->addr, s_i->pri_name );
472         }
473      }
474   }
475   vg_assert(j >= 0 && j <= nsyms);
476   VG_(dropTailXA)(syms, nsyms - j);
477}
478
479
480/*------------------------------------------------------------*/
481/*---                                                      ---*/
482/*--- Mach-O top-level processing                          ---*/
483/*---                                                      ---*/
484/*------------------------------------------------------------*/
485
486#if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
487#define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
488#endif
489
490
491static Bool file_exists_p(const HChar *path)
492{
493   struct vg_stat sbuf;
494   SysRes res = VG_(stat)(path, &sbuf);
495   return sr_isError(res) ? False : True;
496}
497
498
499/* Search for an existing dSYM file as a possible separate debug file.
500   Adapted from gdb. */
501static HChar *
502find_separate_debug_file (const HChar *executable_name)
503{
504   const HChar *basename_str;
505   HChar *dot_ptr;
506   HChar *slash_ptr;
507   HChar *dsymfile;
508
509   /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
510      will end up with an infinite loop where after we add a dSYM symbol file,
511      it will then enter this function asking if there is a debug file for the
512      dSYM file itself.  */
513   if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
514   {
515      /* Check for the existence of a .dSYM file for a given executable.  */
516      basename_str = VG_(basename) (executable_name);
517      dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
518                    VG_(strlen) (executable_name)
519                    + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
520                    + VG_(strlen) (basename_str)
521                    + 1
522                 );
523
524      /* First try for the dSYM in the same directory as the original file.  */
525      VG_(strcpy) (dsymfile, executable_name);
526      VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
527      VG_(strcat) (dsymfile, basename_str);
528
529      if (file_exists_p (dsymfile))
530         return dsymfile;
531
532      /* Now search for any parent directory that has a '.' in it so we can find
533         Mac OS X applications, bundles, plugins, and any other kinds of files.
534         Mac OS X application bundles wil have their program in
535         "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
536         ".bundle" or ".plugin" for other types of bundles).  So we look for any
537         prior '.' character and try appending the apple dSYM extension and
538         subdirectory and see if we find an existing dSYM file (in the above
539         MyApp example the dSYM would be at either:
540         "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
541         "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp".  */
542      VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
543      while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
544      {
545         /* Find the directory delimiter that follows the '.' character since
546            we now look for a .dSYM that follows any bundle extension.  */
547         slash_ptr = VG_(strchr) (dot_ptr, '/');
548         if (slash_ptr)
549         {
550             /* NULL terminate the string at the '/' character and append
551                the path down to the dSYM file.  */
552            *slash_ptr = '\0';
553            VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
554            VG_(strcat) (slash_ptr, basename_str);
555            if (file_exists_p (dsymfile))
556               return dsymfile;
557         }
558
559         /* NULL terminate the string at the '.' character and append
560            the path down to the dSYM file.  */
561         *dot_ptr = '\0';
562         VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
563         VG_(strcat) (dot_ptr, basename_str);
564         if (file_exists_p (dsymfile))
565            return dsymfile;
566
567         /* NULL terminate the string at the '.' locatated by the strrchr()
568            function again.  */
569         *dot_ptr = '\0';
570
571         /* We found a previous extension '.' character and did not find a
572            dSYM file so now find previous directory delimiter so we don't
573            try multiple times on a file name that may have a version number
574            in it such as "/some/path/MyApp.6.0.4.app".  */
575         slash_ptr = VG_(strrchr) (dsymfile, '/');
576         if (!slash_ptr)
577            break;
578         /* NULL terminate the string at the previous directory character
579            and search again.  */
580         *slash_ptr = '\0';
581      }
582   }
583
584   return NULL;
585}
586
587
588/* Given a DiSlice covering the entire Mach-O thin image, find the
589   DiSlice for the specified (segname, sectname) pairing, if
590   possible. */
591static DiSlice getsectdata ( DiSlice img,
592                             const HChar *segname, const HChar *sectname )
593{
594   DiCursor cur = ML_(cur_from_sli)(img);
595
596   struct MACH_HEADER mh;
597   ML_(cur_step_get)(&mh, &cur, sizeof(mh));
598
599   Int c;
600   for (c = 0; c < mh.ncmds; c++) {
601      struct load_command cmd;
602      ML_(cur_read_get)(&cmd, cur, sizeof(cmd));
603      if (cmd.cmd == LC_SEGMENT_CMD) {
604         struct SEGMENT_COMMAND seg;
605         ML_(cur_read_get)(&seg, cur, sizeof(seg));
606         if (0 == VG_(strncmp(&seg.segname[0],
607                              segname, sizeof(seg.segname)))) {
608            DiCursor sects_cur = ML_(cur_plus)(cur, sizeof(seg));
609            Int s;
610            for (s = 0; s < seg.nsects; s++) {
611               struct SECTION sect;
612               ML_(cur_step_get)(&sect, &sects_cur, sizeof(sect));
613               if (0 == VG_(strncmp(sect.sectname, sectname,
614                                    sizeof(sect.sectname)))) {
615                  DiSlice res = img;
616                  res.ioff = sect.offset;
617                  res.szB = sect.size;
618                  return res;
619               }
620            }
621
622         }
623      }
624      cur = ML_(cur_plus)(cur, cmd.cmdsize);
625   }
626
627   return DiSlice_INVALID;
628}
629
630
631/* Brute force just simply search for uuid[0..15] in |sli| */
632static Bool check_uuid_matches ( DiSlice sli, UChar* uuid )
633{
634   if (sli.szB < 16)
635      return False;
636
637   /* Work through the slice in 1 KB chunks. */
638   UChar  first    = uuid[0];
639   DiOffT min_off  = sli.ioff;
640   DiOffT max1_off = sli.ioff + sli.szB;
641   DiOffT curr_off = min_off;
642   vg_assert(min_off < max1_off);
643   while (1) {
644      vg_assert(curr_off >= min_off && curr_off <= max1_off);
645      if (curr_off == max1_off) break;
646      DiOffT avail = max1_off - curr_off;
647      vg_assert(avail > 0 && avail <= max1_off);
648      if (avail > 1024) avail = 1024;
649      UChar buf[1024];
650      SizeT nGot = ML_(img_get_some)(buf, sli.img, curr_off, avail);
651      vg_assert(nGot >= 1 && nGot <= avail);
652      UInt i;
653      /* Scan through the 1K chunk we got, looking for the start char. */
654      for (i = 0; i < (UInt)nGot; i++) {
655         if (LIKELY(buf[i] != first))
656            continue;
657         /* first char matches.  See if we can get 16 bytes at this
658            offset, and compare. */
659         if (curr_off + i < max1_off && max1_off - (curr_off + i) >= 16) {
660            UChar buff16[16];
661            ML_(img_get)(&buff16[0], sli.img, curr_off + i, 16);
662            if (0 == VG_(memcmp)(&buff16[0], &uuid[0], 16))
663               return True;
664         }
665      }
666      curr_off += nGot;
667   }
668   return False;
669}
670
671
672/* Heuristic kludge: return True if this looks like an installed
673   standard library; hence we shouldn't consider automagically running
674   dsymutil on it. */
675static Bool is_systemish_library_name ( HChar* name )
676{
677   vg_assert(name);
678   if (0 == VG_(strncasecmp)(name, "/usr/", 5)
679       || 0 == VG_(strncasecmp)(name, "/bin/", 5)
680       || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
681       || 0 == VG_(strncasecmp)(name, "/opt/", 5)
682       || 0 == VG_(strncasecmp)(name, "/sw/", 4)
683       || 0 == VG_(strncasecmp)(name, "/System/", 8)
684       || 0 == VG_(strncasecmp)(name, "/Library/", 9)
685       || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
686      return True;
687   } else {
688      return False;
689   }
690}
691
692
693Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
694{
695   DiSlice  msli         = DiSlice_INVALID; // the main image
696   DiSlice  dsli         = DiSlice_INVALID; // the debuginfo image
697   DiCursor sym_cur      = DiCursor_INVALID;
698   DiCursor dysym_cur    = DiCursor_INVALID;
699   HChar*   dsymfilename = NULL;
700   Bool     have_uuid    = False;
701   UChar    uuid[16];
702   Word     i;
703   struct _DebugInfoMapping* rx_map = NULL;
704   struct _DebugInfoMapping* rw_map = NULL;
705
706   /* mmap the object file to look for di->soname and di->text_bias
707      and uuid and nlist and STABS */
708
709   /* This should be ensured by our caller (that we're in the accept
710      state). */
711   vg_assert(di->fsm.have_rx_map);
712   vg_assert(di->fsm.have_rw_map);
713
714   for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
715      struct _DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
716      if (map->rx && !rx_map)
717         rx_map = map;
718      if (map->rw && !rw_map)
719         rw_map = map;
720      if (rx_map && rw_map)
721         break;
722   }
723   vg_assert(rx_map);
724   vg_assert(rw_map);
725
726   if (VG_(clo_verbosity) > 1)
727      VG_(message)(Vg_DebugMsg,
728                   "%s (rx at %#lx, rw at %#lx)\n", di->fsm.filename,
729                   rx_map->avma, rw_map->avma );
730
731   VG_(memset)(&uuid, 0, sizeof(uuid));
732
733   msli = map_image_aboard( di, di->fsm.filename );
734   if (!ML_(sli_is_valid)(msli)) {
735      ML_(symerr)(di, False, "Connect to main image failed.");
736      goto fail;
737   }
738
739   vg_assert(msli.img != NULL && msli.szB > 0);
740
741   /* Poke around in the Mach-O header, to find some important
742      stuff. */
743   // Find LC_SYMTAB and LC_DYSYMTAB, if present.
744   // Read di->soname from LC_ID_DYLIB if present,
745   //    or from LC_ID_DYLINKER if present,
746   //    or use "NONE".
747   // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
748   // Get uuid for later dsym search
749
750   di->text_bias = 0;
751
752   {
753      DiCursor cmd_cur = ML_(cur_from_sli)(msli);
754
755      struct MACH_HEADER mh;
756      ML_(cur_step_get)(&mh, &cmd_cur, sizeof(mh));
757
758      /* Now cur_cmd points just after the Mach header, right at the
759         start of the load commands, which is where we need it to start
760         the following loop. */
761
762      Int c;
763      for (c = 0; c < mh.ncmds; c++) {
764         struct load_command cmd;
765         ML_(cur_read_get)(&cmd, cmd_cur, sizeof(cmd));
766
767         if (cmd.cmd == LC_SYMTAB) {
768            sym_cur = cmd_cur;
769         }
770         else if (cmd.cmd == LC_DYSYMTAB) {
771            dysym_cur = cmd_cur;
772         }
773         else if (cmd.cmd == LC_ID_DYLIB && mh.filetype == MH_DYLIB) {
774            // GrP fixme bundle?
775            struct dylib_command dcmd;
776            ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
777            DiCursor dylibname_cur
778               = ML_(cur_plus)(cmd_cur, dcmd.dylib.name.offset);
779            HChar* dylibname
780               = ML_(cur_read_strdup)(dylibname_cur, "di.rmdi.1");
781            HChar* soname = VG_(strrchr)(dylibname, '/');
782            if (!soname) soname = dylibname;
783            else soname++;
784            di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
785                                           soname);
786            ML_(dinfo_free)(dylibname);
787         }
788         else if (cmd.cmd==LC_ID_DYLINKER  &&  mh.filetype==MH_DYLINKER) {
789            struct dylinker_command dcmd;
790            ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
791            DiCursor dylinkername_cur
792               = ML_(cur_plus)(cmd_cur, dcmd.name.offset);
793            HChar* dylinkername
794               = ML_(cur_read_strdup)(dylinkername_cur, "di.rmdi.2");
795            HChar* soname = VG_(strrchr)(dylinkername, '/');
796            if (!soname) soname = dylinkername;
797            else soname++;
798            di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
799                                           soname);
800            ML_(dinfo_free)(dylinkername);
801         }
802
803         // A comment from Julian about why varinfo[35] fail:
804         //
805         // My impression is, from comparing the output of otool -l for these
806         // executables with the logic in ML_(read_macho_debug_info),
807         // specifically the part that begins "else if (cmd->cmd ==
808         // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
809         // to work ok for text symbols.  In particular, it appears to assume
810         // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
811         // "struct SEGMENT_COMMAND" inside it is going to contain the info we
812         // need.  However, otool -l shows, and also the Apple docs state,
813         // that a struct load_command may contain an arbitrary number of
814         // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
815         // snarf the first.  But I'm not sure about this.
816         //
817         // The "Try for __DATA" block below simply adds acquisition of data
818         // svma/bias values using the same assumption.  It also needs
819         // (probably) to deal with bss sections, but I don't understand how
820         // this all ties together really, so it requires further study.
821         //
822         // If you can get your head around the relationship between MachO
823         // segments, sections and load commands, this might be relatively
824         // easy to fix properly.
825         //
826         // Basically we need to come up with plausible numbers for di->
827         // {text,data,bss}_{avma,svma}, from which the _bias numbers are
828         // then trivially derived.  Then I think the debuginfo reader should
829         // work pretty well.
830         else if (cmd.cmd == LC_SEGMENT_CMD) {
831            struct SEGMENT_COMMAND seg;
832            ML_(cur_read_get)(&seg, cmd_cur, sizeof(seg));
833            /* Try for __TEXT */
834            if (!di->text_present
835                && 0 == VG_(strcmp)(&seg.segname[0], "__TEXT")
836                /* DDD: is the next line a kludge? -- JRS */
837                && seg.fileoff == 0 && seg.filesize != 0) {
838               di->text_present = True;
839               di->text_svma = (Addr)seg.vmaddr;
840               di->text_avma = rx_map->avma;
841               di->text_size = seg.vmsize;
842               di->text_bias = di->text_avma - di->text_svma;
843               /* Make the _debug_ values be the same as the
844                  svma/bias for the primary object, since there is
845                  no secondary (debuginfo) object, but nevertheless
846                  downstream biasing of Dwarf3 relies on the
847                  _debug_ values. */
848               di->text_debug_svma = di->text_svma;
849               di->text_debug_bias = di->text_bias;
850            }
851            /* Try for __DATA */
852            if (!di->data_present
853                && 0 == VG_(strcmp)(&seg.segname[0], "__DATA")
854                /* && DDD:seg->fileoff == 0 */ && seg.filesize != 0) {
855               di->data_present = True;
856               di->data_svma = (Addr)seg.vmaddr;
857               di->data_avma = rw_map->avma;
858               di->data_size = seg.vmsize;
859               di->data_bias = di->data_avma - di->data_svma;
860               di->data_debug_svma = di->data_svma;
861               di->data_debug_bias = di->data_bias;
862            }
863         }
864         else if (cmd.cmd == LC_UUID) {
865             ML_(cur_read_get)(&uuid, cmd_cur, sizeof(uuid));
866             have_uuid = True;
867         }
868         // Move the cursor along
869         cmd_cur = ML_(cur_plus)(cmd_cur, cmd.cmdsize);
870      }
871   }
872
873   if (!di->soname) {
874      di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
875   }
876
877   if (di->trace_symtab) {
878      VG_(printf)("\n");
879      VG_(printf)("SONAME = %s\n", di->soname);
880      VG_(printf)("\n");
881   }
882
883   /* Now we have the base object to hand.  Read symbols from it. */
884
885   // We already asserted that ..
886   vg_assert(msli.img != NULL && msli.szB > 0);
887
888   if (ML_(cur_is_valid)(sym_cur) && ML_(cur_is_valid)(dysym_cur)) {
889
890      struct symtab_command   symcmd;
891      struct dysymtab_command dysymcmd;
892
893      ML_(cur_read_get)(&symcmd,   sym_cur,   sizeof(symcmd));
894      ML_(cur_read_get)(&dysymcmd, dysym_cur, sizeof(dysymcmd));
895
896      /* Read nlist symbol table */
897      DiCursor syms = DiCursor_INVALID;
898      DiCursor strs = DiCursor_INVALID;
899      XArray* /* DiSym */ candSyms = NULL;
900      Word nCandSyms;
901
902      if (msli.szB < symcmd.stroff + symcmd.strsize
903          || msli.szB < symcmd.symoff + symcmd.nsyms
904                                        * sizeof(struct NLIST)) {
905         ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
906         goto fail;
907      }
908      if (dysymcmd.ilocalsym + dysymcmd.nlocalsym > symcmd.nsyms
909          || dysymcmd.iextdefsym + dysymcmd.nextdefsym > symcmd.nsyms) {
910         ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
911         goto fail;
912      }
913
914      syms = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.symoff);
915      strs = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.stroff);
916
917      if (VG_(clo_verbosity) > 1)
918         VG_(message)(Vg_DebugMsg,
919            "   reading syms   from primary file (%d %d)\n",
920            dysymcmd.nextdefsym, dysymcmd.nlocalsym );
921
922      /* Read candidate symbols into 'candSyms', so we can truncate
923         overlapping ends and generally tidy up, before presenting
924         them to ML_(addSym). */
925      candSyms = VG_(newXA)(
926                    ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
927                    ML_(dinfo_free), sizeof(DiSym)
928                 );
929      vg_assert(candSyms);
930
931      // extern symbols
932      read_symtab(candSyms,
933                  di,
934                  ML_(cur_plus)(syms,
935                                dysymcmd.iextdefsym * sizeof(struct NLIST)),
936                  dysymcmd.nextdefsym, strs, symcmd.strsize);
937      // static and private_extern symbols
938      read_symtab(candSyms,
939                  di,
940                  ML_(cur_plus)(syms,
941                                dysymcmd.ilocalsym * sizeof(struct NLIST)),
942                  dysymcmd.nlocalsym, strs, symcmd.strsize);
943
944      /* tidy up the cand syms -- trim overlapping ends.  May resize
945         candSyms. */
946      tidy_up_cand_syms( candSyms, di->trace_symtab );
947
948      /* and finally present them to ML_(addSym) */
949      nCandSyms = VG_(sizeXA)( candSyms );
950      for (i = 0; i < nCandSyms; i++) {
951         DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
952         vg_assert(cand->pri_name != NULL);
953         vg_assert(cand->sec_names == NULL);
954         if (di->trace_symtab)
955            VG_(printf)("nlist final: acquire  avma %010lx-%010lx  %s\n",
956                        cand->addr, cand->addr + cand->size - 1,
957                        cand->pri_name );
958         ML_(addSym)( di, cand );
959      }
960      VG_(deleteXA)( candSyms );
961   }
962
963   /* If there's no UUID in the primary, don't even bother to try and
964      read any DWARF, since we won't be able to verify it matches.
965      Our policy is not to load debug info unless we can verify that
966      it matches the primary.  Just declare success at this point.
967      And don't complain to the user, since that would cause us to
968      complain on objects compiled without -g.  (Some versions of
969      XCode are observed to omit a UUID entry for objects linked(?)
970      without -g.  Others don't appear to omit it.) */
971   if (!have_uuid)
972      goto success;
973
974   /* mmap the dSYM file to look for DWARF debug info.  If successful,
975      the mapped image is described by dsli (its .img and .szB). */
976
977   dsymfilename = find_separate_debug_file( di->fsm.filename );
978
979   /* Try to load it. */
980   if (dsymfilename) {
981      Bool valid;
982
983      if (VG_(clo_verbosity) > 1)
984         VG_(message)(Vg_DebugMsg, "   dSYM= %s\n", dsymfilename);
985
986      dsli = map_image_aboard( di, dsymfilename );
987      if (!ML_(sli_is_valid)(dsli)) {
988         ML_(symerr)(di, False, "Connect to debuginfo image failed "
989                                "(first attempt).");
990         goto fail;
991      }
992
993      /* check it has the right uuid. */
994      vg_assert(have_uuid);
995      valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
996      if (valid)
997         goto read_the_dwarf;
998
999      if (VG_(clo_verbosity) > 1)
1000         VG_(message)(Vg_DebugMsg, "   dSYM does not have "
1001                                   "correct UUID (out of date?)\n");
1002   }
1003
1004   /* There was no dsym file, or it doesn't match.  We'll have to try
1005      regenerating it, unless --dsymutil=no, in which case just complain
1006      instead. */
1007
1008   /* If this looks like a lib that we shouldn't run dsymutil on, just
1009      give up.  (possible reasons: is system lib, or in /usr etc, or
1010      the dsym dir would not be writable by the user, or we're running
1011      as root) */
1012   vg_assert(di->fsm.filename);
1013   if (is_systemish_library_name(di->fsm.filename))
1014      goto success;
1015
1016   if (!VG_(clo_dsymutil)) {
1017      if (VG_(clo_verbosity) == 1) {
1018         VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename);
1019      }
1020      if (VG_(clo_verbosity) > 0)
1021         VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
1022                      "--dsymutil=yes\n",
1023                      VG_(clo_verbosity) > 1 ? "   " : "",
1024                      dsymfilename ? "has wrong UUID" : "is missing");
1025      goto success;
1026   }
1027
1028   /* Run dsymutil */
1029
1030   { Int r;
1031     const HChar* dsymutil = "/usr/bin/dsymutil ";
1032     HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
1033                                     VG_(strlen)(dsymutil)
1034                                     + VG_(strlen)(di->fsm.filename)
1035                                     + 32 /* misc */ );
1036     VG_(strcpy)(cmd, dsymutil);
1037     if (0) VG_(strcat)(cmd, "--verbose ");
1038     VG_(strcat)(cmd, "\"");
1039     VG_(strcat)(cmd, di->fsm.filename);
1040     VG_(strcat)(cmd, "\"");
1041     VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
1042     r = VG_(system)( cmd );
1043     if (r)
1044        VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
1045     ML_(dinfo_free)(cmd);
1046     dsymfilename = find_separate_debug_file(di->fsm.filename);
1047   }
1048
1049   /* Try again to load it. */
1050   if (dsymfilename) {
1051      Bool valid;
1052
1053      if (VG_(clo_verbosity) > 1)
1054         VG_(message)(Vg_DebugMsg, "   dsyms= %s\n", dsymfilename);
1055
1056      dsli = map_image_aboard( di, dsymfilename );
1057      if (!ML_(sli_is_valid)(dsli)) {
1058         ML_(symerr)(di, False, "Connect to debuginfo image failed "
1059                                "(second attempt).");
1060         goto fail;
1061      }
1062
1063      /* check it has the right uuid. */
1064      vg_assert(have_uuid);
1065      vg_assert(have_uuid);
1066      valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
1067      if (!valid) {
1068         if (VG_(clo_verbosity) > 0) {
1069            VG_(message)(Vg_DebugMsg,
1070               "WARNING: did not find expected UUID %02X%02X%02X%02X"
1071               "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
1072               " in dSYM dir\n",
1073               (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
1074               (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
1075               (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
1076               (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
1077               (UInt)uuid[14], (UInt)uuid[15] );
1078            VG_(message)(Vg_DebugMsg,
1079                         "WARNING: for %s\n", di->fsm.filename);
1080         }
1081         unmap_image( &dsli );
1082         /* unmap_image zeroes out dsli, so it's safe for "fail:" to
1083            re-try unmap_image. */
1084         goto fail;
1085      }
1086   }
1087
1088   /* Right.  Finally we have our best try at the dwarf image, so go
1089      on to reading stuff out of it. */
1090
1091  read_the_dwarf:
1092   if (ML_(sli_is_valid)(msli) && msli.szB > 0) {
1093      // "_mscn" is "mach-o section"
1094      DiSlice debug_info_mscn
1095         = getsectdata(dsli, "__DWARF", "__debug_info");
1096      DiSlice debug_abbv_mscn
1097         = getsectdata(dsli, "__DWARF", "__debug_abbrev");
1098      DiSlice debug_line_mscn
1099         = getsectdata(dsli, "__DWARF", "__debug_line");
1100      DiSlice debug_str_mscn
1101         = getsectdata(dsli, "__DWARF", "__debug_str");
1102      DiSlice debug_ranges_mscn
1103         = getsectdata(dsli, "__DWARF", "__debug_ranges");
1104      DiSlice debug_loc_mscn
1105         = getsectdata(dsli, "__DWARF", "__debug_loc");
1106
1107      if (ML_(sli_is_valid)(debug_info_mscn)) {
1108         if (VG_(clo_verbosity) > 1) {
1109            if (0)
1110            VG_(message)(Vg_DebugMsg,
1111                         "Reading dwarf3 for %s (%#lx) from %s"
1112                         " (%lld %lld %lld %lld %lld %lld)\n",
1113                         di->fsm.filename, di->text_avma, dsymfilename,
1114                         debug_info_mscn.szB, debug_abbv_mscn.szB,
1115                         debug_line_mscn.szB, debug_str_mscn.szB,
1116                         debug_ranges_mscn.szB, debug_loc_mscn.szB
1117                         );
1118            VG_(message)(Vg_DebugMsg,
1119               "   reading dwarf3 from dsyms file\n");
1120         }
1121         /* The old reader: line numbers and unwind info only */
1122         ML_(read_debuginfo_dwarf3) ( di,
1123                                      debug_info_mscn,
1124				      DiSlice_INVALID, /* .debug_types */
1125                                      debug_abbv_mscn,
1126                                      debug_line_mscn,
1127                                      debug_str_mscn,
1128                                      DiSlice_INVALID /* ALT .debug_str */ );
1129
1130         /* The new reader: read the DIEs in .debug_info to acquire
1131            information on variable types and locations.  But only if
1132            the tool asks for it, or the user requests it on the
1133            command line. */
1134         if (VG_(needs).var_info /* the tool requires it */
1135             || VG_(clo_read_var_info) /* the user asked for it */) {
1136            ML_(new_dwarf3_reader)(
1137               di, debug_info_mscn,
1138                   DiSlice_INVALID, /* .debug_types */
1139                   debug_abbv_mscn,
1140                   debug_line_mscn,
1141                   debug_str_mscn,
1142                   debug_ranges_mscn,
1143                   debug_loc_mscn,
1144                   DiSlice_INVALID, /* ALT .debug_info */
1145                   DiSlice_INVALID, /* ALT .debug_abbv */
1146                   DiSlice_INVALID, /* ALT .debug_line */
1147                   DiSlice_INVALID  /* ALT .debug_str */
1148            );
1149         }
1150      }
1151   }
1152
1153   if (dsymfilename) ML_(dinfo_free)(dsymfilename);
1154
1155  success:
1156   unmap_image(&msli);
1157   unmap_image(&dsli);
1158   return True;
1159
1160   /* NOTREACHED */
1161
1162  fail:
1163   ML_(symerr)(di, True, "Error reading Mach-O object.");
1164   unmap_image(&msli);
1165   unmap_image(&dsli);
1166   return False;
1167}
1168
1169#endif // defined(VGO_darwin)
1170
1171/*--------------------------------------------------------------------*/
1172/*--- end                                                          ---*/
1173/*--------------------------------------------------------------------*/
1174