1
2/*--------------------------------------------------------------------*/
3/*--- Reading of syms & debug info from Mach-O files.              ---*/
4/*---                                                  readmacho.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2005-2012 Apple Inc.
12      Greg Parker gparker@apple.com
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#if defined(VGO_darwin)
33
34#include "pub_core_basics.h"
35#include "pub_core_vki.h"
36#include "pub_core_libcbase.h"
37#include "pub_core_libcprint.h"
38#include "pub_core_libcassert.h"
39#include "pub_core_libcfile.h"
40#include "pub_core_libcproc.h"
41#include "pub_core_aspacemgr.h"    /* for mmaping debuginfo files */
42#include "pub_core_machine.h"      /* VG_ELF_CLASS */
43#include "pub_core_options.h"
44#include "pub_core_oset.h"
45#include "pub_core_tooliface.h"    /* VG_(needs) */
46#include "pub_core_xarray.h"
47#include "pub_core_clientstate.h"
48#include "pub_core_debuginfo.h"
49
50#include "priv_d3basics.h"
51#include "priv_misc.h"
52#include "priv_tytypes.h"
53#include "priv_storage.h"
54#include "priv_readmacho.h"
55#include "priv_readdwarf.h"
56#include "priv_readdwarf3.h"
57#include "priv_readstabs.h"
58
59/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
60#include <mach-o/loader.h>
61#include <mach-o/nlist.h>
62#include <mach-o/fat.h>
63/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
64
/* Pick the 32-bit or the 64-bit flavour of the Mach-O constants and
   structure tags, depending on VG_WORDSIZE, and give them
   width-neutral names (MAGIC, MACH_HEADER, LC_SEGMENT_CMD,
   SEGMENT_COMMAND, SECTION, NLIST) for use by the rest of this
   file. */
#if VG_WORDSIZE == 4
# define MAGIC MH_MAGIC
# define MACH_HEADER mach_header
# define LC_SEGMENT_CMD LC_SEGMENT
# define SEGMENT_COMMAND segment_command
# define SECTION section
# define NLIST nlist
#else
# define MAGIC MH_MAGIC_64
# define MACH_HEADER mach_header_64
# define LC_SEGMENT_CMD LC_SEGMENT_64
# define SEGMENT_COMMAND segment_command_64
# define SECTION section_64
# define NLIST nlist_64
#endif
80
81
82/*------------------------------------------------------------*/
83/*---                                                      ---*/
84/*--- Mach-O file mapping/unmapping helpers                ---*/
85/*---                                                      ---*/
86/*------------------------------------------------------------*/
87
88typedef
89   struct {
90      /* These two describe the entire mapped-in ("primary") image,
91         fat headers, kitchen sink, whatnot: the entire file.  The
92         image is mapped into img[0 .. img_szB-1]. */
93      UChar* img;
94      SizeT  img_szB;
95      /* These two describe the Mach-O object of interest, which is
96         presumably somewhere inside the primary image.
97         map_image_aboard() below, which generates this info, will
98         carefully check that the macho_ fields denote a section of
99         memory that falls entirely inside img[0 .. img_szB-1]. */
100      UChar* macho_img;
101      SizeT  macho_img_szB;
102   }
103   ImageInfo;
104
105
106Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
107{
108   /* (JRS: the Mach-O headers might not be in this mapped data,
109      because we only mapped a page for this initial check,
110      or at least not very much, and what's at the start of the file
111      is in general a so-called fat header.  The Mach-O object we're
112      interested in could be arbitrarily far along the image, and so
113      we can't assume its header will fall within this page.) */
114
115   /* But we can say that either it's a fat object, in which case it
116      begins with a fat header, or it's unadorned Mach-O, in which
117      case it starts with a normal header.  At least do what checks we
118      can to establish whether or not we're looking at something
119      sane. */
120
121   const struct fat_header*  fh_be = buf;
122   const struct MACH_HEADER* mh    = buf;
123
124   vg_assert(buf);
125   if (szB < sizeof(struct fat_header))
126      return False;
127   if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
128      return True;
129
130   if (szB < sizeof(struct MACH_HEADER))
131      return False;
132   if (mh->magic == MAGIC)
133      return True;
134
135   return False;
136}
137
138
139/* Unmap an image mapped in by map_image_aboard. */
140static void unmap_image ( /*MOD*/ImageInfo* ii )
141{
142   SysRes sres;
143   vg_assert(ii->img);
144   vg_assert(ii->img_szB > 0);
145   sres = VG_(am_munmap_valgrind)( (Addr)ii->img, ii->img_szB );
146   /* Do we care if this fails?  I suppose so; it would indicate
147      some fairly serious snafu with the mapping of the file. */
148   vg_assert( !sr_isError(sres) );
149   VG_(memset)(ii, 0, sizeof(*ii));
150}
151
152
153/* Map a given fat or thin object aboard, find the thin part if
154   necessary, do some checks, and write details of both the fat and
155   thin parts into *ii.  Returns False (and leaves the file unmapped)
156   on failure.  Guarantees to return pointers to a valid(ish) Mach-O
157   image if it succeeds. */
158static Bool map_image_aboard ( DebugInfo* di, /* only for err msgs */
159                               /*OUT*/ImageInfo* ii, UChar* filename )
160{
161   VG_(memset)(ii, 0, sizeof(*ii));
162
163   /* First off, try to map the thing in. */
164   { SizeT  size;
165     SysRes fd, sres;
166     struct vg_stat stat_buf;
167
168     fd = VG_(stat)(filename, &stat_buf);
169     if (sr_isError(fd)) {
170        ML_(symerr)(di, True, "Can't stat image (to determine its size)?!");
171        return False;
172     }
173     size = stat_buf.size;
174
175     fd = VG_(open)(filename, VKI_O_RDONLY, 0);
176     if (sr_isError(fd)) {
177       ML_(symerr)(di, True, "Can't open image to read symbols?!");
178        return False;
179     }
180
181     sres = VG_(am_mmap_file_float_valgrind)
182               ( size, VKI_PROT_READ, sr_Res(fd), 0 );
183     if (sr_isError(sres)) {
184        ML_(symerr)(di, True, "Can't mmap image to read symbols?!");
185        return False;
186     }
187
188     VG_(close)(sr_Res(fd));
189
190     ii->img     = (UChar*)sr_Res(sres);
191     ii->img_szB = size;
192   }
193
194   /* Now it's mapped in and we have .img and .img_szB set.  Look for
195      the embedded Mach-O object.  If not findable, unmap and fail. */
196   { struct fat_header*  fh_be;
197     struct fat_header   fh;
198     struct MACH_HEADER* mh;
199
200     // Assume initially that we have a thin image, and update
201     // these if it turns out to be fat.
202     ii->macho_img     = ii->img;
203     ii->macho_img_szB = ii->img_szB;
204
205     // Check for fat header.
206     if (ii->img_szB < sizeof(struct fat_header)) {
207        ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
208        goto unmap_and_fail;
209     }
210
211     // Fat header is always BIG-ENDIAN
212     fh_be = (struct fat_header *)ii->img;
213     fh.magic = VG_(ntohl)(fh_be->magic);
214     fh.nfat_arch = VG_(ntohl)(fh_be->nfat_arch);
215     if (fh.magic == FAT_MAGIC) {
216        // Look for a good architecture.
217        struct fat_arch *arch_be;
218        struct fat_arch arch;
219        Int f;
220        if (ii->img_szB < sizeof(struct fat_header)
221                          + fh.nfat_arch * sizeof(struct fat_arch)) {
222           ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
223           goto unmap_and_fail;
224        }
225        for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
226             f < fh.nfat_arch;
227             f++, arch_be++) {
228           Int cputype;
229#          if defined(VGA_ppc)
230           cputype = CPU_TYPE_POWERPC;
231#          elif defined(VGA_ppc64)
232           cputype = CPU_TYPE_POWERPC64;
233#          elif defined(VGA_x86)
234           cputype = CPU_TYPE_X86;
235#          elif defined(VGA_amd64)
236           cputype = CPU_TYPE_X86_64;
237#          else
238#            error "unknown architecture"
239#          endif
240           arch.cputype    = VG_(ntohl)(arch_be->cputype);
241           arch.cpusubtype = VG_(ntohl)(arch_be->cpusubtype);
242           arch.offset     = VG_(ntohl)(arch_be->offset);
243           arch.size       = VG_(ntohl)(arch_be->size);
244           if (arch.cputype == cputype) {
245              if (ii->img_szB < arch.offset + arch.size) {
246                 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
247                 goto unmap_and_fail;
248              }
249              ii->macho_img     = ii->img + arch.offset;
250              ii->macho_img_szB = arch.size;
251              break;
252           }
253        }
254        if (f == fh.nfat_arch) {
255           ML_(symerr)(di, True,
256                       "No acceptable architecture found in fat file.");
257           goto unmap_and_fail;
258        }
259     }
260
261     /* Sanity check what we found. */
262
263     /* assured by logic above */
264     vg_assert(ii->img_szB >= sizeof(struct fat_header));
265
266     if (ii->macho_img_szB < sizeof(struct MACH_HEADER)) {
267        ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
268        goto unmap_and_fail;
269     }
270
271     if (ii->macho_img_szB > ii->img_szB) {
272        ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
273        goto unmap_and_fail;
274     }
275
276     if (ii->macho_img >= ii->img
277         && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
278        /* thin entirely within fat, as expected */
279     } else {
280        ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
281        goto unmap_and_fail;
282     }
283
284     mh = (struct MACH_HEADER *)ii->macho_img;
285     if (mh->magic != MAGIC) {
286        ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
287        goto unmap_and_fail;
288     }
289
290     if (ii->macho_img_szB < sizeof(struct MACH_HEADER) + mh->sizeofcmds) {
291        ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
292        goto unmap_and_fail;
293     }
294   }
295
296   vg_assert(ii->img);
297   vg_assert(ii->macho_img);
298   vg_assert(ii->img_szB > 0);
299   vg_assert(ii->macho_img_szB > 0);
300   vg_assert(ii->macho_img >= ii->img);
301   vg_assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
302   return True;  /* success */
303   /*NOTREACHED*/
304
305  unmap_and_fail:
306   unmap_image(ii);
307   return False; /* bah! */
308}
309
310
311/*------------------------------------------------------------*/
312/*---                                                      ---*/
313/*--- Mach-O symbol table reading                          ---*/
314/*---                                                      ---*/
315/*------------------------------------------------------------*/
316
317/* Read a symbol table (nlist).  Add the resulting candidate symbols
318   to 'syms'; the caller will post-process them and hand them off to
319   ML_(addSym) itself. */
320static
321void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
322                  struct _DebugInfo* di,
323                  struct NLIST* o_symtab, UInt o_symtab_count,
324                  UChar*     o_strtab, UInt o_strtab_sz )
325{
326   Int    i;
327   Addr   sym_addr;
328   DiSym  disym;
329   UChar* name;
330
331   static UChar* s_a_t_v = NULL; /* do not make non-static */
332
333   for (i = 0; i < o_symtab_count; i++) {
334      struct NLIST *nl = o_symtab+i;
335      if ((nl->n_type & N_TYPE) == N_SECT) {
336         sym_addr = di->text_bias + nl->n_value;
337    /*} else if ((nl->n_type & N_TYPE) == N_ABS) {
338         GrP fixme don't ignore absolute symbols?
339         sym_addr = nl->n_value; */
340      } else {
341         continue;
342      }
343
344      if (di->trace_symtab)
345         VG_(printf)("nlist raw: avma %010lx  %s\n",
346                     sym_addr, o_strtab + nl->n_un.n_strx );
347
348      /* If no part of the symbol falls within the mapped range,
349         ignore it. */
350      if (sym_addr <= di->text_avma
351          || sym_addr >= di->text_avma+di->text_size) {
352         continue;
353      }
354
355      /* skip names which point outside the string table;
356         following these risks segfaulting Valgrind */
357      name = o_strtab + nl->n_un.n_strx;
358      if (name < o_strtab || name >= o_strtab + o_strtab_sz)
359         continue;
360
361      /* skip nameless symbols; these appear to be common, but
362         useless */
363      if (*name == 0)
364         continue;
365
366      disym.addr      = sym_addr;
367      disym.tocptr    = 0;
368      disym.pri_name  = ML_(addStr)(di, name, -1);
369      disym.sec_names = NULL;
370      disym.size      = // let canonicalize fix it
371                        di->text_avma+di->text_size - sym_addr;
372      disym.isText    = True;
373      disym.isIFunc   = False;
374      // Lots of user function names get prepended with an underscore.  Eg. the
375      // function 'f' becomes the symbol '_f'.  And the "below main"
376      // function is called "start".  So we skip the leading underscore, and
377      // if we see 'start' and --show-below-main=no, we rename it as
378      // "start_according_to_valgrind", which makes it easy to spot later
379      // and display as "(below main)".
380      if (disym.pri_name[0] == '_') {
381         disym.pri_name++;
382      }
383      else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) {
384         if (s_a_t_v == NULL)
385            s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
386         vg_assert(s_a_t_v);
387         disym.pri_name = s_a_t_v;
388      }
389
390      vg_assert(disym.pri_name);
391      VG_(addToXA)( syms, &disym );
392   }
393}
394
395
396/* Compare DiSyms by their start address, and for equal addresses, use
397   the primary name as a secondary sort key. */
398static Int cmp_DiSym_by_start_then_name ( void* v1, void* v2 )
399{
400   DiSym* s1 = (DiSym*)v1;
401   DiSym* s2 = (DiSym*)v2;
402   if (s1->addr < s2->addr) return -1;
403   if (s1->addr > s2->addr) return 1;
404   return VG_(strcmp)(s1->pri_name, s2->pri_name);
405}
406
407/* 'cand' is a bunch of candidate symbols obtained by reading
408   nlist-style symbol table entries.  Their ends may overlap, so sort
409   them and truncate them accordingly.  The code in this routine is
410   copied almost verbatim from read_symbol_table() in readxcoff.c. */
411static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
412                                Bool trace_symtab )
413{
414   Word nsyms, i, j, k, m;
415
416   nsyms = VG_(sizeXA)(syms);
417
418   VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
419   VG_(sortXA)(syms);
420
421   /* We only know for sure the start addresses (actual VMAs) of
422      symbols, and an overestimation of their end addresses.  So sort
423      by start address, then clip each symbol so that its end address
424      does not overlap with the next one along.
425
426      There is a small refinement: if a group of symbols have the same
427      address, treat them as a group: find the next symbol along that
428      has a higher start address, and clip all of the group
429      accordingly.  This clips the group as a whole so as not to
430      overlap following symbols.  This leaves prefersym() in
431      storage.c, which is not nlist-specific, to later decide which of
432      the symbols in the group to keep.
433
434      Another refinement is that we need to get rid of symbols which,
435      after clipping, have identical starts, ends, and names.  So the
436      sorting uses the name as a secondary key.
437   */
438
439   for (i = 0; i < nsyms; i++) {
440      for (k = i+1;
441           k < nsyms
442             && ((DiSym*)VG_(indexXA)(syms,i))->addr
443                 == ((DiSym*)VG_(indexXA)(syms,k))->addr;
444           k++)
445         ;
446      /* So now [i .. k-1] is a group all with the same start address.
447         Clip their ending addresses so they don't overlap [k].  In
448         the normal case (no overlaps), k == i+1. */
449      if (k < nsyms) {
450         DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
451         for (m = i; m < k; m++) {
452            DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
453            vg_assert(here->addr < next->addr);
454            if (here->addr + here->size > next->addr)
455               here->size = next->addr - here->addr;
456         }
457      }
458      i = k-1;
459      vg_assert(i <= nsyms);
460   }
461
462   j = 0;
463   if (nsyms > 0) {
464      j = 1;
465      for (i = 1; i < nsyms; i++) {
466         DiSym *s_j1, *s_j, *s_i;
467         vg_assert(j <= i);
468         s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
469         s_j  = (DiSym*)VG_(indexXA)(syms, j);
470         s_i  = (DiSym*)VG_(indexXA)(syms, i);
471         if (s_i->addr != s_j1->addr
472             || s_i->size != s_j1->size
473             || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) {
474            *s_j = *s_i;
475            j++;
476         } else {
477            if (trace_symtab)
478               VG_(printf)("nlist cleanup: dump duplicate avma %010lx  %s\n",
479                           s_i->addr, s_i->pri_name );
480         }
481      }
482   }
483   vg_assert(j >= 0 && j <= nsyms);
484   VG_(dropTailXA)(syms, nsyms - j);
485}
486
487
488/*------------------------------------------------------------*/
489/*---                                                      ---*/
490/*--- Mach-O top-level processing                          ---*/
491/*---                                                      ---*/
492/*------------------------------------------------------------*/
493
/* Path fragment appended to a file or bundle path to reach the
   directory holding the DWARF file inside a .dSYM bundle.  It is
   defined only if not already defined, so it can be overridden from
   outside this file. */
#if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
#define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
#endif
497
498
499static Bool file_exists_p(const Char *path)
500{
501   struct vg_stat sbuf;
502   SysRes res = VG_(stat)(path, &sbuf);
503   return sr_isError(res) ? False : True;
504}
505
506
507/* Search for an existing dSYM file as a possible separate debug file.
508   Adapted from gdb. */
509static Char *
510find_separate_debug_file (const Char *executable_name)
511{
512   Char *basename_str;
513   Char *dot_ptr;
514   Char *slash_ptr;
515   Char *dsymfile;
516
517   /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
518      will end up with an infinite loop where after we add a dSYM symbol file,
519      it will then enter this function asking if there is a debug file for the
520      dSYM file itself.  */
521   if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
522   {
523      /* Check for the existence of a .dSYM file for a given executable.  */
524      basename_str = VG_(basename) (executable_name);
525      dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
526                    VG_(strlen) (executable_name)
527                    + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
528                    + VG_(strlen) (basename_str)
529                    + 1
530                 );
531
532      /* First try for the dSYM in the same directory as the original file.  */
533      VG_(strcpy) (dsymfile, executable_name);
534      VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
535      VG_(strcat) (dsymfile, basename_str);
536
537      if (file_exists_p (dsymfile))
538         return dsymfile;
539
540      /* Now search for any parent directory that has a '.' in it so we can find
541         Mac OS X applications, bundles, plugins, and any other kinds of files.
542         Mac OS X application bundles wil have their program in
543         "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
544         ".bundle" or ".plugin" for other types of bundles).  So we look for any
545         prior '.' character and try appending the apple dSYM extension and
546         subdirectory and see if we find an existing dSYM file (in the above
547         MyApp example the dSYM would be at either:
548         "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
549         "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp".  */
550      VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
551      while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
552      {
553         /* Find the directory delimiter that follows the '.' character since
554            we now look for a .dSYM that follows any bundle extension.  */
555         slash_ptr = VG_(strchr) (dot_ptr, '/');
556         if (slash_ptr)
557         {
558             /* NULL terminate the string at the '/' character and append
559                the path down to the dSYM file.  */
560            *slash_ptr = '\0';
561            VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
562            VG_(strcat) (slash_ptr, basename_str);
563            if (file_exists_p (dsymfile))
564               return dsymfile;
565         }
566
567         /* NULL terminate the string at the '.' character and append
568            the path down to the dSYM file.  */
569         *dot_ptr = '\0';
570         VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
571         VG_(strcat) (dot_ptr, basename_str);
572         if (file_exists_p (dsymfile))
573            return dsymfile;
574
575         /* NULL terminate the string at the '.' locatated by the strrchr()
576            function again.  */
577         *dot_ptr = '\0';
578
579         /* We found a previous extension '.' character and did not find a
580            dSYM file so now find previous directory delimiter so we don't
581            try multiple times on a file name that may have a version number
582            in it such as "/some/path/MyApp.6.0.4.app".  */
583         slash_ptr = VG_(strrchr) (dsymfile, '/');
584         if (!slash_ptr)
585            break;
586         /* NULL terminate the string at the previous directory character
587            and search again.  */
588         *slash_ptr = '\0';
589      }
590   }
591
592   return NULL;
593}
594
595
596static UChar *getsectdata(UChar* base, SizeT size,
597                          Char *segname, Char *sectname,
598                          /*OUT*/Word *sect_size)
599{
600   struct MACH_HEADER *mh = (struct MACH_HEADER *)base;
601   struct load_command *cmd;
602   Int c;
603
604   for (c = 0, cmd = (struct load_command *)(mh+1);
605        c < mh->ncmds;
606        c++, cmd = (struct load_command *)(cmd->cmdsize + (Addr)cmd))
607   {
608      if (cmd->cmd == LC_SEGMENT_CMD) {
609         struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
610         if (0 == VG_(strncmp(seg->segname, segname, sizeof(seg->segname)))) {
611            struct SECTION *sects = (struct SECTION *)(seg+1);
612            Int s;
613            for (s = 0; s < seg->nsects; s++) {
614               if (0 == VG_(strncmp(sects[s].sectname, sectname,
615                                    sizeof(sects[s].sectname))))
616               {
617                  if (sect_size) *sect_size = sects[s].size;
618                  return (UChar *)(base + sects[s].offset);
619               }
620            }
621         }
622      }
623   }
624
625   if (sect_size) *sect_size = 0;
626   return 0;
627}
628
629
630/* Brute force just simply search for uuid[0..15] in img[0..n_img-1] */
631static Bool check_uuid_matches ( Addr imgA, Word n_img, UChar* uuid )
632{
633   Word   i;
634   UChar* img = (UChar*)imgA;
635   UChar  first = uuid[0];
636   if (n_img < 16)
637      return False;
638   for (i = 0; i < n_img-16; i++) {
639      if (img[i] != first)
640         continue;
641      if (0 == VG_(memcmp)( &img[i], &uuid[0], 16 ))
642         return True;
643   }
644   return False;
645}
646
647
648/* Heuristic kludge: return True if this looks like an installed
649   standard library; hence we shouldn't consider automagically running
650   dsymutil on it. */
651static Bool is_systemish_library_name ( UChar* name )
652{
653   vg_assert(name);
654   if (0 == VG_(strncasecmp)(name, "/usr/", 5)
655       || 0 == VG_(strncasecmp)(name, "/bin/", 5)
656       || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
657       || 0 == VG_(strncasecmp)(name, "/opt/", 5)
658       || 0 == VG_(strncasecmp)(name, "/sw/", 4)
659       || 0 == VG_(strncasecmp)(name, "/System/", 8)
660       || 0 == VG_(strncasecmp)(name, "/Library/", 9)
661       || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
662      return True;
663   } else {
664      return False;
665   }
666}
667
668
669Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
670{
671   struct symtab_command *symcmd = NULL;
672   struct dysymtab_command *dysymcmd = NULL;
673   HChar* dsymfilename = NULL;
674   Bool have_uuid = False;
675   UChar uuid[16];
676   ImageInfo ii;  /* main file */
677   ImageInfo iid; /* auxiliary .dSYM file */
678   Bool ok;
679   Word i;
680   struct _DebugInfoMapping* rx_map = NULL;
681   struct _DebugInfoMapping* rw_map = NULL;
682
683   /* mmap the object file to look for di->soname and di->text_bias
684      and uuid and nlist and STABS */
685
686   /* This should be ensured by our caller (that we're in the accept
687      state). */
688   vg_assert(di->fsm.have_rx_map);
689   vg_assert(di->fsm.have_rw_map);
690
691   for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
692      struct _DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
693      if (map->rx && !rx_map)
694         rx_map = map;
695      if (map->rw && !rw_map)
696         rw_map = map;
697      if (rx_map && rw_map)
698         break;
699   }
700   vg_assert(rx_map);
701   vg_assert(rw_map);
702
703   if (VG_(clo_verbosity) > 1)
704      VG_(message)(Vg_DebugMsg,
705                   "%s (rx at %#lx, rw at %#lx)\n", di->fsm.filename,
706                   rx_map->avma, rw_map->avma );
707
708   VG_(memset)(&ii,   0, sizeof(ii));
709   VG_(memset)(&iid,  0, sizeof(iid));
710   VG_(memset)(&uuid, 0, sizeof(uuid));
711
712   ok = map_image_aboard( di, &ii, di->fsm.filename );
713   if (!ok) goto fail;
714
715   vg_assert(ii.macho_img != NULL && ii.macho_img_szB > 0);
716
717   /* Poke around in the Mach-O header, to find some important
718      stuff. */
719   // Find LC_SYMTAB and LC_DYSYMTAB, if present.
720   // Read di->soname from LC_ID_DYLIB if present,
721   //    or from LC_ID_DYLINKER if present,
722   //    or use "NONE".
723   // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
724   // Get uuid for later dsym search
725
726   di->text_bias = 0;
727
728   { struct MACH_HEADER *mh = (struct MACH_HEADER *)ii.macho_img;
729      struct load_command *cmd;
730      Int c;
731
732      for (c = 0, cmd = (struct load_command *)(mh+1);
733           c < mh->ncmds;
734           c++, cmd = (struct load_command *)(cmd->cmdsize
735                                              + (unsigned long)cmd)) {
736         if (cmd->cmd == LC_SYMTAB) {
737            symcmd = (struct symtab_command *)cmd;
738         }
739         else if (cmd->cmd == LC_DYSYMTAB) {
740            dysymcmd = (struct dysymtab_command *)cmd;
741         }
742         else if (cmd->cmd == LC_ID_DYLIB && mh->filetype == MH_DYLIB) {
743            // GrP fixme bundle?
744            struct dylib_command *dcmd = (struct dylib_command *)cmd;
745            UChar *dylibname = dcmd->dylib.name.offset + (UChar *)dcmd;
746            UChar *soname = VG_(strrchr)(dylibname, '/');
747            if (!soname) soname = dylibname;
748            else soname++;
749            di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
750                                           soname);
751         }
752         else if (cmd->cmd==LC_ID_DYLINKER  &&  mh->filetype==MH_DYLINKER) {
753            struct dylinker_command *dcmd = (struct dylinker_command *)cmd;
754            UChar *dylinkername = dcmd->name.offset + (UChar *)dcmd;
755            UChar *soname = VG_(strrchr)(dylinkername, '/');
756            if (!soname) soname = dylinkername;
757            else soname++;
758            di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
759                                           soname);
760         }
761
762         // A comment from Julian about why varinfo[35] fail:
763         //
764         // My impression is, from comparing the output of otool -l for these
765         // executables with the logic in ML_(read_macho_debug_info),
766         // specifically the part that begins "else if (cmd->cmd ==
767         // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
768         // to work ok for text symbols.  In particular, it appears to assume
769         // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
770         // "struct SEGMENT_COMMAND" inside it is going to contain the info we
771         // need.  However, otool -l shows, and also the Apple docs state,
772         // that a struct load_command may contain an arbitrary number of
773         // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
774         // snarf the first.  But I'm not sure about this.
775         //
776         // The "Try for __DATA" block below simply adds acquisition of data
777         // svma/bias values using the same assumption.  It also needs
778         // (probably) to deal with bss sections, but I don't understand how
779         // this all ties together really, so it requires further study.
780         //
781         // If you can get your head around the relationship between MachO
782         // segments, sections and load commands, this might be relatively
783         // easy to fix properly.
784         //
785         // Basically we need to come up with plausible numbers for di->
786         // {text,data,bss}_{avma,svma}, from which the _bias numbers are
787         // then trivially derived.  Then I think the debuginfo reader should
788         // work pretty well.
789         else if (cmd->cmd == LC_SEGMENT_CMD) {
790            struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
791            /* Try for __TEXT */
792            if (!di->text_present
793                && 0 == VG_(strcmp)(seg->segname, "__TEXT")
794                /* DDD: is the  next line a kludge? -- JRS */
795                && seg->fileoff == 0 && seg->filesize != 0) {
796               di->text_present = True;
797               di->text_svma = (Addr)seg->vmaddr;
798               di->text_avma = rx_map->avma;
799               di->text_size = seg->vmsize;
800               di->text_bias = di->text_avma - di->text_svma;
801               /* Make the _debug_ values be the same as the
802                  svma/bias for the primary object, since there is
803                  no secondary (debuginfo) object, but nevertheless
804                  downstream biasing of Dwarf3 relies on the
805                  _debug_ values. */
806               di->text_debug_svma = di->text_svma;
807               di->text_debug_bias = di->text_bias;
808            }
809            /* Try for __DATA */
810            if (!di->data_present
811                && 0 == VG_(strcmp)(seg->segname, "__DATA")
812                /* && DDD:seg->fileoff == 0 */ && seg->filesize != 0) {
813               di->data_present = True;
814               di->data_svma = (Addr)seg->vmaddr;
815               di->data_avma = rw_map->avma;
816               di->data_size = seg->vmsize;
817               di->data_bias = di->data_avma - di->data_svma;
818               di->data_debug_svma = di->data_svma;
819               di->data_debug_bias = di->data_bias;
820            }
821         }
822         else if (cmd->cmd == LC_UUID) {
823             struct uuid_command *uuid_cmd = (struct uuid_command *)cmd;
824             VG_(memcpy)(uuid, uuid_cmd->uuid, sizeof(uuid));
825             have_uuid = True;
826         }
827      }
828   }
829
830   if (!di->soname) {
831      di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
832   }
833
834   if (di->trace_symtab) {
835      VG_(printf)("\n");
836      VG_(printf)("SONAME = %s\n", di->soname);
837      VG_(printf)("\n");
838   }
839
840   /* Now we have the base object to hand.  Read symbols from it. */
841
842   if (ii.macho_img && ii.macho_img_szB > 0 && symcmd && dysymcmd) {
843
844      /* Read nlist symbol table */
845      struct NLIST *syms;
846      UChar *strs;
847      XArray* /* DiSym */ candSyms = NULL;
848      Word nCandSyms;
849
850      if (ii.macho_img_szB < symcmd->stroff + symcmd->strsize
851          || ii.macho_img_szB < symcmd->symoff + symcmd->nsyms
852                                                 * sizeof(struct NLIST)) {
853         ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
854         goto fail;
855      }
856      if (dysymcmd->ilocalsym + dysymcmd->nlocalsym > symcmd->nsyms
857          || dysymcmd->iextdefsym + dysymcmd->nextdefsym > symcmd->nsyms) {
858         ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
859         goto fail;
860      }
861
862      syms = (struct NLIST *)(ii.macho_img + symcmd->symoff);
863      strs = (UChar *)(ii.macho_img + symcmd->stroff);
864
865      if (VG_(clo_verbosity) > 1)
866         VG_(message)(Vg_DebugMsg,
867            "   reading syms   from primary file (%d %d)\n",
868            dysymcmd->nextdefsym, dysymcmd->nlocalsym );
869
870      /* Read candidate symbols into 'candSyms', so we can truncate
871         overlapping ends and generally tidy up, before presenting
872         them to ML_(addSym). */
873      candSyms = VG_(newXA)(
874                    ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
875                    ML_(dinfo_free), sizeof(DiSym)
876                 );
877      vg_assert(candSyms);
878
879      // extern symbols
880      read_symtab(candSyms,
881                  di,
882                  syms + dysymcmd->iextdefsym, dysymcmd->nextdefsym,
883                  strs, symcmd->strsize);
884      // static and private_extern symbols
885      read_symtab(candSyms,
886                  di,
887                  syms + dysymcmd->ilocalsym, dysymcmd->nlocalsym,
888                  strs, symcmd->strsize);
889
890      /* tidy up the cand syms -- trim overlapping ends.  May resize
891         candSyms. */
892      tidy_up_cand_syms( candSyms, di->trace_symtab );
893
894      /* and finally present them to ML_(addSym) */
895      nCandSyms = VG_(sizeXA)( candSyms );
896      for (i = 0; i < nCandSyms; i++) {
897         DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
898         vg_assert(cand->pri_name != NULL);
899         vg_assert(cand->sec_names == NULL);
900         if (di->trace_symtab)
901            VG_(printf)("nlist final: acquire  avma %010lx-%010lx  %s\n",
902                        cand->addr, cand->addr + cand->size - 1,
903                        cand->pri_name );
904         ML_(addSym)( di, cand );
905      }
906      VG_(deleteXA)( candSyms );
907   }
908
909   /* If there's no UUID in the primary, don't even bother to try and
910      read any DWARF, since we won't be able to verify it matches.
911      Our policy is not to load debug info unless we can verify that
912      it matches the primary.  Just declare success at this point.
913      And don't complain to the user, since that would cause us to
914      complain on objects compiled without -g.  (Some versions of
915      Xcode are observed to omit a UUID entry for objects linked(?)
916      without -g.  Others don't appear to omit it.) */
917   if (!have_uuid)
918      goto success;
919
920   /* mmap the dSYM file to look for DWARF debug info.  If successful,
921      use the .macho_img and .macho_img_szB in iid. */
922
923   dsymfilename = find_separate_debug_file( di->fsm.filename );
924
925   /* Try to load it. */
926   if (dsymfilename) {
927      Bool valid;
928
929      if (VG_(clo_verbosity) > 1)
930         VG_(message)(Vg_DebugMsg, "   dSYM= %s\n", dsymfilename);
931
932      ok = map_image_aboard( di, &iid, dsymfilename );
933      if (!ok) goto fail;
934
935      /* check it has the right uuid. */
936      vg_assert(have_uuid);
937      valid = iid.macho_img && iid.macho_img_szB > 0
938              && check_uuid_matches( (Addr)iid.macho_img,
939                                     iid.macho_img_szB, uuid );
940      if (valid)
941         goto read_the_dwarf;
942
943      if (VG_(clo_verbosity) > 1)
944         VG_(message)(Vg_DebugMsg, "   dSYM does not have "
945                                   "correct UUID (out of date?)\n");
946   }
947
948   /* There was no dsym file, or it doesn't match.  We'll have to try
949      regenerating it, unless --dsymutil=no, in which case just complain
950      instead. */
951
952   /* If this looks like a lib that we shouldn't run dsymutil on, just
953      give up.  (possible reasons: is system lib, or in /usr etc, or
954      the dsym dir would not be writable by the user, or we're running
955      as root) */
956   vg_assert(di->fsm.filename);
957   if (is_systemish_library_name(di->fsm.filename))
958      goto success;
959
960   if (!VG_(clo_dsymutil)) {
961      if (VG_(clo_verbosity) == 1) {
962         VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename);
963      }
964      if (VG_(clo_verbosity) > 0)
965         VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
966                      "--dsymutil=yes\n",
967                      VG_(clo_verbosity) > 1 ? "   " : "",
968                      dsymfilename ? "has wrong UUID" : "is missing");
969      goto success;
970   }
971
972   /* Run dsymutil */
973
974   { Int r;
975     HChar* dsymutil = "/usr/bin/dsymutil ";
976     HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
977                                     VG_(strlen)(dsymutil)
978                                     + VG_(strlen)(di->fsm.filename)
979                                     + 32 /* misc */ );
980     VG_(strcpy)(cmd, dsymutil);
981     if (0) VG_(strcat)(cmd, "--verbose ");
982     VG_(strcat)(cmd, "\"");
983     VG_(strcat)(cmd, di->fsm.filename);
984     VG_(strcat)(cmd, "\"");
985     VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
986     r = VG_(system)( cmd );
987     if (r)
988        VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
989     ML_(dinfo_free)(cmd);
990     dsymfilename = find_separate_debug_file(di->fsm.filename);
991   }
992
993   /* Try again to load it. */
994   if (dsymfilename) {
995      Bool valid;
996
997      if (VG_(clo_verbosity) > 1)
998         VG_(message)(Vg_DebugMsg, "   dsyms= %s\n", dsymfilename);
999
1000      ok = map_image_aboard( di, &iid, dsymfilename );
1001      if (!ok) goto fail;
1002
1003      /* check it has the right uuid. */
1004      vg_assert(have_uuid);
1005      valid = iid.macho_img && iid.macho_img_szB > 0
1006              && check_uuid_matches( (Addr)iid.macho_img,
1007                                     iid.macho_img_szB, uuid );
1008      if (!valid) {
1009         if (VG_(clo_verbosity) > 0) {
1010            VG_(message)(Vg_DebugMsg,
1011               "WARNING: did not find expected UUID %02X%02X%02X%02X"
1012               "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
1013               " in dSYM dir\n",
1014               (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
1015               (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
1016               (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
1017               (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
1018               (UInt)uuid[14], (UInt)uuid[15] );
1019            VG_(message)(Vg_DebugMsg,
1020                         "WARNING: for %s\n", di->fsm.filename);
1021         }
1022         unmap_image( &iid );
1023         /* unmap_image zeroes the fields, so the 'if (iid.img)' test
1024            at the 'fail' label will not try to unmap it again. */
1025         goto fail;
1026      }
1027   }
1028
1029   /* Right.  Finally we have our best try at the dwarf image, so go
1030      on to reading stuff out of it. */
1031
1032  read_the_dwarf:
1033   if (iid.macho_img && iid.macho_img_szB > 0) {
1034      UChar* debug_info_img = NULL;
1035      Word   debug_info_sz;
1036      UChar* debug_abbv_img;
1037      Word   debug_abbv_sz;
1038      UChar* debug_line_img;
1039      Word   debug_line_sz;
1040      UChar* debug_str_img;
1041      Word   debug_str_sz;
1042      UChar* debug_ranges_img;
1043      Word   debug_ranges_sz;
1044      UChar* debug_loc_img;
1045      Word   debug_loc_sz;
1046      UChar* debug_name_img;
1047      Word   debug_name_sz;
1048
1049      debug_info_img =
1050          getsectdata(iid.macho_img, iid.macho_img_szB,
1051                      "__DWARF", "__debug_info", &debug_info_sz);
1052      debug_abbv_img =
1053          getsectdata(iid.macho_img, iid.macho_img_szB,
1054                      "__DWARF", "__debug_abbrev", &debug_abbv_sz);
1055      debug_line_img =
1056          getsectdata(iid.macho_img, iid.macho_img_szB,
1057                      "__DWARF", "__debug_line", &debug_line_sz);
1058      debug_str_img =
1059          getsectdata(iid.macho_img, iid.macho_img_szB,
1060                      "__DWARF", "__debug_str", &debug_str_sz);
1061      debug_ranges_img =
1062          getsectdata(iid.macho_img, iid.macho_img_szB,
1063                      "__DWARF", "__debug_ranges", &debug_ranges_sz);
1064      debug_loc_img =
1065          getsectdata(iid.macho_img, iid.macho_img_szB,
1066                      "__DWARF", "__debug_loc", &debug_loc_sz);
1067      debug_name_img =
1068          getsectdata(iid.macho_img, iid.macho_img_szB,
1069                      "__DWARF", "__debug_pubnames", &debug_name_sz);
1070
1071      if (debug_info_img) {
1072         if (VG_(clo_verbosity) > 1) {
1073            if (0)
1074            VG_(message)(Vg_DebugMsg,
1075                         "Reading dwarf3 for %s (%#lx) from %s"
1076                         " (%ld %ld %ld %ld %ld %ld)\n",
1077                         di->fsm.filename, di->text_avma, dsymfilename,
1078                         debug_info_sz, debug_abbv_sz, debug_line_sz,
1079                         debug_str_sz, debug_ranges_sz, debug_loc_sz
1080                         );
1081            VG_(message)(Vg_DebugMsg,
1082               "   reading dwarf3 from dsyms file\n");
1083         }
1084         /* The old reader: line numbers and unwind info only */
1085         ML_(read_debuginfo_dwarf3) ( di,
1086                                      debug_info_img, debug_info_sz,
1087				      NULL,           0,
1088                                      debug_abbv_img, debug_abbv_sz,
1089                                      debug_line_img, debug_line_sz,
1090                                      debug_str_img,  debug_str_sz,
1091                                      NULL, 0 /* ALT .debug_str */ );
1092
1093         /* The new reader: read the DIEs in .debug_info to acquire
1094            information on variable types and locations.  But only if
1095            the tool asks for it, or the user requests it on the
1096            command line. */
1097         if (VG_(needs).var_info /* the tool requires it */
1098             || VG_(clo_read_var_info) /* the user asked for it */) {
1099            ML_(new_dwarf3_reader)(
1100               di, debug_info_img,   debug_info_sz,
1101	           NULL,             0,
1102                   debug_abbv_img,   debug_abbv_sz,
1103                   debug_line_img,   debug_line_sz,
1104                   debug_str_img,    debug_str_sz,
1105                   debug_ranges_img, debug_ranges_sz,
1106                   debug_loc_img,    debug_loc_sz,
1107                   NULL, 0, /* ALT .debug_info */
1108                   NULL, 0, /* ALT .debug_abbv */
1109                   NULL, 0, /* ALT .debug_line */
1110                   NULL, 0  /* ALT .debug_str */
1111            );
1112         }
1113      }
1114   }
1115
1116   if (dsymfilename) ML_(dinfo_free)(dsymfilename);
1117
1118  success:
1119   if (ii.img)
1120      unmap_image(&ii);
1121   if (iid.img)
1122      unmap_image(&iid);
1123   return True;
1124
1125   /* NOTREACHED */
1126
1127  fail:
1128   ML_(symerr)(di, True, "Error reading Mach-O object.");
1129   if (ii.img)
1130      unmap_image(&ii);
1131   if (iid.img)
1132      unmap_image(&iid);
1133   return False;
1134}
1135
1136#endif // defined(VGO_darwin)
1137
1138/*--------------------------------------------------------------------*/
1139/*--- end                                                          ---*/
1140/*--------------------------------------------------------------------*/
1141