readmacho.c revision d935068fc7b53c8a826b3436cdfccd5b7d446903
1
2/*--------------------------------------------------------------------*/
3/*--- Reading of syms & debug info from Mach-O files.              ---*/
4/*---                                                  readmacho.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2005-2011 Apple Inc.
12      Greg Parker gparker@apple.com
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#if defined(VGO_darwin)
33
34#include "pub_core_basics.h"
35#include "pub_core_vki.h"
36#include "pub_core_libcbase.h"
37#include "pub_core_libcprint.h"
38#include "pub_core_libcassert.h"
39#include "pub_core_libcfile.h"
40#include "pub_core_libcproc.h"
41#include "pub_core_aspacemgr.h"    /* for mmaping debuginfo files */
42#include "pub_core_machine.h"      /* VG_ELF_CLASS */
43#include "pub_core_options.h"
44#include "pub_core_oset.h"
45#include "pub_core_tooliface.h"    /* VG_(needs) */
46#include "pub_core_xarray.h"
47#include "pub_core_clientstate.h"
48#include "pub_core_debuginfo.h"
49
50#include "priv_d3basics.h"
51#include "priv_misc.h"
52#include "priv_tytypes.h"
53#include "priv_storage.h"
54#include "priv_readmacho.h"
55#include "priv_readdwarf.h"
56#include "priv_readdwarf3.h"
57#include "priv_readstabs.h"
58
59/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
60#include <mach-o/loader.h>
61#include <mach-o/nlist.h>
62#include <mach-o/fat.h>
63/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
64
/* Pick the word-size-specific Mach-O names once, so that the rest of
   this file can use a single spelling for the magic number, the
   header/segment/section/nlist structures and the segment load
   command.  A word size of 4 selects the 32-bit variants; anything
   else selects the 64-bit ones. */
#if VG_WORDSIZE != 4
# define MAGIC MH_MAGIC_64
# define MACH_HEADER mach_header_64
# define LC_SEGMENT_CMD LC_SEGMENT_64
# define SEGMENT_COMMAND segment_command_64
# define SECTION section_64
# define NLIST nlist_64
#else
# define MAGIC MH_MAGIC
# define MACH_HEADER mach_header
# define LC_SEGMENT_CMD LC_SEGMENT
# define SEGMENT_COMMAND segment_command
# define SECTION section
# define NLIST nlist
#endif
80
81
82/*------------------------------------------------------------*/
83/*---                                                      ---*/
84/*--- Mach-O file mapping/unmapping helpers                ---*/
85/*---                                                      ---*/
86/*------------------------------------------------------------*/
87
88typedef
89   struct {
90      /* These two describe the entire mapped-in ("primary") image,
91         fat headers, kitchen sink, whatnot: the entire file.  The
92         image is mapped into img[0 .. img_szB-1]. */
93      UChar* img;
94      SizeT  img_szB;
95      /* These two describe the Mach-O object of interest, which is
96         presumably somewhere inside the primary image.
97         map_image_aboard() below, which generates this info, will
98         carefully check that the macho_ fields denote a section of
99         memory that falls entirely inside img[0 .. img_szB-1]. */
100      UChar* macho_img;
101      SizeT  macho_img_szB;
102   }
103   ImageInfo;
104
105
106Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
107{
108   /* (JRS: the Mach-O headers might not be in this mapped data,
109      because we only mapped a page for this initial check,
110      or at least not very much, and what's at the start of the file
111      is in general a so-called fat header.  The Mach-O object we're
112      interested in could be arbitrarily far along the image, and so
113      we can't assume its header will fall within this page.) */
114
115   /* But we can say that either it's a fat object, in which case it
116      begins with a fat header, or it's unadorned Mach-O, in which
117      case it starts with a normal header.  At least do what checks we
118      can to establish whether or not we're looking at something
119      sane. */
120
121   const struct fat_header*  fh_be = buf;
122   const struct MACH_HEADER* mh    = buf;
123
124   vg_assert(buf);
125   if (szB < sizeof(struct fat_header))
126      return False;
127   if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
128      return True;
129
130   if (szB < sizeof(struct MACH_HEADER))
131      return False;
132   if (mh->magic == MAGIC)
133      return True;
134
135   return False;
136}
137
138
139/* Unmap an image mapped in by map_image_aboard. */
140static void unmap_image ( /*MOD*/ImageInfo* ii )
141{
142   SysRes sres;
143   vg_assert(ii->img);
144   vg_assert(ii->img_szB > 0);
145   sres = VG_(am_munmap_valgrind)( (Addr)ii->img, ii->img_szB );
146   /* Do we care if this fails?  I suppose so; it would indicate
147      some fairly serious snafu with the mapping of the file. */
148   vg_assert( !sr_isError(sres) );
149   VG_(memset)(ii, 0, sizeof(*ii));
150}
151
152
153/* Map a given fat or thin object aboard, find the thin part if
154   necessary, do some checks, and write details of both the fat and
155   thin parts into *ii.  Returns False (and leaves the file unmapped)
156   on failure.  Guarantees to return pointers to a valid(ish) Mach-O
157   image if it succeeds. */
158static Bool map_image_aboard ( DebugInfo* di, /* only for err msgs */
159                               /*OUT*/ImageInfo* ii, UChar* filename )
160{
161   VG_(memset)(ii, 0, sizeof(*ii));
162
163   /* First off, try to map the thing in. */
164   { SizeT  size;
165     SysRes fd, sres;
166     struct vg_stat stat_buf;
167
168     fd = VG_(stat)(filename, &stat_buf);
169     if (sr_isError(fd)) {
170        ML_(symerr)(di, True, "Can't stat image (to determine its size)?!");
171        return False;
172     }
173     size = stat_buf.size;
174
175     fd = VG_(open)(filename, VKI_O_RDONLY, 0);
176     if (sr_isError(fd)) {
177       ML_(symerr)(di, True, "Can't open image to read symbols?!");
178        return False;
179     }
180
181     sres = VG_(am_mmap_file_float_valgrind)
182               ( size, VKI_PROT_READ, sr_Res(fd), 0 );
183     if (sr_isError(sres)) {
184        ML_(symerr)(di, True, "Can't mmap image to read symbols?!");
185        return False;
186     }
187
188     VG_(close)(sr_Res(fd));
189
190     ii->img     = (UChar*)sr_Res(sres);
191     ii->img_szB = size;
192   }
193
194   /* Now it's mapped in and we have .img and .img_szB set.  Look for
195      the embedded Mach-O object.  If not findable, unmap and fail. */
196   { struct fat_header*  fh_be;
197     struct fat_header   fh;
198     struct MACH_HEADER* mh;
199
200     // Assume initially that we have a thin image, and update
201     // these if it turns out to be fat.
202     ii->macho_img     = ii->img;
203     ii->macho_img_szB = ii->img_szB;
204
205     // Check for fat header.
206     if (ii->img_szB < sizeof(struct fat_header)) {
207        ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
208        goto unmap_and_fail;
209     }
210
211     // Fat header is always BIG-ENDIAN
212     fh_be = (struct fat_header *)ii->img;
213     fh.magic = VG_(ntohl)(fh_be->magic);
214     fh.nfat_arch = VG_(ntohl)(fh_be->nfat_arch);
215     if (fh.magic == FAT_MAGIC) {
216        // Look for a good architecture.
217        struct fat_arch *arch_be;
218        struct fat_arch arch;
219        Int f;
220        if (ii->img_szB < sizeof(struct fat_header)
221                          + fh.nfat_arch * sizeof(struct fat_arch)) {
222           ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
223           goto unmap_and_fail;
224        }
225        for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
226             f < fh.nfat_arch;
227             f++, arch_be++) {
228           Int cputype;
229#          if defined(VGA_ppc)
230           cputype = CPU_TYPE_POWERPC;
231#          elif defined(VGA_ppc64)
232           cputype = CPU_TYPE_POWERPC64;
233#          elif defined(VGA_x86)
234           cputype = CPU_TYPE_X86;
235#          elif defined(VGA_amd64)
236           cputype = CPU_TYPE_X86_64;
237#          else
238#            error "unknown architecture"
239#          endif
240           arch.cputype    = VG_(ntohl)(arch_be->cputype);
241           arch.cpusubtype = VG_(ntohl)(arch_be->cpusubtype);
242           arch.offset     = VG_(ntohl)(arch_be->offset);
243           arch.size       = VG_(ntohl)(arch_be->size);
244           if (arch.cputype == cputype) {
245              if (ii->img_szB < arch.offset + arch.size) {
246                 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
247                 goto unmap_and_fail;
248              }
249              ii->macho_img     = ii->img + arch.offset;
250              ii->macho_img_szB = arch.size;
251              break;
252           }
253        }
254        if (f == fh.nfat_arch) {
255           ML_(symerr)(di, True,
256                       "No acceptable architecture found in fat file.");
257           goto unmap_and_fail;
258        }
259     }
260
261     /* Sanity check what we found. */
262
263     /* assured by logic above */
264     vg_assert(ii->img_szB >= sizeof(struct fat_header));
265
266     if (ii->macho_img_szB < sizeof(struct MACH_HEADER)) {
267        ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
268        goto unmap_and_fail;
269     }
270
271     if (ii->macho_img_szB > ii->img_szB) {
272        ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
273        goto unmap_and_fail;
274     }
275
276     if (ii->macho_img >= ii->img
277         && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
278        /* thin entirely within fat, as expected */
279     } else {
280        ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
281        goto unmap_and_fail;
282     }
283
284     mh = (struct MACH_HEADER *)ii->macho_img;
285     if (mh->magic != MAGIC) {
286        ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
287        goto unmap_and_fail;
288     }
289
290     if (ii->macho_img_szB < sizeof(struct MACH_HEADER) + mh->sizeofcmds) {
291        ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
292        goto unmap_and_fail;
293     }
294   }
295
296   vg_assert(ii->img);
297   vg_assert(ii->macho_img);
298   vg_assert(ii->img_szB > 0);
299   vg_assert(ii->macho_img_szB > 0);
300   vg_assert(ii->macho_img >= ii->img);
301   vg_assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
302   return True;  /* success */
303   /*NOTREACHED*/
304
305  unmap_and_fail:
306   unmap_image(ii);
307   return False; /* bah! */
308}
309
310
311/*------------------------------------------------------------*/
312/*---                                                      ---*/
313/*--- Mach-O symbol table reading                          ---*/
314/*---                                                      ---*/
315/*------------------------------------------------------------*/
316
317/* Read a symbol table (nlist).  Add the resulting candidate symbols
318   to 'syms'; the caller will post-process them and hand them off to
319   ML_(addSym) itself. */
320static
321void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
322                  struct _DebugInfo* di,
323                  struct NLIST* o_symtab, UInt o_symtab_count,
324                  UChar*     o_strtab, UInt o_strtab_sz )
325{
326   Int    i;
327   Addr   sym_addr;
328   DiSym  disym;
329   UChar* name;
330
331   static UChar* s_a_t_v = NULL; /* do not make non-static */
332
333   for (i = 0; i < o_symtab_count; i++) {
334      struct NLIST *nl = o_symtab+i;
335      if ((nl->n_type & N_TYPE) == N_SECT) {
336         sym_addr = di->text_bias + nl->n_value;
337    /*} else if ((nl->n_type & N_TYPE) == N_ABS) {
338         GrP fixme don't ignore absolute symbols?
339         sym_addr = nl->n_value; */
340      } else {
341         continue;
342      }
343
344      if (di->trace_symtab)
345         VG_(printf)("nlist raw: avma %010lx  %s\n",
346                     sym_addr, o_strtab + nl->n_un.n_strx );
347
348      /* If no part of the symbol falls within the mapped range,
349         ignore it. */
350      if (sym_addr <= di->text_avma
351          || sym_addr >= di->text_avma+di->text_size) {
352         continue;
353      }
354
355      /* skip names which point outside the string table;
356         following these risks segfaulting Valgrind */
357      name = o_strtab + nl->n_un.n_strx;
358      if (name < o_strtab || name >= o_strtab + o_strtab_sz)
359         continue;
360
361      /* skip nameless symbols; these appear to be common, but
362         useless */
363      if (*name == 0)
364         continue;
365
366      disym.addr      = sym_addr;
367      disym.tocptr    = 0;
368      disym.pri_name  = ML_(addStr)(di, name, -1);
369      disym.sec_names = NULL;
370      disym.size      = // let canonicalize fix it
371                        di->text_avma+di->text_size - sym_addr;
372      disym.isText    = True;
373      disym.isIFunc   = False;
374      // Lots of user function names get prepended with an underscore.  Eg. the
375      // function 'f' becomes the symbol '_f'.  And the "below main"
376      // function is called "start".  So we skip the leading underscore, and
377      // if we see 'start' and --show-below-main=no, we rename it as
378      // "start_according_to_valgrind", which makes it easy to spot later
379      // and display as "(below main)".
380      if (disym.pri_name[0] == '_') {
381         disym.pri_name++;
382      }
383      else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) {
384         if (s_a_t_v == NULL)
385            s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
386         vg_assert(s_a_t_v);
387         disym.pri_name = s_a_t_v;
388      }
389
390      vg_assert(disym.pri_name);
391      VG_(addToXA)( syms, &disym );
392   }
393}
394
395
396/* Compare DiSyms by their start address, and for equal addresses, use
397   the primary name as a secondary sort key. */
398static Int cmp_DiSym_by_start_then_name ( void* v1, void* v2 )
399{
400   DiSym* s1 = (DiSym*)v1;
401   DiSym* s2 = (DiSym*)v2;
402   if (s1->addr < s2->addr) return -1;
403   if (s1->addr > s2->addr) return 1;
404   return VG_(strcmp)(s1->pri_name, s2->pri_name);
405}
406
407/* 'cand' is a bunch of candidate symbols obtained by reading
408   nlist-style symbol table entries.  Their ends may overlap, so sort
409   them and truncate them accordingly.  The code in this routine is
410   copied almost verbatim from read_symbol_table() in readxcoff.c. */
411static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
412                                Bool trace_symtab )
413{
414   Word nsyms, i, j, k, m;
415
416   nsyms = VG_(sizeXA)(syms);
417
418   VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
419   VG_(sortXA)(syms);
420
421   /* We only know for sure the start addresses (actual VMAs) of
422      symbols, and an overestimation of their end addresses.  So sort
423      by start address, then clip each symbol so that its end address
424      does not overlap with the next one along.
425
426      There is a small refinement: if a group of symbols have the same
427      address, treat them as a group: find the next symbol along that
428      has a higher start address, and clip all of the group
429      accordingly.  This clips the group as a whole so as not to
430      overlap following symbols.  This leaves prefersym() in
431      storage.c, which is not nlist-specific, to later decide which of
432      the symbols in the group to keep.
433
434      Another refinement is that we need to get rid of symbols which,
435      after clipping, have identical starts, ends, and names.  So the
436      sorting uses the name as a secondary key.
437   */
438
439   for (i = 0; i < nsyms; i++) {
440      for (k = i+1;
441           k < nsyms
442             && ((DiSym*)VG_(indexXA)(syms,i))->addr
443                 == ((DiSym*)VG_(indexXA)(syms,k))->addr;
444           k++)
445         ;
446      /* So now [i .. k-1] is a group all with the same start address.
447         Clip their ending addresses so they don't overlap [k].  In
448         the normal case (no overlaps), k == i+1. */
449      if (k < nsyms) {
450         DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
451         for (m = i; m < k; m++) {
452            DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
453            vg_assert(here->addr < next->addr);
454            if (here->addr + here->size > next->addr)
455               here->size = next->addr - here->addr;
456         }
457      }
458      i = k-1;
459      vg_assert(i <= nsyms);
460   }
461
462   j = 0;
463   if (nsyms > 0) {
464      j = 1;
465      for (i = 1; i < nsyms; i++) {
466         DiSym *s_j1, *s_j, *s_i;
467         vg_assert(j <= i);
468         s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
469         s_j  = (DiSym*)VG_(indexXA)(syms, j);
470         s_i  = (DiSym*)VG_(indexXA)(syms, i);
471         if (s_i->addr != s_j1->addr
472             || s_i->size != s_j1->size
473             || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) {
474            *s_j = *s_i;
475            j++;
476         } else {
477            if (trace_symtab)
478               VG_(printf)("nlist cleanup: dump duplicate avma %010lx  %s\n",
479                           s_i->addr, s_i->pri_name );
480         }
481      }
482   }
483   vg_assert(j >= 0 && j <= nsyms);
484   VG_(dropTailXA)(syms, nsyms - j);
485}
486
487
488/*------------------------------------------------------------*/
489/*---                                                      ---*/
490/*--- Mach-O top-level processing                          ---*/
491/*---                                                      ---*/
492/*------------------------------------------------------------*/
493
/* Suffix appended to a path to reach the DWARF directory inside a
   .dSYM bundle.  May be overridden by defining it beforehand. */
#ifndef APPLE_DSYM_EXT_AND_SUBDIRECTORY
# define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
#endif
497
498
499static Bool file_exists_p(const Char *path)
500{
501   struct vg_stat sbuf;
502   SysRes res = VG_(stat)(path, &sbuf);
503   return sr_isError(res) ? False : True;
504}
505
506
507/* Search for an existing dSYM file as a possible separate debug file.
508   Adapted from gdb. */
509static Char *
510find_separate_debug_file (const Char *executable_name)
511{
512   Char *basename_str;
513   Char *dot_ptr;
514   Char *slash_ptr;
515   Char *dsymfile;
516
517   /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
518      will end up with an infinite loop where after we add a dSYM symbol file,
519      it will then enter this function asking if there is a debug file for the
520      dSYM file itself.  */
521   if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
522   {
523      /* Check for the existence of a .dSYM file for a given executable.  */
524      basename_str = VG_(basename) (executable_name);
525      dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
526                    VG_(strlen) (executable_name)
527                    + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
528                    + VG_(strlen) (basename_str)
529                    + 1
530                 );
531
532      /* First try for the dSYM in the same directory as the original file.  */
533      VG_(strcpy) (dsymfile, executable_name);
534      VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
535      VG_(strcat) (dsymfile, basename_str);
536
537      if (file_exists_p (dsymfile))
538         return dsymfile;
539
540      /* Now search for any parent directory that has a '.' in it so we can find
541         Mac OS X applications, bundles, plugins, and any other kinds of files.
542         Mac OS X application bundles wil have their program in
543         "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
544         ".bundle" or ".plugin" for other types of bundles).  So we look for any
545         prior '.' character and try appending the apple dSYM extension and
546         subdirectory and see if we find an existing dSYM file (in the above
547         MyApp example the dSYM would be at either:
548         "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
549         "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp".  */
550      VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
551      while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
552      {
553         /* Find the directory delimiter that follows the '.' character since
554            we now look for a .dSYM that follows any bundle extension.  */
555         slash_ptr = VG_(strchr) (dot_ptr, '/');
556         if (slash_ptr)
557         {
558             /* NULL terminate the string at the '/' character and append
559                the path down to the dSYM file.  */
560            *slash_ptr = '\0';
561            VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
562            VG_(strcat) (slash_ptr, basename_str);
563            if (file_exists_p (dsymfile))
564               return dsymfile;
565         }
566
567         /* NULL terminate the string at the '.' character and append
568            the path down to the dSYM file.  */
569         *dot_ptr = '\0';
570         VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
571         VG_(strcat) (dot_ptr, basename_str);
572         if (file_exists_p (dsymfile))
573            return dsymfile;
574
575         /* NULL terminate the string at the '.' locatated by the strrchr()
576            function again.  */
577         *dot_ptr = '\0';
578
579         /* We found a previous extension '.' character and did not find a
580            dSYM file so now find previous directory delimiter so we don't
581            try multiple times on a file name that may have a version number
582            in it such as "/some/path/MyApp.6.0.4.app".  */
583         slash_ptr = VG_(strrchr) (dsymfile, '/');
584         if (!slash_ptr)
585            break;
586         /* NULL terminate the string at the previous directory character
587            and search again.  */
588         *slash_ptr = '\0';
589      }
590   }
591
592   return NULL;
593}
594
595
596static UChar *getsectdata(UChar* base, SizeT size,
597                          Char *segname, Char *sectname,
598                          /*OUT*/Word *sect_size)
599{
600   struct MACH_HEADER *mh = (struct MACH_HEADER *)base;
601   struct load_command *cmd;
602   Int c;
603
604   for (c = 0, cmd = (struct load_command *)(mh+1);
605        c < mh->ncmds;
606        c++, cmd = (struct load_command *)(cmd->cmdsize + (Addr)cmd))
607   {
608      if (cmd->cmd == LC_SEGMENT_CMD) {
609         struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
610         if (0 == VG_(strncmp(seg->segname, segname, sizeof(seg->segname)))) {
611            struct SECTION *sects = (struct SECTION *)(seg+1);
612            Int s;
613            for (s = 0; s < seg->nsects; s++) {
614               if (0 == VG_(strncmp(sects[s].sectname, sectname,
615                                    sizeof(sects[s].sectname))))
616               {
617                  if (sect_size) *sect_size = sects[s].size;
618                  return (UChar *)(base + sects[s].offset);
619               }
620            }
621         }
622      }
623   }
624
625   if (sect_size) *sect_size = 0;
626   return 0;
627}
628
629
630/* Brute force just simply search for uuid[0..15] in img[0..n_img-1] */
631static Bool check_uuid_matches ( Addr imgA, Word n_img, UChar* uuid )
632{
633   Word   i;
634   UChar* img = (UChar*)imgA;
635   UChar  first = uuid[0];
636   if (n_img < 16)
637      return False;
638   for (i = 0; i < n_img-16; i++) {
639      if (img[i] != first)
640         continue;
641      if (0 == VG_(memcmp)( &img[i], &uuid[0], 16 ))
642         return True;
643   }
644   return False;
645}
646
647
648/* Heuristic kludge: return True if this looks like an installed
649   standard library; hence we shouldn't consider automagically running
650   dsymutil on it. */
651static Bool is_systemish_library_name ( UChar* name )
652{
653   vg_assert(name);
654   if (0 == VG_(strncasecmp)(name, "/usr/", 5)
655       || 0 == VG_(strncasecmp)(name, "/bin/", 5)
656       || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
657       || 0 == VG_(strncasecmp)(name, "/opt/", 5)
658       || 0 == VG_(strncasecmp)(name, "/sw/", 4)
659       || 0 == VG_(strncasecmp)(name, "/System/", 8)
660       || 0 == VG_(strncasecmp)(name, "/Library/", 9)
661       || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
662      return True;
663   } else {
664      return False;
665   }
666}
667
668
669Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
670{
671   struct symtab_command *symcmd = NULL;
672   struct dysymtab_command *dysymcmd = NULL;
673   HChar* dsymfilename = NULL;
674   Bool have_uuid = False;
675   UChar uuid[16];
676   ImageInfo ii;  /* main file */
677   ImageInfo iid; /* auxiliary .dSYM file */
678   Bool ok;
679
680   /* mmap the object file to look for di->soname and di->text_bias
681      and uuid and nlist and STABS */
682
683   if (VG_(clo_verbosity) > 1)
684      VG_(message)(Vg_DebugMsg,
685                   "%s (%#lx)\n", di->fsm.filename, di->fsm.rx_map_avma );
686
687   /* This should be ensured by our caller (that we're in the accept
688      state). */
689   vg_assert(di->fsm.have_rx_map);
690   vg_assert(di->fsm.have_rw_map);
691
692   VG_(memset)(&ii,   0, sizeof(ii));
693   VG_(memset)(&iid,  0, sizeof(iid));
694   VG_(memset)(&uuid, 0, sizeof(uuid));
695
696   ok = map_image_aboard( di, &ii, di->fsm.filename );
697   if (!ok) goto fail;
698
699   vg_assert(ii.macho_img != NULL && ii.macho_img_szB > 0);
700
701   /* Poke around in the Mach-O header, to find some important
702      stuff. */
703   // Find LC_SYMTAB and LC_DYSYMTAB, if present.
704   // Read di->soname from LC_ID_DYLIB if present,
705   //    or from LC_ID_DYLINKER if present,
706   //    or use "NONE".
707   // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
708   // Get uuid for later dsym search
709
710   di->text_bias = 0;
711
712   { struct MACH_HEADER *mh = (struct MACH_HEADER *)ii.macho_img;
713      struct load_command *cmd;
714      Int c;
715
716      for (c = 0, cmd = (struct load_command *)(mh+1);
717           c < mh->ncmds;
718           c++, cmd = (struct load_command *)(cmd->cmdsize
719                                              + (unsigned long)cmd)) {
720         if (cmd->cmd == LC_SYMTAB) {
721            symcmd = (struct symtab_command *)cmd;
722         }
723         else if (cmd->cmd == LC_DYSYMTAB) {
724            dysymcmd = (struct dysymtab_command *)cmd;
725         }
726         else if (cmd->cmd == LC_ID_DYLIB && mh->filetype == MH_DYLIB) {
727            // GrP fixme bundle?
728            struct dylib_command *dcmd = (struct dylib_command *)cmd;
729            UChar *dylibname = dcmd->dylib.name.offset + (UChar *)dcmd;
730            UChar *soname = VG_(strrchr)(dylibname, '/');
731            if (!soname) soname = dylibname;
732            else soname++;
733            di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
734                                           soname);
735         }
736         else if (cmd->cmd==LC_ID_DYLINKER  &&  mh->filetype==MH_DYLINKER) {
737            struct dylinker_command *dcmd = (struct dylinker_command *)cmd;
738            UChar *dylinkername = dcmd->name.offset + (UChar *)dcmd;
739            UChar *soname = VG_(strrchr)(dylinkername, '/');
740            if (!soname) soname = dylinkername;
741            else soname++;
742            di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
743                                           soname);
744         }
745
746         // A comment from Julian about why varinfo[35] fail:
747         //
748         // My impression is, from comparing the output of otool -l for these
749         // executables with the logic in ML_(read_macho_debug_info),
750         // specifically the part that begins "else if (cmd->cmd ==
751         // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
752         // to work ok for text symbols.  In particular, it appears to assume
753         // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
754         // "struct SEGMENT_COMMAND" inside it is going to contain the info we
755         // need.  However, otool -l shows, and also the Apple docs state,
756         // that a struct load_command may contain an arbitrary number of
757         // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
758         // snarf the first.  But I'm not sure about this.
759         //
760         // The "Try for __DATA" block below simply adds acquisition of data
761         // svma/bias values using the same assumption.  It also needs
762         // (probably) to deal with bss sections, but I don't understand how
763         // this all ties together really, so it requires further study.
764         //
765         // If you can get your head around the relationship between MachO
766         // segments, sections and load commands, this might be relatively
767         // easy to fix properly.
768         //
769         // Basically we need to come up with plausible numbers for di->
770         // {text,data,bss}_{avma,svma}, from which the _bias numbers are
771         // then trivially derived.  Then I think the debuginfo reader should
772         // work pretty well.
773         else if (cmd->cmd == LC_SEGMENT_CMD) {
774            struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
775            /* Try for __TEXT */
776            if (!di->text_present
777                && 0 == VG_(strcmp)(seg->segname, "__TEXT")
778                /* DDD: is the  next line a kludge? -- JRS */
779                && seg->fileoff == 0 && seg->filesize != 0) {
780               di->text_present = True;
781               di->text_svma = (Addr)seg->vmaddr;
782               di->text_avma = di->fsm.rx_map_avma;
783               di->text_size = seg->vmsize;
784               di->text_bias = di->text_avma - di->text_svma;
785               /* Make the _debug_ values be the same as the
786                  svma/bias for the primary object, since there is
787                  no secondary (debuginfo) object, but nevertheless
788                  downstream biasing of Dwarf3 relies on the
789                  _debug_ values. */
790               di->text_debug_svma = di->text_svma;
791               di->text_debug_bias = di->text_bias;
792            }
793            /* Try for __DATA */
794            if (!di->data_present
795                && 0 == VG_(strcmp)(seg->segname, "__DATA")
796                /* && DDD:seg->fileoff == 0 */ && seg->filesize != 0) {
797               di->data_present = True;
798               di->data_svma = (Addr)seg->vmaddr;
799               di->data_avma = di->fsm.rw_map_avma;
800               di->data_size = seg->vmsize;
801               di->data_bias = di->data_avma - di->data_svma;
802               di->data_debug_svma = di->data_svma;
803               di->data_debug_bias = di->data_bias;
804            }
805         }
806         else if (cmd->cmd == LC_UUID) {
807             struct uuid_command *uuid_cmd = (struct uuid_command *)cmd;
808             VG_(memcpy)(uuid, uuid_cmd->uuid, sizeof(uuid));
809             have_uuid = True;
810         }
811      }
812   }
813
814   if (!di->soname) {
815      di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
816   }
817
818   if (di->trace_symtab) {
819      VG_(printf)("\n");
820      VG_(printf)("SONAME = %s\n", di->soname);
821      VG_(printf)("\n");
822   }
823
824   /* Now we have the base object to hand.  Read symbols from it. */
825
826   if (ii.macho_img && ii.macho_img_szB > 0 && symcmd && dysymcmd) {
827
828      /* Read nlist symbol table */
829      struct NLIST *syms;
830      UChar *strs;
831      XArray* /* DiSym */ candSyms = NULL;
832      Word i, nCandSyms;
833
834      if (ii.macho_img_szB < symcmd->stroff + symcmd->strsize
835          || ii.macho_img_szB < symcmd->symoff + symcmd->nsyms
836                                                 * sizeof(struct NLIST)) {
837         ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
838         goto fail;
839      }
840      if (dysymcmd->ilocalsym + dysymcmd->nlocalsym > symcmd->nsyms
841          || dysymcmd->iextdefsym + dysymcmd->nextdefsym > symcmd->nsyms) {
842         ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
843         goto fail;
844      }
845
846      syms = (struct NLIST *)(ii.macho_img + symcmd->symoff);
847      strs = (UChar *)(ii.macho_img + symcmd->stroff);
848
849      if (VG_(clo_verbosity) > 1)
850         VG_(message)(Vg_DebugMsg,
851            "   reading syms   from primary file (%d %d)\n",
852            dysymcmd->nextdefsym, dysymcmd->nlocalsym );
853
854      /* Read candidate symbols into 'candSyms', so we can truncate
855         overlapping ends and generally tidy up, before presenting
856         them to ML_(addSym). */
857      candSyms = VG_(newXA)(
858                    ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
859                    ML_(dinfo_free), sizeof(DiSym)
860                 );
861      vg_assert(candSyms);
862
863      // extern symbols
864      read_symtab(candSyms,
865                  di,
866                  syms + dysymcmd->iextdefsym, dysymcmd->nextdefsym,
867                  strs, symcmd->strsize);
868      // static and private_extern symbols
869      read_symtab(candSyms,
870                  di,
871                  syms + dysymcmd->ilocalsym, dysymcmd->nlocalsym,
872                  strs, symcmd->strsize);
873
874      /* tidy up the cand syms -- trim overlapping ends.  May resize
875         candSyms. */
876      tidy_up_cand_syms( candSyms, di->trace_symtab );
877
878      /* and finally present them to ML_(addSym) */
879      nCandSyms = VG_(sizeXA)( candSyms );
880      for (i = 0; i < nCandSyms; i++) {
881         DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
882         vg_assert(cand->pri_name != NULL);
883         vg_assert(cand->sec_names == NULL);
884         if (di->trace_symtab)
885            VG_(printf)("nlist final: acquire  avma %010lx-%010lx  %s\n",
886                        cand->addr, cand->addr + cand->size - 1,
887                        cand->pri_name );
888         ML_(addSym)( di, cand );
889      }
890      VG_(deleteXA)( candSyms );
891   }
892
893   /* If there's no UUID in the primary, don't even bother to try and
894      read any DWARF, since we won't be able to verify it matches.
895      Our policy is not to load debug info unless we can verify that
896      it matches the primary.  Just declare success at this point.
897      And don't complain to the user, since that would cause us to
898      complain on objects compiled without -g.  (Some versions of
899      XCode are observed to omit a UUID entry for object linked(?)
900      without -g.  Others don't appear to omit it.) */
901   if (!have_uuid)
902      goto success;
903
904   /* mmap the dSYM file to look for DWARF debug info.  If successful,
905      use the .macho_img and .macho_img_szB in iid. */
906
907   dsymfilename = find_separate_debug_file( di->fsm.filename );
908
909   /* Try to load it. */
910   if (dsymfilename) {
911      Bool valid;
912
913      if (VG_(clo_verbosity) > 1)
914         VG_(message)(Vg_DebugMsg, "   dSYM= %s\n", dsymfilename);
915
916      ok = map_image_aboard( di, &iid, dsymfilename );
917      if (!ok) goto fail;
918
919      /* check it has the right uuid. */
920      vg_assert(have_uuid);
921      valid = iid.macho_img && iid.macho_img_szB > 0
922              && check_uuid_matches( (Addr)iid.macho_img,
923                                     iid.macho_img_szB, uuid );
924      if (valid)
925         goto read_the_dwarf;
926
927      if (VG_(clo_verbosity) > 1)
928         VG_(message)(Vg_DebugMsg, "   dSYM does not have "
929                                   "correct UUID (out of date?)\n");
930   }
931
932   /* There was no dsym file, or it doesn't match.  We'll have to try
933      regenerating it, unless --dsymutil=no, in which case just complain
934      instead. */
935
936   /* If this looks like a lib that we shouldn't run dsymutil on, just
937      give up.  (possible reasons: is system lib, or in /usr etc, or
938      the dsym dir would not be writable by the user, or we're running
939      as root) */
940   vg_assert(di->fsm.filename);
941   if (is_systemish_library_name(di->fsm.filename))
942      goto success;
943
944   if (!VG_(clo_dsymutil)) {
945      if (VG_(clo_verbosity) == 1) {
946         VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename);
947      }
948      if (VG_(clo_verbosity) > 0)
949         VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
950                      "--dsymutil=yes\n",
951                      VG_(clo_verbosity) > 1 ? "   " : "",
952                      dsymfilename ? "has wrong UUID" : "is missing");
953      goto success;
954   }
955
956   /* Run dsymutil */
957
958   { Int r;
959     HChar* dsymutil = "/usr/bin/dsymutil ";
960     HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
961                                     VG_(strlen)(dsymutil)
962                                     + VG_(strlen)(di->fsm.filename)
963                                     + 32 /* misc */ );
964     VG_(strcpy)(cmd, dsymutil);
965     if (0) VG_(strcat)(cmd, "--verbose ");
966     VG_(strcat)(cmd, "\"");
967     VG_(strcat)(cmd, di->fsm.filename);
968     VG_(strcat)(cmd, "\"");
969     VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
970     r = VG_(system)( cmd );
971     if (r)
972        VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
973     ML_(dinfo_free)(cmd);
974     dsymfilename = find_separate_debug_file(di->fsm.filename);
975   }
976
977   /* Try again to load it. */
978   if (dsymfilename) {
979      Bool valid;
980
981      if (VG_(clo_verbosity) > 1)
982         VG_(message)(Vg_DebugMsg, "   dsyms= %s\n", dsymfilename);
983
984      ok = map_image_aboard( di, &iid, dsymfilename );
985      if (!ok) goto fail;
986
987      /* check it has the right uuid. */
988      vg_assert(have_uuid);
989      valid = iid.macho_img && iid.macho_img_szB > 0
990              && check_uuid_matches( (Addr)iid.macho_img,
991                                     iid.macho_img_szB, uuid );
992      if (!valid) {
993         if (VG_(clo_verbosity) > 0) {
994            VG_(message)(Vg_DebugMsg,
995               "WARNING: did not find expected UUID %02X%02X%02X%02X"
996               "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
997               " in dSYM dir\n",
998               (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
999               (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
1000               (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
1001               (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
1002               (UInt)uuid[14], (UInt)uuid[15] );
1003            VG_(message)(Vg_DebugMsg,
1004                         "WARNING: for %s\n", di->fsm.filename);
1005         }
1006         unmap_image( &iid );
1007         /* unmap_image zeroes the fields of iid, so the 'if (iid.img)'
1008            test at 'fail' will not unmap the image a second time. */
1009         goto fail;
1010      }
1011   }
1012
1013   /* Right.  Finally we have our best try at the dwarf image, so go
1014      on to reading stuff out of it. */
1015
1016  read_the_dwarf:
1017   if (iid.macho_img && iid.macho_img_szB > 0) {
1018      UChar* debug_info_img = NULL;
1019      Word   debug_info_sz;
1020      UChar* debug_abbv_img;
1021      Word   debug_abbv_sz;
1022      UChar* debug_line_img;
1023      Word   debug_line_sz;
1024      UChar* debug_str_img;
1025      Word   debug_str_sz;
1026      UChar* debug_ranges_img;
1027      Word   debug_ranges_sz;
1028      UChar* debug_loc_img;
1029      Word   debug_loc_sz;
1030      UChar* debug_name_img;
1031      Word   debug_name_sz;
1032
1033      debug_info_img =
1034          getsectdata(iid.macho_img, iid.macho_img_szB,
1035                      "__DWARF", "__debug_info", &debug_info_sz);
1036      debug_abbv_img =
1037          getsectdata(iid.macho_img, iid.macho_img_szB,
1038                      "__DWARF", "__debug_abbrev", &debug_abbv_sz);
1039      debug_line_img =
1040          getsectdata(iid.macho_img, iid.macho_img_szB,
1041                      "__DWARF", "__debug_line", &debug_line_sz);
1042      debug_str_img =
1043          getsectdata(iid.macho_img, iid.macho_img_szB,
1044                      "__DWARF", "__debug_str", &debug_str_sz);
1045      debug_ranges_img =
1046          getsectdata(iid.macho_img, iid.macho_img_szB,
1047                      "__DWARF", "__debug_ranges", &debug_ranges_sz);
1048      debug_loc_img =
1049          getsectdata(iid.macho_img, iid.macho_img_szB,
1050                      "__DWARF", "__debug_loc", &debug_loc_sz);
1051      debug_name_img =
1052          getsectdata(iid.macho_img, iid.macho_img_szB,
1053                      "__DWARF", "__debug_pubnames", &debug_name_sz);
1054
1055      if (debug_info_img) {
1056         if (VG_(clo_verbosity) > 1) {
1057            if (0)
1058            VG_(message)(Vg_DebugMsg,
1059                         "Reading dwarf3 for %s (%#lx) from %s"
1060                         " (%ld %ld %ld %ld %ld %ld)\n",
1061                         di->fsm.filename, di->text_avma, dsymfilename,
1062                         debug_info_sz, debug_abbv_sz, debug_line_sz,
1063                         debug_str_sz, debug_ranges_sz, debug_loc_sz
1064                         );
1065            VG_(message)(Vg_DebugMsg,
1066               "   reading dwarf3 from dsyms file\n");
1067         }
1068         /* The old reader: line numbers and unwind info only */
1069         ML_(read_debuginfo_dwarf3) ( di,
1070                                      debug_info_img, debug_info_sz,
1071				      NULL,           0,
1072                                      debug_abbv_img, debug_abbv_sz,
1073                                      debug_line_img, debug_line_sz,
1074                                      debug_str_img,  debug_str_sz );
1075
1076         /* The new reader: read the DIEs in .debug_info to acquire
1077            information on variable types and locations.  But only if
1078            the tool asks for it, or the user requests it on the
1079            command line. */
1080         if (VG_(needs).var_info /* the tool requires it */
1081             || VG_(clo_read_var_info) /* the user asked for it */) {
1082            ML_(new_dwarf3_reader)(
1083               di, debug_info_img,   debug_info_sz,
1084	           NULL,             0,
1085                   debug_abbv_img,   debug_abbv_sz,
1086                   debug_line_img,   debug_line_sz,
1087                   debug_str_img,    debug_str_sz,
1088                   debug_ranges_img, debug_ranges_sz,
1089                   debug_loc_img,    debug_loc_sz
1090            );
1091         }
1092      }
1093   }
1094
1095   if (dsymfilename) ML_(dinfo_free)(dsymfilename);
1096
1097  success:
1098   if (ii.img)
1099      unmap_image(&ii);
1100   if (iid.img)
1101      unmap_image(&iid);
1102   return True;
1103
1104   /* NOTREACHED */
1105
1106  fail:
1107   ML_(symerr)(di, True, "Error reading Mach-O object.");
1108   if (ii.img)
1109      unmap_image(&ii);
1110   if (iid.img)
1111      unmap_image(&iid);
1112   return False;
1113}
1114
1115#endif // defined(VGO_darwin)
1116
1117/*--------------------------------------------------------------------*/
1118/*--- end                                                          ---*/
1119/*--------------------------------------------------------------------*/
1120