1
2/* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c.
3   GPL 2+ therefore.
4
5   Can be compiled as either a 32- or 64-bit program (doesn't matter).
6*/
7
8/* What does this program do?  In short it postprocesses tool
9   executables on MacOSX, after linking using /usr/bin/ld.  This is so
10   as to work around a bug in the linker on Xcode 4.0.0 and Xcode
11   4.0.1.  Xcode versions prior to 4.0.0 are unaffected.
12
13   The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997
14
15   The bug causes 64-bit tool executables to segfault at startup,
16   because:
17
18   Comparing the MachO load commands vs a (working) tool executable
19   that was created by Xcode 3.2.x, it appears that the new linker has
20   partially ignored the build system's request to place the tool
   executable's stack at a non-standard location.  The build system
22   tells the linker "-stack_addr 0x134000000 -stack_size 0x800000".
23
24   With the Xcode 3.2 linker those flags produce two results:
25
26   (1) A load command to allocate the stack at the said location:
27          Load command 3
28                cmd LC_SEGMENT_64
29            cmdsize 72
30            segname __UNIXSTACK
31             vmaddr 0x0000000133800000
32             vmsize 0x0000000000800000
33            fileoff 2285568
34           filesize 0
35            maxprot 0x00000007
36           initprot 0x00000003
37             nsects 0
38              flags 0x0
39
40   (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value
41       at process startup, 0x134000000.
42
43   With Xcode 4.0.1, (1) is missing but (2) is still present.  The
44   tool executable therefore starts up with %rsp pointing to unmapped
45   memory and faults almost instantly.
46
47   The workaround implemented by this program is documented in comment
48   8 of bug 267997, viz:
49
50   One really sick workaround is to observe that the executables
51   contain a redundant MachO load command:
52
53      Load command 2
54            cmd LC_SEGMENT_64
55        cmdsize 72
56        segname __LINKEDIT
57         vmaddr 0x0000000138dea000
58         vmsize 0x00000000000ad000
59        fileoff 2658304
60       filesize 705632
61        maxprot 0x00000007
62       initprot 0x00000001
63         nsects 0
64          flags 0x0
65
   The segment described by this load command presumably contains
   information intended for the dynamic linker, but is irrelevant
   because this is a statically linked executable.  Hence it might be
   possible to postprocess the executables after linking, to overwrite
   this entry with the information that would have been in the missing
   __UNIXSTACK entry.
71   I tried this by hand (with a binary editor) earlier and got
72   something that worked.
73*/
74
75#define DEBUGPRINTING 0
76
77#include <assert.h>
78#include <stdlib.h>
79#include <stdio.h>
80#include <string.h>
81#include <sys/mman.h>
82#include <sys/stat.h>
83#include <unistd.h>
84#include <fcntl.h>
85
86
87#undef PLAT_x86_darwin
88#undef PLAT_amd64_darwin
89
90#if defined(__APPLE__) && defined(__i386__)
91#  define PLAT_x86_darwin 1
92#elif defined(__APPLE__) && defined(__x86_64__)
93#  define PLAT_amd64_darwin 1
94#else
95#  error "Can't be compiled on this platform"
96#endif
97
98#include <mach-o/loader.h>
99#include <mach-o/nlist.h>
100#include <mach-o/fat.h>
101#include <mach/i386/thread_status.h>
102
103
/* Short names for the basic scalar types used throughout this file. */

/* Character-sized types. */
typedef unsigned char UChar;
typedef signed char   Char;
typedef char          HChar; /* host 'char': signedness depends on host */

/* Plain ints. */
typedef unsigned int UInt;
typedef signed int   Int;

/* Boolean, held in an unsigned char. */
typedef unsigned char Bool;
#define True  ((Bool)1)
#define False ((Bool)0)

/* 'unsigned long', plus the aliases used for sizes and addresses. */
typedef unsigned long UWord;
typedef UWord         SizeT;
typedef UWord         Addr;

/* 'long long' flavours. */
typedef unsigned long long int ULong;
typedef signed long long int   Long;
122
123
124
125__attribute__((noreturn))
126void fail ( HChar* msg )
127{
128   fprintf(stderr, "fixup_macho_loadcmds: fail: %s\n", msg);
129   exit(1);
130}
131
132
133/*------------------------------------------------------------*/
134/*---                                                      ---*/
135/*--- Mach-O file mapping/unmapping helpers                ---*/
136/*---                                                      ---*/
137/*------------------------------------------------------------*/
138
139typedef
140   struct {
141      /* These two describe the entire mapped-in ("primary") image,
142         fat headers, kitchen sink, whatnot: the entire file.  The
143         image is mapped into img[0 .. img_szB-1]. */
144      UChar* img;
145      SizeT  img_szB;
146      /* These two describe the Mach-O object of interest, which is
147         presumably somewhere inside the primary image.
148         map_image_aboard() below, which generates this info, will
149         carefully check that the macho_ fields denote a section of
150         memory that falls entirely inside img[0 .. img_szB-1]. */
151      UChar* macho_img;
152      SizeT  macho_img_szB;
153   }
154   ImageInfo;
155
156
157Bool is_macho_object_file( const void* buf, SizeT szB )
158{
159   /* (JRS: the Mach-O headers might not be in this mapped data,
160      because we only mapped a page for this initial check,
161      or at least not very much, and what's at the start of the file
162      is in general a so-called fat header.  The Mach-O object we're
163      interested in could be arbitrarily far along the image, and so
164      we can't assume its header will fall within this page.) */
165
166   /* But we can say that either it's a fat object, in which case it
167      begins with a fat header, or it's unadorned Mach-O, in which
168      case it starts with a normal header.  At least do what checks we
169      can to establish whether or not we're looking at something
170      sane. */
171
172   const struct fat_header*  fh_be = buf;
173   const struct mach_header_64* mh    = buf;
174
175   assert(buf);
176   if (szB < sizeof(struct fat_header))
177      return False;
178   if (ntohl(fh_be->magic) == FAT_MAGIC)
179      return True;
180
181   if (szB < sizeof(struct mach_header_64))
182      return False;
183   if (mh->magic == MH_MAGIC_64)
184      return True;
185
186   return False;
187}
188
189
190/* Unmap an image mapped in by map_image_aboard. */
191static void unmap_image ( /*MOD*/ImageInfo* ii )
192{
193   Int r;
194   assert(ii->img);
195   assert(ii->img_szB > 0);
196   r = munmap( ii->img, ii->img_szB );
197   /* Do we care if this fails?  I suppose so; it would indicate
198      some fairly serious snafu with the mapping of the file. */
199   assert( !r );
200   memset(ii, 0, sizeof(*ii));
201}
202
203
204/* Map a given fat or thin object aboard, find the thin part if
205   necessary, do some checks, and write details of both the fat and
206   thin parts into *ii.  Returns 32 (and leaves the file unmapped) if
207   the thin part is a 32 bit file.  Returns 64 if it's a 64 bit file.
208   Does not return on failure.  Guarantees to return pointers to a
209   valid(ish) Mach-O image if it succeeds. */
210static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename )
211{
212   memset(ii, 0, sizeof(*ii));
213
214   /* First off, try to map the thing in. */
215   { SizeT  size;
216     Int r, fd;
217     struct stat stat_buf;
218
219     r = stat(filename, &stat_buf);
220     if (r)
221        fail("Can't stat image (to determine its size)?!");
222     size = stat_buf.st_size;
223
224     fd = open(filename, O_RDWR, 0);
225     if (fd == -1)
226        fail("Can't open image for possible modification!");
227     if (DEBUGPRINTING)
228        printf("size %lu fd %d\n", size, fd);
229     void* v = mmap ( NULL, size, PROT_READ|PROT_WRITE,
230                                  MAP_FILE|MAP_SHARED, fd, 0 );
231     if (v == MAP_FAILED) {
232        perror("mmap failed");
233        fail("Can't mmap image for possible modification!");
234     }
235
236     close(fd);
237
238     ii->img     = (UChar*)v;
239     ii->img_szB = size;
240   }
241
242   /* Now it's mapped in and we have .img and .img_szB set.  Look for
243      the embedded Mach-O object.  If not findable, unmap and fail. */
244   { struct fat_header*  fh_be;
245     struct fat_header   fh;
246     struct mach_header_64* mh;
247
248     // Assume initially that we have a thin image, and update
249     // these if it turns out to be fat.
250     ii->macho_img     = ii->img;
251     ii->macho_img_szB = ii->img_szB;
252
253     // Check for fat header.
254     if (ii->img_szB < sizeof(struct fat_header))
255        fail("Invalid Mach-O file (0 too small).");
256
257     // Fat header is always BIG-ENDIAN
258     fh_be = (struct fat_header *)ii->img;
259     fh.magic = ntohl(fh_be->magic);
260     fh.nfat_arch = ntohl(fh_be->nfat_arch);
261     if (fh.magic == FAT_MAGIC) {
262        // Look for a good architecture.
263        struct fat_arch *arch_be;
264        struct fat_arch arch;
265        Int f;
266        if (ii->img_szB < sizeof(struct fat_header)
267                          + fh.nfat_arch * sizeof(struct fat_arch))
268           fail("Invalid Mach-O file (1 too small).");
269
270        for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
271             f < fh.nfat_arch;
272             f++, arch_be++) {
273           Int cputype;
274#          if defined(PLAT_x86_darwin)
275           cputype = CPU_TYPE_X86;
276#          elif defined(PLAT_amd64_darwin)
277           cputype = CPU_TYPE_X86_64;
278#          else
279#            error "unknown architecture"
280#          endif
281           arch.cputype    = ntohl(arch_be->cputype);
282           arch.cpusubtype = ntohl(arch_be->cpusubtype);
283           arch.offset     = ntohl(arch_be->offset);
284           arch.size       = ntohl(arch_be->size);
285           if (arch.cputype == cputype) {
286              if (ii->img_szB < arch.offset + arch.size)
287                 fail("Invalid Mach-O file (2 too small).");
288              ii->macho_img     = ii->img + arch.offset;
289              ii->macho_img_szB = arch.size;
290              break;
291           }
292        }
293        if (f == fh.nfat_arch)
294           fail("No acceptable architecture found in fat file.");
295     }
296
297     /* Sanity check what we found. */
298
299     /* assured by logic above */
300     assert(ii->img_szB >= sizeof(struct fat_header));
301
302     if (ii->macho_img_szB < sizeof(struct mach_header_64))
303        fail("Invalid Mach-O file (3 too small).");
304
305     if (ii->macho_img_szB > ii->img_szB)
306        fail("Invalid Mach-O file (thin bigger than fat).");
307
308     if (ii->macho_img >= ii->img
309         && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
310        /* thin entirely within fat, as expected */
311     } else {
312        fail("Invalid Mach-O file (thin not inside fat).");
313     }
314
315     mh = (struct mach_header_64 *)ii->macho_img;
316     if (mh->magic == MH_MAGIC) {
317        assert(ii->img);
318        assert(ii->macho_img);
319        assert(ii->img_szB > 0);
320        assert(ii->macho_img_szB > 0);
321        assert(ii->macho_img >= ii->img);
322        assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
323        return 32;
324     }
325     if (mh->magic != MH_MAGIC_64)
326        fail("Invalid Mach-O file (bad magic).");
327
328     if (ii->macho_img_szB < sizeof(struct mach_header_64) + mh->sizeofcmds)
329        fail("Invalid Mach-O file (4 too small).");
330   }
331
332   assert(ii->img);
333   assert(ii->macho_img);
334   assert(ii->img_szB > 0);
335   assert(ii->macho_img_szB > 0);
336   assert(ii->macho_img >= ii->img);
337   assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
338   return 64;
339}
340
341
342/*------------------------------------------------------------*/
343/*---                                                      ---*/
344/*--- Mach-O top-level processing                          ---*/
345/*---                                                      ---*/
346/*------------------------------------------------------------*/
347
348void modify_macho_loadcmds ( HChar* filename,
349                             ULong  expected_stack_start,
350                             ULong  expected_stack_size )
351{
352   ImageInfo ii;
353   memset(&ii, 0, sizeof(ii));
354
355   Int size = map_image_aboard( &ii, filename );
356   if (size == 32) {
357      fprintf(stderr, "fixup_macho_loadcmds:   Is 32-bit MachO file;"
358              " no modifications needed.\n");
359      goto out;
360   }
361
362   assert(size == 64);
363
364   assert(ii.macho_img != NULL && ii.macho_img_szB > 0);
365
366   /* Poke around in the Mach-O header, to find some important
367      stuff.
368      * the location of the __UNIXSTACK load command, if any
369      * the location of the __LINKEDIT load command, if any
370      * the initial RSP value as stated in the LC_UNIXTHREAD
371   */
372
373   /* The collected data */
374   ULong init_rsp = 0;
375   Bool  have_rsp = False;
376   struct segment_command_64* seg__unixstack = NULL;
377   struct segment_command_64* seg__linkedit  = NULL;
378
379   /* Loop over the load commands and fill in the above 4 variables. */
380
381   { struct mach_header_64 *mh = (struct mach_header_64 *)ii.macho_img;
382      struct load_command *cmd;
383      Int c;
384
385      for (c = 0, cmd = (struct load_command *)(mh+1);
386           c < mh->ncmds;
387           c++, cmd = (struct load_command *)(cmd->cmdsize
388                                              + (unsigned long)cmd)) {
389         if (DEBUGPRINTING)
390            printf("load cmd: offset %4lu   size %3d   kind %2d = ",
391                   (unsigned long)((UChar*)cmd - (UChar*)ii.macho_img),
392                   cmd->cmdsize, cmd->cmd);
393
394         switch (cmd->cmd) {
395            case LC_SEGMENT_64:
396               if (DEBUGPRINTING)
397                  printf("LC_SEGMENT_64");
398               break;
399            case LC_SYMTAB:
400               if (DEBUGPRINTING)
401                  printf("LC_SYMTAB");
402               break;
403            case LC_DYSYMTAB:
404               if (DEBUGPRINTING)
405                  printf("LC_DYSYMTAB");
406               break;
407            case LC_UUID:
408               if (DEBUGPRINTING)
409                  printf("LC_UUID");
410               break;
411            case LC_UNIXTHREAD:
412               if (DEBUGPRINTING)
413                  printf("LC_UNIXTHREAD");
414               break;
415            default:
416                  printf("???");
417               fail("unexpected load command in Mach header");
418            break;
419         }
420         if (DEBUGPRINTING)
421            printf("\n");
422
423         /* Note what the stated initial RSP value is, so we can
424            check it is as expected. */
425         if (cmd->cmd == LC_UNIXTHREAD) {
426            struct thread_command* tcmd = (struct thread_command*)cmd;
427            UInt* w32s = (UInt*)( (UChar*)tcmd + sizeof(*tcmd) );
428            if (DEBUGPRINTING)
429               printf("UnixThread: flavor %u = ", w32s[0]);
430            if (w32s[0] == x86_THREAD_STATE64 && !have_rsp) {
431               if (DEBUGPRINTING)
432                  printf("x86_THREAD_STATE64\n");
433               x86_thread_state64_t* state64
434                  = (x86_thread_state64_t*)(&w32s[2]);
435               have_rsp = True;
436               init_rsp = state64->__rsp;
437               if (DEBUGPRINTING)
438                  printf("rsp = 0x%llx\n", init_rsp);
439            } else {
440               if (DEBUGPRINTING)
441                  printf("???");
442            }
443            if (DEBUGPRINTING)
444               printf("\n");
445         }
446
447         if (cmd->cmd == LC_SEGMENT_64) {
448            struct segment_command_64 *seg = (struct segment_command_64 *)cmd;
449            if (0 == strcmp(seg->segname, "__LINKEDIT"))
450               seg__linkedit = seg;
451            if (0 == strcmp(seg->segname, "__UNIXSTACK"))
452               seg__unixstack = seg;
453         }
454
455      }
456   }
457
458   /*
459      Actions are then as follows:
460
461      * (always) check the RSP value is as expected, and abort if not
462
463      * if there's a UNIXSTACK load command, check it is as expected.
464        If not abort, if yes, do nothing more.
465
466      * (so there's no UNIXSTACK load command).  if there's a LINKEDIT
467        load command, check if it is minimally usable (has 0 for
468        nsects and flags).  If yes, convert it to a UNIXSTACK load
469        command.  If there is none, or is unusable, then we're out of
470        options and have to abort.
471   */
472   if (!have_rsp)
473      fail("Can't find / check initial RSP setting");
474   if (init_rsp != expected_stack_start + expected_stack_size)
475      fail("Initial RSP value not as expected");
476
477   fprintf(stderr, "fixup_macho_loadcmds:   "
478                   "initial RSP is as expected (0x%llx)\n",
479                   expected_stack_start + expected_stack_size );
480
481   if (seg__unixstack) {
482      struct segment_command_64 *seg = seg__unixstack;
483      if (seg->vmaddr != expected_stack_start)
484         fail("has __UNIXSTACK, but wrong ::vmaddr");
485      if (seg->vmsize != expected_stack_size)
486         fail("has __UNIXSTACK, but wrong ::vmsize");
487      if (seg->maxprot != 7)
488         fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)");
489      if (seg->initprot != 3)
490         fail("has __UNIXSTACK, but wrong ::initprot (should be 3)");
491      if (seg->nsects != 0)
492         fail("has __UNIXSTACK, but wrong ::nsects (should be 0)");
493      if (seg->flags != 0)
494         fail("has __UNIXSTACK, but wrong ::flags (should be 0)");
495      /* looks ok */
496      fprintf(stderr, "fixup_macho_loadcmds:   "
497              "acceptable __UNIXSTACK present; no modifications.\n" );
498      goto out;
499   }
500
501   if (seg__linkedit) {
502      struct segment_command_64 *seg = seg__linkedit;
503      if (seg->nsects != 0)
504         fail("has __LINKEDIT, but wrong ::nsects (should be 0)");
505      if (seg->flags != 0)
506         fail("has __LINKEDIT, but wrong ::flags (should be 0)");
507      fprintf(stderr, "fixup_macho_loadcmds:   "
508              "no __UNIXSTACK present.\n" );
509      fprintf(stderr, "fixup_macho_loadcmds:   "
510              "converting __LINKEDIT to __UNIXSTACK.\n" );
511      strcpy(seg->segname, "__UNIXSTACK");
512      seg->vmaddr   = expected_stack_start;
513      seg->vmsize   = expected_stack_size;
514      seg->fileoff  = 0;
515      seg->filesize = 0;
516      seg->maxprot  = 7;
517      seg->initprot = 3;
518      /* success */
519      goto out;
520   }
521
522   /* out of options */
523   fail("no __UNIXSTACK found and no usable __LINKEDIT found; "
524        "out of options.");
525   /* NOTREACHED */
526
527  out:
528   if (ii.img)
529      unmap_image(&ii);
530}
531
532
533static Bool is_plausible_tool_exe_name ( HChar* nm )
534{
535   HChar* p;
536   if (!nm)
537      return False;
538
539   // Does it end with this string?
540   p = strstr(nm, "-x86-darwin");
541   if (p && 0 == strcmp(p, "-x86-darwin"))
542      return True;
543
544   p = strstr(nm, "-amd64-darwin");
545   if (p && 0 == strcmp(p, "-amd64-darwin"))
546      return True;
547
548   return False;
549}
550
551
552int main ( int argc, char** argv )
553{
554   Int   r;
555   ULong req_stack_addr = 0;
556   ULong req_stack_size = 0;
557
558   if (argc != 4)
559      fail("args: -stack_addr-arg -stack_size-arg "
560           "name-of-tool-executable-to-modify");
561
562   r= sscanf(argv[1], "0x%llx", &req_stack_addr);
563   if (r != 1) fail("invalid stack_addr arg");
564
565   r= sscanf(argv[2], "0x%llx", &req_stack_size);
566   if (r != 1) fail("invalid stack_size arg");
567
568   fprintf(stderr, "fixup_macho_loadcmds: "
569           "requested stack_addr (top) 0x%llx, "
570           "stack_size 0x%llx\n", req_stack_addr, req_stack_size );
571
572   if (!is_plausible_tool_exe_name(argv[3]))
573      fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin");
574
575   fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n",
576           argv[3] );
577   modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size,
578                          req_stack_size );
579
580   return 0;
581}
582
583/*
584      cmd LC_SEGMENT_64
585  cmdsize 72
586  segname __LINKEDIT
587   vmaddr 0x0000000138dea000
588   vmsize 0x00000000000ad000
589  fileoff 2658304
590 filesize 705632
591  maxprot 0x00000007
592 initprot 0x00000001
593   nsects 0
594    flags 0x0
595*/
596
597/*
598      cmd LC_SEGMENT_64
599  cmdsize 72
600  segname __UNIXSTACK
601   vmaddr 0x0000000133800000
602   vmsize 0x0000000000800000
603  fileoff 2498560
604 filesize 0
605  maxprot 0x00000007
606 initprot 0x00000003
607   nsects 0
608    flags 0x0
609*/
610