
/*--------------------------------------------------------------------*/
/*--- Darwin-specific syscalls, etc.          syswrap-x86-darwin.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2005-2017 Apple Inc.
      Greg Parker  gparker@apple.com

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_x86_darwin)

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_xarray.h"
#include "pub_core_clientstate.h"
#include "pub_core_debuglog.h"
#include "pub_core_debuginfo.h"    // VG_(di_notify_*)
#include "pub_core_transtab.h"     // VG_(discard_translations)
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_libcsignal.h"
#include "pub_core_mallocfree.h"
#include "pub_core_options.h"
#include "pub_core_scheduler.h"
#include "pub_core_signals.h"
#include "pub_core_syscall.h"
#include "pub_core_syswrap.h"
#include "pub_core_tooliface.h"

#include "priv_types_n_macros.h"
#include "priv_syswrap-generic.h"   /* for decls of generic wrappers */
#include "priv_syswrap-darwin.h"    /* for decls of darwin-ish wrappers */
#include "priv_syswrap-main.h"


#include <mach/mach.h>

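/* Copy the integer, segment and flag registers from a VEX guest state
   into a Mach i386_thread_state_t.  EFLAGS has no single home in the
   guest state, so it is recomputed with LibVEX_GuestX86_get_eflags(). */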
static void x86_thread_state32_from_vex(i386_thread_state_t *mach,
                                        VexGuestX86State *vex)
{
   mach->__eax = vex->guest_EAX;
   mach->__ebx = vex->guest_EBX;
   mach->__ecx = vex->guest_ECX;
   mach->__edx = vex->guest_EDX;
   mach->__edi = vex->guest_EDI;
   mach->__esi = vex->guest_ESI;
   mach->__ebp = vex->guest_EBP;
   mach->__esp = vex->guest_ESP;
   mach->__ss = vex->guest_SS;
   mach->__eflags = LibVEX_GuestX86_get_eflags(vex);
   mach->__eip = vex->guest_EIP;
   mach->__cs = vex->guest_CS;
   mach->__ds = vex->guest_DS;
   mach->__es = vex->guest_ES;
   mach->__fs = vex->guest_FS;
   mach->__gs = vex->guest_GS;
}


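/* Copy the FP-side registers from a VEX guest state into a Mach
   i386_float_state_t.  Only the eight XMM registers are copied; the
   x87 state and MXCSR are not yet handled (see the fixme below). */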
static void x86_float_state32_from_vex(i386_float_state_t *mach,
                                       VexGuestX86State *vex)
{
   // DDD: #warning GrP fixme fp state

   VG_(memcpy)(&mach->__fpu_xmm0, &vex->guest_XMM0, 8 * sizeof(mach->__fpu_xmm0));
}


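/* Convert a VEX guest state into a Mach thread state of the requested
   flavor.  Only the integer and float flavors are handled; any other
   flavor asserts. */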
void thread_state_from_vex(thread_state_t mach_generic,
                           thread_state_flavor_t flavor,
                           mach_msg_type_number_t count,
                           VexGuestArchState *vex_generic)
{
   VexGuestX86State *vex = (VexGuestX86State *)vex_generic;

   switch (flavor) {
   case i386_THREAD_STATE:
      vg_assert(count == i386_THREAD_STATE_COUNT);
      x86_thread_state32_from_vex((i386_thread_state_t *)mach_generic, vex);
      break;

   case i386_FLOAT_STATE:
      vg_assert(count == i386_FLOAT_STATE_COUNT);
      x86_float_state32_from_vex((i386_float_state_t *)mach_generic, vex);
      break;

   default:
      vg_assert(0);
   }
}


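/* The reverse conversion: initialise a VEX guest state from a Mach
   i386_thread_state_t.  Note that EFLAGS is not restored (see the
   fixme below); the guest keeps the default flags state set up by
   LibVEX_GuestX86_initialise(). */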
static void x86_thread_state32_to_vex(const i386_thread_state_t *mach,
                                      VexGuestX86State *vex)
{
   LibVEX_GuestX86_initialise(vex);
   vex->guest_EAX = mach->__eax;
   vex->guest_EBX = mach->__ebx;
   vex->guest_ECX = mach->__ecx;
   vex->guest_EDX = mach->__edx;
   vex->guest_EDI = mach->__edi;
   vex->guest_ESI = mach->__esi;
   vex->guest_EBP = mach->__ebp;
   vex->guest_ESP = mach->__esp;
   vex->guest_SS = mach->__ss;
   // DDD: #warning GrP fixme eflags
   vex->guest_EIP = mach->__eip;
   vex->guest_CS = mach->__cs;
   vex->guest_DS = mach->__ds;
   vex->guest_ES = mach->__es;
   vex->guest_FS = mach->__fs;
   vex->guest_GS = mach->__gs;
}

static void x86_float_state32_to_vex(const i386_float_state_t *mach,
                                     VexGuestX86State *vex)
{
   // DDD: #warning GrP fixme fp state

   VG_(memcpy)(&vex->guest_XMM0, &mach->__fpu_xmm0, 8 * sizeof(mach->__fpu_xmm0));
}


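/* Convert a Mach thread state of the given flavor into a VEX guest
   state.  As with thread_state_from_vex(), unsupported flavors
   assert. */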
void thread_state_to_vex(const thread_state_t mach_generic,
                         thread_state_flavor_t flavor,
                         mach_msg_type_number_t count,
                         VexGuestArchState *vex_generic)
{
   VexGuestX86State *vex = (VexGuestX86State *)vex_generic;

   switch (flavor) {
   case i386_THREAD_STATE:
      vg_assert(count == i386_THREAD_STATE_COUNT);
      x86_thread_state32_to_vex((const i386_thread_state_t *)mach_generic, vex);
      break;
   case i386_FLOAT_STATE:
      vg_assert(count == i386_FLOAT_STATE_COUNT);
      x86_float_state32_to_vex((const i386_float_state_t *)mach_generic, vex);
      break;

   default:
      vg_assert(0);
      break;
   }
}


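/* Build a fresh ThreadState whose guest registers are initialised
   from the given Mach thread state.  Unfinished: the signal state and
   os_state.parent are not set up yet, hence the I_die_here below. */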
ThreadState *build_thread(const thread_state_t state,
                          thread_state_flavor_t flavor,
                          mach_msg_type_number_t count)
{
   ThreadId tid = VG_(alloc_ThreadState)();
   ThreadState *tst = VG_(get_ThreadState)(tid);

   vg_assert(flavor == i386_THREAD_STATE);
   vg_assert(count == i386_THREAD_STATE_COUNT);

   // Initialize machine registers

   thread_state_to_vex(state, flavor, count, &tst->arch.vex);

   I_die_here;
   // GrP fixme signals, sig_mask, tmp_sig_mask, os_state.parent

   find_stack_segment(tid, tst->arch.vex.guest_ESP);

   return tst;
}


// Edit the thread state to send to the real kernel.
// The real thread will run start_thread_NORETURN(tst)
// on a separate non-client stack.
void hijack_thread_state(thread_state_t mach_generic,
                         thread_state_flavor_t flavor,
                         mach_msg_type_number_t count,
                         ThreadState *tst)
{
   i386_thread_state_t *mach = (i386_thread_state_t *)mach_generic;
   char *stack;

   vg_assert(flavor == i386_THREAD_STATE);
   vg_assert(count == i386_THREAD_STATE_COUNT);

   stack = (char *)allocstack(tst->tid);
   stack -= 64+320;                       // make room for top frame
   memset(stack, 0, 64+320);              // ...and clear it
   *(uintptr_t *)stack = (uintptr_t)tst;  // set parameter
   stack -= sizeof(uintptr_t);
   *(uintptr_t *)stack = 0;               // push fake return address

   mach->__eip = (uintptr_t)&start_thread_NORETURN;
   mach->__esp = (uintptr_t)stack;
}


/* Call f(arg1), but first switch stacks, using 'stack' as the new
   stack, and use 'retaddr' as f's return-to address.  Also, clear all
   the integer registers before entering f. */
__attribute__((noreturn))
void call_on_new_stack_0_1 ( Addr stack,
                             Addr retaddr,
                             void (*f)(Word),
                             Word arg1 );
//  4(%esp) == stack (must be 16-byte aligned)
//  8(%esp) == retaddr
// 12(%esp) == f
// 16(%esp) == arg1
asm(
".globl _call_on_new_stack_0_1\n"
"_call_on_new_stack_0_1:\n"
"   movl %esp, %esi\n"     // remember old stack pointer
"   movl 4(%esi), %esp\n"  // set new stack
"   pushl $0\n"            // align stack
"   pushl $0\n"            // align stack
"   pushl $0\n"            // align stack
"   pushl 16(%esi)\n"      // arg1 to stack
"   pushl  8(%esi)\n"      // retaddr to stack
"   pushl 12(%esi)\n"      // f to stack
"   movl $0, %eax\n"       // zero all GP regs
"   movl $0, %ebx\n"
"   movl $0, %ecx\n"
"   movl $0, %edx\n"
"   movl $0, %esi\n"
"   movl $0, %edi\n"
"   movl $0, %ebp\n"
"   ret\n"                 // jump to f
"   ud2\n"                 // should never get here
);


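/* Where hijacked pthreads first land.  The kernel delivers the setup
   values in registers: self in %eax, kport in %ebx, func in %ecx,
   func_arg in %edx, stacksize in %edi and flags in %esi.  This stub
   pushes them (plus the original %esp) in reverse order so that
   pthread_hijack() receives them as ordinary C arguments. */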
asm(
".globl _pthread_hijack_asm\n"
"_pthread_hijack_asm:\n"
"   movl %esp,%ebp\n"
"   push $0\n"    // alignment pad
"   push %ebp\n"  // original sp
"   push %esi\n"  // flags
"   push %edi\n"  // stacksize
"   push %edx\n"  // func_arg
"   push %ecx\n"  // func
"   push %ebx\n"  // kport
"   push %eax\n"  // self
"   push $0\n"    // fake return address
"   jmp _pthread_hijack\n"
    );



void pthread_hijack(Addr self, Addr kport, Addr func, Addr func_arg,
                    Addr stacksize, Addr flags, Addr sp)
{
   vki_sigset_t blockall;
   ThreadState *tst = (ThreadState *)func_arg;
   VexGuestX86State *vex = &tst->arch.vex;

   // VG_(printf)("pthread_hijack pthread %p, machthread %p, func %p, arg %p, stacksize %p, flags %p, sp %p\n", self, kport, func, func_arg, stacksize, flags, sp);

   // Wait for parent thread's permission.
   // The parent thread holds V's lock on our behalf.
   semaphore_wait(tst->os_state.child_go);

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace,
   // which requires valid register data.
   // DDD: need to do post_reg_write events here?
   LibVEX_GuestX86_initialise(vex);
   vex->guest_EIP = pthread_starter;
   vex->guest_EAX = self;
   vex->guest_EBX = kport;
   vex->guest_ECX = func;
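   // func_arg was repurposed to carry our tst (see the cast at the top
   // of this function); the client's real argument was stashed in
   // os_state.func_arg by the bsdthread_create wrapper, so hand that
   // to the client thread instead.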
   vex->guest_EDX = tst->os_state.func_arg;
   vex->guest_EDI = stacksize;
   vex->guest_ESI = flags;
   vex->guest_ESP = sp;

   // Record thread's stack and Mach port and pthread struct
   tst->os_state.pthread = self;
   tst->os_state.lwpid = kport;
   record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "thread-%p");

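   /* Bit 0x01000000 in flags is, apparently, PTHREAD_START_CUSTOM:
      set when the client supplied its own stack, clear when the
      kernel allocated one for it. */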
   if ((flags & 0x01000000) == 0) {
      // kernel allocated stack - needs mapping
      Addr stack = VG_PGROUNDUP(sp) - stacksize;
      tst->client_stack_highest_byte = stack+stacksize-1;
      tst->client_stack_szB = stacksize;

      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE,
            0, VKI_MAP_PRIVATE, -1, 0);
   } else {
      // client allocated stack
      find_stack_segment(tst->tid, sp);
   }
   ML_(sync_mappings)("after", "pthread_hijack", 0);

   // DDD: should this be here rather than in POST(sys_bsdthread_create)?
   // But we don't have ptid here...
   //VG_TRACK ( pre_thread_ll_create, ptid, tst->tid );

   // Tell parent thread's POST(sys_bsdthread_create) that we're done
   // initializing registers and mapping memory.
   semaphore_signal(tst->os_state.child_done);
   // LOCK IS GONE BELOW THIS POINT

   // Go!
   call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0,
                         start_thread_NORETURN, (Word)tst);

   /*NOTREACHED*/
   vg_assert(0);
}



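/* The workqueue-thread analogue of _pthread_hijack_asm: the kernel
   delivers self in %eax, kport in %ebx, stackaddr in %ecx, workitem
   in %edx and reuse in %edi, and this stub repackages them (plus the
   original %esp) as C arguments to wqthread_hijack(). */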
asm(
".globl _wqthread_hijack_asm\n"
"_wqthread_hijack_asm:\n"
"   movl %esp,%ebp\n"
"   push $0\n"    // alignment
"   push $0\n"    // alignment
"   push %ebp\n"  // original sp
"   push %edi\n"  // reuse
"   push %edx\n"  // workitem
"   push %ecx\n"  // stackaddr
"   push %ebx\n"  // kport
"   push %eax\n"  // self
"   push $0\n"    // fake return address
"   jmp _wqthread_hijack\n"
    );


/*  wqthread note: The kernel may create or destroy pthreads in the
    wqthread pool at any time with no userspace interaction,
    and wqthread_start may be entered at any time with no userspace
    interaction.
    To handle this in valgrind, we create and destroy a valgrind
    thread for every work item.
*/
void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem,
                     Int reuse, Addr sp)
{
   ThreadState *tst;
   VexGuestX86State *vex;
   Addr stack;
   SizeT stacksize;
   vki_sigset_t blockall;

   /* When we enter here we hold no lock (!), so we'd better acquire it
      pronto.  Why do we hold no lock?  Because (presumably) the only
      way to get here is as a result of a SfMayBlock syscall
      "workq_ops(WQOPS_THREAD_RETURN)", which will have dropped the
      lock.  At least that's clear for the 'reuse' case.  The
      non-reuse case?  Dunno, perhaps it's a new thread the kernel
      pulled out of a hat.  In any case we still need to take a
      lock. */
   VG_(acquire_BigLock_LL)("wqthread_hijack");

   if (0) VG_(printf)(
             "wqthread_hijack: self %#lx, kport %#lx, "
             "stackaddr %#lx, workitem %#lx, reuse/flags %x, sp %#lx\n",
             self, kport, stackaddr, workitem, reuse, sp);

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   /* For 10.7 and earlier, |reuse| appeared to be used as a simple
      boolean.  In 10.8 and later its name changed to |flags| and has
      various other bits OR-d into it too, so it's necessary to fish
      out just the relevant parts.  Hence: */
#  if DARWIN_VERS <= DARWIN_10_7
   Bool is_reuse = reuse != 0;
#  elif DARWIN_VERS > DARWIN_10_7
   Bool is_reuse = (reuse & 0x20000 /* == WQ_FLAG_THREAD_REUSE */) != 0;
#  else
#    error "Unsupported Darwin version"
#  endif

   if (is_reuse) {

      /* For whatever reason, tst->os_state.pthread appears to have a
         constant offset of 72 on 10.7, but zero on 10.6 and 10.5.  No
         idea why. */
#     if DARWIN_VERS <= DARWIN_10_6
      UWord magic_delta = 0;
#     elif DARWIN_VERS == DARWIN_10_7 || DARWIN_VERS == DARWIN_10_8
      UWord magic_delta = 0x48;
#     elif DARWIN_VERS == DARWIN_10_9 \
           || DARWIN_VERS == DARWIN_10_10 \
           || DARWIN_VERS == DARWIN_10_11 \
           || DARWIN_VERS == DARWIN_10_12
      UWord magic_delta = 0xB0;
#     else
#       error "magic_delta: to be computed on new OS version"
        // magic_delta = tst->os_state.pthread - self
#     endif

      // This thread already exists; we're merely re-entering
      // after leaving via workq_ops(WQOPS_THREAD_RETURN).
      // Don't allocate any V thread resources.
      // Do reset thread registers.
      ThreadId tid = VG_(lwpid_to_vgtid)(kport);
      vg_assert(VG_(is_valid_tid)(tid));
      vg_assert(mach_thread_self() == kport);

      tst = VG_(get_ThreadState)(tid);

      if (0) VG_(printf)("wqthread_hijack reuse %s: tid %u, tst %p, "
                         "tst->os_state.pthread %#lx, self %#lx\n",
                         tst->os_state.pthread == self ? "SAME" : "DIFF",
                         tid, tst, tst->os_state.pthread, self);

      vex = &tst->arch.vex;
      vg_assert(tst->os_state.pthread - magic_delta == self);
   }
   else {
      // This is a new thread.
      tst = VG_(get_ThreadState)(VG_(alloc_ThreadState)());
      vex = &tst->arch.vex;
      allocstack(tst->tid);
      LibVEX_GuestX86_initialise(vex);
   }

   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace,
   // which requires valid register data.
   vex->guest_EIP = wqthread_starter;
   vex->guest_EAX = self;
   vex->guest_EBX = kport;
   vex->guest_ECX = stackaddr;
   vex->guest_EDX = workitem;
   vex->guest_EDI = reuse;
   vex->guest_ESI = 0;
   vex->guest_ESP = sp;

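   /* Reconstruct the stack layout from sp alone: the pthread struct
      sits just above the stack top, so round sp up to a page boundary
      to find the top, then back off the fixed workqueue stack size to
      find the base. */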
   stacksize = 512*1024;  // wq stacks are always DEFAULT_STACK_SIZE
   stack = VG_PGROUNDUP(sp) - stacksize;

   if (is_reuse) {
      // Continue V's thread back in the scheduler.
      // The client thread is of course in another location entirely.

      /* Drop the lock before going into
         ML_(wqthread_continue_NORETURN).  The latter will immediately
         attempt to reacquire it in non-LL mode, which is a bit
         wasteful but I don't think is harmful.  A better solution
         would be to not drop the lock but instead "upgrade" it from a
         LL lock to a full lock, but that's too much like hard work
         right now. */
      VG_(release_BigLock_LL)("wqthread_hijack(1)");
      ML_(wqthread_continue_NORETURN)(tst->tid);
   }
   else {
      // Record thread's stack and Mach port and pthread struct
      tst->os_state.pthread = self;
      tst->os_state.lwpid = kport;
      record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "wqthread-%p");

      // kernel allocated stack - needs mapping
      tst->client_stack_highest_byte = stack+stacksize-1;
      tst->client_stack_szB = stacksize;

      // GrP fixme scheduler lock?!

      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      // GrP fixme uninitialized!
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      // GrP fixme ban_mem_stack!
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE,
            0, VKI_MAP_PRIVATE, -1, 0);

      ML_(sync_mappings)("after", "wqthread_hijack", 0);

      // Go!
      /* Same comments as the 'release' in the then-clause.
         start_thread_NORETURN calls run_thread_NORETURN calls
         thread_wrapper, which acquires the lock before continuing.
         Let's hope nothing non-thread-local happens until that point.

         DDD: I think this is plain wrong .. if we get to
         thread_wrapper not holding the lock, and someone has recycled
         this thread slot in the meantime, we're hosed.  Is that
         possible, though? */
      VG_(release_BigLock_LL)("wqthread_hijack(2)");
      call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0,
                            start_thread_NORETURN, (Word)tst);
   }

   /*NOTREACHED*/
   vg_assert(0);
}

#endif // defined(VGP_x86_darwin)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/