1
2/*--------------------------------------------------------------------*/
3/*--- Handle system calls.                          syswrap-main.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2000-2012 Julian Seward
11      jseward@acm.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "libvex_guest_offsets.h"
32#include "libvex_trc_values.h"
33#include "pub_core_basics.h"
34#include "pub_core_aspacemgr.h"
35#include "pub_core_vki.h"
36#include "pub_core_vkiscnums.h"
37#include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
38#include "pub_core_threadstate.h"
39#include "pub_core_libcbase.h"
40#include "pub_core_libcassert.h"
41#include "pub_core_libcprint.h"
42#include "pub_core_libcproc.h"      // For VG_(getpid)()
43#include "pub_core_libcsignal.h"
44#include "pub_core_scheduler.h"     // For VG_({acquire,release}_BigLock),
45                                    //   and VG_(vg_yield)
46#include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
47#include "pub_core_tooliface.h"
48#include "pub_core_options.h"
49#include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
50#include "pub_core_syscall.h"
51#include "pub_core_machine.h"
52#include "pub_core_syswrap.h"
53
54#include "priv_types_n_macros.h"
55#include "priv_syswrap-main.h"
56
57#if defined(VGO_darwin)
58#include "priv_syswrap-darwin.h"
59#endif
60
61/* Useful info which needs to be recorded somewhere:
62   Use of registers in syscalls is:
63
64          NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
65   LINUX:
66   x86    eax   ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
67   amd64  rax   rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
68   ppc32  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
69   ppc64  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
70   arm    r7    r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
71   mips   v0    a0   a1   a2   a3 stack stack n/a  n/a  v0        (== NUM)
72
73   On s390x the svc instruction is used for system calls. The system call
74   number is encoded in the instruction (8 bit immediate field). Since Linux
75   2.6 it is also allowed to use svc 0 with the system call number in r1.
76   This was introduced for system calls >255, but works for all. It is
77   also possible to see the svc 0 together with an EXecute instruction, that
78   fills in the immediate field.
79   s390x r1/SVC r2   r3   r4   r5   r6   r7   n/a  n/a  r2        (== ARG1)
80
81   DARWIN:
82   x86    eax +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
83   amd64  rax rdi  rsi  rdx  rcx  r8   r9   +8   +16  rdx:rax, rflags.c
84
85   For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
86   amd64-darwin.  Apparently 0(%esp) is some kind of return address
87   (perhaps for syscalls done with "sysenter"?)  I don't think it is
88   relevant for syscalls done with "int $0x80/1/2".
89*/
90
91/* This is the top level of the system-call handler module.  All
92   system calls are channelled through here, doing two things:
93
94   * notify the tool of the events (mem/reg reads, writes) happening
95
96   * perform the syscall, usually by passing it along to the kernel
97     unmodified.
98
99   A magical piece of assembly code, do_syscall_for_client_WRK, in
100   syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
101   kernel, whilst having the simulator retain control.
102*/
103
104/* The main function is VG_(client_syscall).  The simulation calls it
105   whenever a client thread wants to do a syscall.  The following is a
106   sketch of what it does.
107
108   * Ensures the root thread's stack is suitably mapped.  Tedious and
109     arcane.  See big big comment in VG_(client_syscall).
110
111   * First, it rounds up the syscall number and args (which is a
112     platform dependent activity) and puts them in a struct ("args")
113     and also a copy in "orig_args".
114
115     The pre/post wrappers refer to these structs and so no longer
116     need magic macros to access any specific registers.  This struct
117     is stored in thread-specific storage.
118
119
120   * The pre-wrapper is called, passing it a pointer to struct
121     "args".
122
123
124   * The pre-wrapper examines the args and pokes the tool
125     appropriately.  It may modify the args; this is why "orig_args"
126     is also stored.
127
128     The pre-wrapper may choose to 'do' the syscall itself, and
129     concludes one of three outcomes:
130
131       Success(N)    -- syscall is already complete, with success;
132                        result is N
133
134       Fail(N)       -- syscall is already complete, with failure;
135                        error code is N
136
137       HandToKernel  -- (the usual case): this needs to be given to
138                        the kernel to be done, using the values in
139                        the possibly-modified "args" struct.
140
141     In addition, the pre-wrapper may set some flags:
142
143       MayBlock   -- only applicable when outcome==HandToKernel
144
145       PostOnFail -- only applicable when outcome==HandToKernel or Fail
146
147
148   * If the pre-outcome is HandToKernel, the syscall is duly handed
149     off to the kernel (perhaps involving some thread switchery, but
150     that's not important).  This reduces the possible set of outcomes
151     to either Success(N) or Fail(N).
152
153
154   * The outcome (Success(N) or Fail(N)) is written back to the guest
155     register(s).  This is platform specific:
156
157     x86:    Success(N) ==>  eax = N
158             Fail(N)    ==>  eax = -N
159
160     ditto amd64
161
162     ppc32:  Success(N) ==>  r3 = N, CR0.SO = 0
163             Fail(N) ==>     r3 = N, CR0.SO = 1
164
165     Darwin:
166     x86:    Success(N) ==>  edx:eax = N, cc = 0
167             Fail(N)    ==>  edx:eax = N, cc = 1
168
169     s390x:  Success(N) ==>  r2 = N
170             Fail(N)    ==>  r2 = -N
171
172   * The post wrapper is called if:
173
174     - it exists, and
175     - outcome==Success or (outcome==Fail and PostOnFail is set)
176
177     The post wrapper is passed the adulterated syscall args (struct
178     "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
179
180   There are several other complications, primarily to do with
181   syscalls getting interrupted, explained in comments in the code.
182*/
183
184/* CAVEATS for writing wrappers.  It is important to follow these!
185
186   The macros defined in priv_types_n_macros.h are designed to help
187   decouple the wrapper logic from the actual representation of
188   syscall args/results, since these wrappers are designed to work on
189   multiple platforms.
190
191   Sometimes a PRE wrapper will complete the syscall itself, without
192   handing it to the kernel.  It will use one of SET_STATUS_Success,
193   SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
194   value.  It is critical to appreciate that use of the macro does not
195   immediately cause the underlying guest state to be updated -- that
196   is done by the driver logic in this file, when the wrapper returns.
197
198   As a result, PRE wrappers of the following form will malfunction:
199
200   PRE(fooble)
201   {
202      ... do stuff ...
203      SET_STATUS_Somehow(...)
204
205      // do something that assumes guest state is up to date
206   }
207
208   In particular, direct or indirect calls to VG_(poll_signals) after
209   setting STATUS can cause the guest state to be read (in order to
210   build signal frames).  Do not do this.  If you want a signal poll
211   after the syscall goes through, do "*flags |= SfPollAfter" and the
212   driver logic will do it for you.
213
214   -----------
215
216   Another critical requirement following introduction of new address
217   space manager (JRS, 20050923):
218
219   In a situation where the mappedness of memory has changed, aspacem
220   should be notified BEFORE the tool.  Hence the following is
221   correct:
222
223      Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
224      VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
225      if (d)
226         VG_(discard_translations)(s->start, s->end+1 - s->start);
227
228   whilst this is wrong:
229
230      VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
231      Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
232      if (d)
233         VG_(discard_translations)(s->start, s->end+1 - s->start);
234
235   The reason is that the tool may itself ask aspacem for more shadow
236   memory as a result of the VG_TRACK call.  In such a situation it is
237   critical that aspacem's segment array is up to date -- hence the
238   need to notify aspacem first.
239
240   -----------
241
242   Also .. take care to call VG_(discard_translations) whenever
243   memory with execute permissions is unmapped.
244*/
245
246
247/* ---------------------------------------------------------------------
248   Do potentially blocking syscall for the client, and mess with
249   signal masks at the same time.
250   ------------------------------------------------------------------ */
251
252/* Perform a syscall on behalf of a client thread, using a specific
253   signal mask.  On completion, the signal mask is set to restore_mask
254   (which presumably blocks almost everything).  If a signal happens
255   during the syscall, the handler should call
256   VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
257   thread's context to do the right thing.
258
259   The _WRK function is handwritten assembly, implemented per-platform
260   in coregrind/m_syswrap/syscall-$PLAT.S.  It has some very magic
261   properties.  See comments at the top of
262   VG_(fixup_guest_state_after_syscall_interrupted) below for details.
263
264   This function (these functions) are required to return zero in case
265   of success (even if the syscall itself failed), and nonzero if the
266   sigprocmask-swizzling calls failed.  We don't actually care about
267   the failure values from sigprocmask, although most of the assembly
268   implementations do attempt to return that, using the convention
269   0 for success, or 0x8000 | error-code for failure.
270*/
271#if defined(VGO_linux)
272extern
273UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
274                                      void* guest_state,
275                                      const vki_sigset_t *syscall_mask,
276                                      const vki_sigset_t *restore_mask,
277                                      Word sigsetSzB );
278#elif defined(VGO_darwin)
279extern
280UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
281                                           void* guest_state,
282                                           const vki_sigset_t *syscall_mask,
283                                           const vki_sigset_t *restore_mask,
284                                           Word sigsetSzB ); /* unused */
285extern
286UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
287                                           void* guest_state,
288                                           const vki_sigset_t *syscall_mask,
289                                           const vki_sigset_t *restore_mask,
290                                           Word sigsetSzB ); /* unused */
291extern
292UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
293                                           void* guest_state,
294                                           const vki_sigset_t *syscall_mask,
295                                           const vki_sigset_t *restore_mask,
296                                           Word sigsetSzB ); /* unused */
297#else
298#  error "Unknown OS"
299#endif
300
301
/* Perform 'syscallno' on behalf of the client thread 'tst', running
   the syscall with the signal mask 'syscall_mask' installed and
   restoring the previous mask afterwards.  The real work is done by
   the per-platform assembly stubs declared above; this function only
   selects the right stub and checks that the sigprocmask swizzling
   inside it succeeded.  See the block comment above the extern
   declarations for the 0 / (0x8000|errcode) return convention. */
static
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   vki_sigset_t saved;   /* filled in by the stub with the pre-call mask */
   UWord err;
#  if defined(VGO_linux)
   /* Linux: a single stub handles all syscalls. */
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_darwin)
   /* Darwin: dispatch on the syscall class encoded in the (64-bit
      internal) syscall number; each class has its own stub, and the
      class tag is stripped before handing the number to the kernel. */
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  else
#    error "Unknown OS"
#  endif
   /* Nonzero means the stub's sigprocmask calls failed, which should
      never happen; the low bits carry the error code. */
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
      (Int)(err & 0xFFF)
   );
}
348
349
350/* ---------------------------------------------------------------------
351   Impedance matchers and misc helpers
352   ------------------------------------------------------------------ */
353
354static
355Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
356{
357   return a1->sysno == a2->sysno
358          && a1->arg1 == a2->arg1
359          && a1->arg2 == a2->arg2
360          && a1->arg3 == a2->arg3
361          && a1->arg4 == a2->arg4
362          && a1->arg5 == a2->arg5
363          && a1->arg6 == a2->arg6
364          && a1->arg7 == a2->arg7
365          && a1->arg8 == a2->arg8;
366}
367
368static
369Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
370{
371   /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
372   if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
373      return True;
374#  if defined(VGO_darwin)
375   /* Darwin-specific debugging guff */
376   vg_assert(s1->what == s2->what);
377   VG_(printf)("eq_SyscallStatus:\n");
378   VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
379   VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
380   vg_assert(0);
381#  endif
382   return False;
383}
384
385/* Convert between SysRes and SyscallStatus, to the extent possible. */
386
387static
388SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
389{
390   SyscallStatus status;
391   status.what = SsComplete;
392   status.sres = res;
393   return status;
394}
395
396
397/* Impedance matchers.  These convert syscall arg or result data from
398   the platform-specific in-guest-state format to the canonical
399   formats, and back. */
400
401static
402void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
403                                    /*IN*/ VexGuestArchState* gst_vanilla,
404                                    /*IN*/ UInt trc )
405{
406#if defined(VGP_x86_linux)
407   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
408   canonical->sysno = gst->guest_EAX;
409   canonical->arg1  = gst->guest_EBX;
410   canonical->arg2  = gst->guest_ECX;
411   canonical->arg3  = gst->guest_EDX;
412   canonical->arg4  = gst->guest_ESI;
413   canonical->arg5  = gst->guest_EDI;
414   canonical->arg6  = gst->guest_EBP;
415   canonical->arg7  = 0;
416   canonical->arg8  = 0;
417
418#elif defined(VGP_amd64_linux)
419   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
420   canonical->sysno = gst->guest_RAX;
421   canonical->arg1  = gst->guest_RDI;
422   canonical->arg2  = gst->guest_RSI;
423   canonical->arg3  = gst->guest_RDX;
424   canonical->arg4  = gst->guest_R10;
425   canonical->arg5  = gst->guest_R8;
426   canonical->arg6  = gst->guest_R9;
427   canonical->arg7  = 0;
428   canonical->arg8  = 0;
429
430#elif defined(VGP_ppc32_linux)
431   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
432   canonical->sysno = gst->guest_GPR0;
433   canonical->arg1  = gst->guest_GPR3;
434   canonical->arg2  = gst->guest_GPR4;
435   canonical->arg3  = gst->guest_GPR5;
436   canonical->arg4  = gst->guest_GPR6;
437   canonical->arg5  = gst->guest_GPR7;
438   canonical->arg6  = gst->guest_GPR8;
439   canonical->arg7  = 0;
440   canonical->arg8  = 0;
441
442#elif defined(VGP_ppc64_linux)
443   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
444   canonical->sysno = gst->guest_GPR0;
445   canonical->arg1  = gst->guest_GPR3;
446   canonical->arg2  = gst->guest_GPR4;
447   canonical->arg3  = gst->guest_GPR5;
448   canonical->arg4  = gst->guest_GPR6;
449   canonical->arg5  = gst->guest_GPR7;
450   canonical->arg6  = gst->guest_GPR8;
451   canonical->arg7  = 0;
452   canonical->arg8  = 0;
453
454#elif defined(VGP_arm_linux)
455   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
456   canonical->sysno = gst->guest_R7;
457   canonical->arg1  = gst->guest_R0;
458   canonical->arg2  = gst->guest_R1;
459   canonical->arg3  = gst->guest_R2;
460   canonical->arg4  = gst->guest_R3;
461   canonical->arg5  = gst->guest_R4;
462   canonical->arg6  = gst->guest_R5;
463   canonical->arg7  = 0;
464   canonical->arg8  = 0;
465
466#elif defined(VGP_mips32_linux)
467   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
468   canonical->sysno = gst->guest_r2;    // v0
469   if (canonical->sysno != __NR_syscall) {
470      canonical->arg1  = gst->guest_r4;    // a0
471      canonical->arg2  = gst->guest_r5;    // a1
472      canonical->arg3  = gst->guest_r6;    // a2
473      canonical->arg4  = gst->guest_r7;    // a3
474      canonical->arg5  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP/sp)
475      canonical->arg6  = *((UInt*) (gst->guest_r29 + 20));    // 20(sp)
476      canonical->arg8 = 0;
477   } else {
478      // Fixme hack handle syscall()
479      canonical->sysno = gst->guest_r4;    // a0
480      canonical->arg1  = gst->guest_r5;    // a1
481      canonical->arg2  = gst->guest_r6;    // a2
482      canonical->arg3  = gst->guest_r7;    // a3
483      canonical->arg4  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP/sp)
484      canonical->arg5  = *((UInt*) (gst->guest_r29 + 20));    // 20(guest_SP/sp)
485      canonical->arg6  = *((UInt*) (gst->guest_r29 + 24));    // 24(guest_SP/sp)
486      canonical->arg8 = __NR_syscall;
487   }
488
489#elif defined(VGP_x86_darwin)
490   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
491   UWord *stack = (UWord *)gst->guest_ESP;
492   // GrP fixme hope syscalls aren't called with really shallow stacks...
493   canonical->sysno = gst->guest_EAX;
494   if (canonical->sysno != 0) {
495      // stack[0] is return address
496      canonical->arg1  = stack[1];
497      canonical->arg2  = stack[2];
498      canonical->arg3  = stack[3];
499      canonical->arg4  = stack[4];
500      canonical->arg5  = stack[5];
501      canonical->arg6  = stack[6];
502      canonical->arg7  = stack[7];
503      canonical->arg8  = stack[8];
504   } else {
505      // GrP fixme hack handle syscall()
506      // GrP fixme what about __syscall() ?
507      // stack[0] is return address
508      // DDD: the tool can't see that the params have been shifted!  Can
509      //      lead to incorrect checking, I think, because the PRRAn/PSARn
510      //      macros will mention the pre-shifted args.
511      canonical->sysno = stack[1];
512      vg_assert(canonical->sysno != 0);
513      canonical->arg1  = stack[2];
514      canonical->arg2  = stack[3];
515      canonical->arg3  = stack[4];
516      canonical->arg4  = stack[5];
517      canonical->arg5  = stack[6];
518      canonical->arg6  = stack[7];
519      canonical->arg7  = stack[8];
520      canonical->arg8  = stack[9];
521
522      PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
523            VG_(getpid)(), /*tid,*/
524            VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
525   }
526
527   // Here we determine what kind of syscall it was by looking at the
528   // interrupt kind, and then encode the syscall number using the 64-bit
529   // encoding for Valgrind's internal use.
530   //
531   // DDD: Would it be better to stash the JMP kind into the Darwin
532   // thread state rather than passing in the trc?
533   switch (trc) {
534   case VEX_TRC_JMP_SYS_INT128:
535      // int $0x80 = Unix, 64-bit result
536      vg_assert(canonical->sysno >= 0);
537      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
538      break;
539   case VEX_TRC_JMP_SYS_SYSENTER:
540      // syscall = Unix, 32-bit result
541      // OR        Mach, 32-bit result
542      if (canonical->sysno >= 0) {
543         // GrP fixme hack:  0xffff == I386_SYSCALL_NUMBER_MASK
544         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
545                                                             & 0xffff);
546      } else {
547         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
548      }
549      break;
550   case VEX_TRC_JMP_SYS_INT129:
551      // int $0x81 = Mach, 32-bit result
552      vg_assert(canonical->sysno < 0);
553      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
554      break;
555   case VEX_TRC_JMP_SYS_INT130:
556      // int $0x82 = mdep, 32-bit result
557      vg_assert(canonical->sysno >= 0);
558      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
559      break;
560   default:
561      vg_assert(0);
562      break;
563   }
564
565#elif defined(VGP_amd64_darwin)
566   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
567   UWord *stack = (UWord *)gst->guest_RSP;
568
569   vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);
570
571   // GrP fixme hope syscalls aren't called with really shallow stacks...
572   canonical->sysno = gst->guest_RAX;
573   if (canonical->sysno != __NR_syscall) {
574      // stack[0] is return address
575      canonical->arg1  = gst->guest_RDI;
576      canonical->arg2  = gst->guest_RSI;
577      canonical->arg3  = gst->guest_RDX;
578      canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
579      canonical->arg5  = gst->guest_R8;
580      canonical->arg6  = gst->guest_R9;
581      canonical->arg7  = stack[1];
582      canonical->arg8  = stack[2];
583   } else {
584      // GrP fixme hack handle syscall()
585      // GrP fixme what about __syscall() ?
586      // stack[0] is return address
587      // DDD: the tool can't see that the params have been shifted!  Can
588      //      lead to incorrect checking, I think, because the PRRAn/PSARn
589      //      macros will mention the pre-shifted args.
590      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
591      vg_assert(canonical->sysno != __NR_syscall);
592      canonical->arg1  = gst->guest_RSI;
593      canonical->arg2  = gst->guest_RDX;
594      canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
595      canonical->arg4  = gst->guest_R8;
596      canonical->arg5  = gst->guest_R9;
597      canonical->arg6  = stack[1];
598      canonical->arg7  = stack[2];
599      canonical->arg8  = stack[3];
600
601      PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
602            VG_(getpid)(), /*tid,*/
603            VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
604   }
605
606   // no canonical->sysno adjustment needed
607
608#elif defined(VGP_s390x_linux)
609   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
610   canonical->sysno = gst->guest_SYSNO;
611   canonical->arg1  = gst->guest_r2;
612   canonical->arg2  = gst->guest_r3;
613   canonical->arg3  = gst->guest_r4;
614   canonical->arg4  = gst->guest_r5;
615   canonical->arg5  = gst->guest_r6;
616   canonical->arg6  = gst->guest_r7;
617   canonical->arg7  = 0;
618   canonical->arg8  = 0;
619#else
620#  error "getSyscallArgsFromGuestState: unknown arch"
621#endif
622}
623
/* Inverse of getSyscallArgsFromGuestState: write the (possibly
   wrapper-modified) canonical args back into the platform-specific
   guest registers and, on Darwin and mips32, into the guest stack,
   ready for the syscall to be handed to the kernel. */
static
void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
                                    /*OUT*/VexGuestArchState* gst_vanilla )
{
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   gst->guest_EAX = canonical->sysno;
   gst->guest_EBX = canonical->arg1;
   gst->guest_ECX = canonical->arg2;
   gst->guest_EDX = canonical->arg3;
   gst->guest_ESI = canonical->arg4;
   gst->guest_EDI = canonical->arg5;
   gst->guest_EBP = canonical->arg6;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   gst->guest_RAX = canonical->sysno;
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   gst->guest_R7 = canonical->sysno;
   gst->guest_R0 = canonical->arg1;
   gst->guest_R1 = canonical->arg2;
   gst->guest_R2 = canonical->arg3;
   gst->guest_R3 = canonical->arg4;
   gst->guest_R4 = canonical->arg5;
   gst->guest_R5 = canonical->arg6;

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   /* Strip the Darwin class tag; the kernel sees the raw number. */
   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   /* All 8 args go on the guest stack, above the return address. */
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   // stack[0] is return address
   /* NOTE(review): arg4 is written to RCX here, whereas the getter
      reads it from R10 ("not rcx with syscall insn").  Presumably the
      syscall-$PLAT.S stub re-issues the syscall so the kernel sees
      R10 -- confirm against the assembly stub before changing. */
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1]       = canonical->arg7;
   stack[2]       = canonical->arg8;

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   gst->guest_SYSNO  = canonical->sysno;
   gst->guest_r2     = canonical->arg1;
   gst->guest_r3     = canonical->arg2;
   gst->guest_r4     = canonical->arg3;
   gst->guest_r5     = canonical->arg4;
   gst->guest_r6     = canonical->arg5;
   gst->guest_r7     = canonical->arg6;

#elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   /* arg8 == __NR_syscall marks args that came in via the syscall()
      indirection hack in the getter; shift them back up one slot. */
   if (canonical->arg8 != __NR_syscall) {
      gst->guest_r2 = canonical->sysno;
      gst->guest_r4 = canonical->arg1;
      gst->guest_r5 = canonical->arg2;
      gst->guest_r6 = canonical->arg3;
      gst->guest_r7 = canonical->arg4;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5;    // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6;    // 20(sp)
   } else {
      /* NOTE(review): clears the marker in the /*IN*/ struct -- the
         one place this function mutates its input; callers appear to
         rely on the marker being one-shot. */
      canonical->arg8 = 0;
      gst->guest_r2 = __NR_syscall;
      gst->guest_r4 = canonical->sysno;
      gst->guest_r5 = canonical->arg1;
      gst->guest_r6 = canonical->arg2;
      gst->guest_r7 = canonical->arg3;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4;    // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5;    // 20(sp)
      *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6;    // 24(sp)
   }
#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}
747
/* Read the result of a just-completed syscall out of the guest state
   and convert it into canonical, platform-independent form: a SysRes
   plus the SsComplete marker.  This is the inverse of
   putSyscallStatusIntoGuestState below.  Each kernel ABI signals
   success/failure differently (result register on linux, CR0.SO on
   ppc, the carry flag on darwin, a3 on mips32), hence the
   per-platform cases. */
static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
   /* CR0.SO (bit 28 of the flattened CR) is the kernel's error flag;
      the result itself is in GPR3. */
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
   /* Same convention as ppc32: CR0.SO signals failure. */
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   UInt                v0 = gst->guest_r2;    // v0
   UInt                v1 = gst->guest_r3;    // v1
   UInt                a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips32_linux)( v0, v1, a3 );
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   /* guest_SC_CLASS was stashed at syscall entry (see
      VG_(client_syscall)); it selects which return convention to
      decode. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   /* Same scheme as x86-darwin, but with 64-bit registers. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst   = (VexGuestS390XState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
   canonical->what = SsComplete;

#  else
#    error "getSyscallStatusFromGuestState: unknown arch"
#  endif
}
862
863static
864void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
865                                      /*IN*/ SyscallStatus*     canonical,
866                                      /*OUT*/VexGuestArchState* gst_vanilla )
867{
868#  if defined(VGP_x86_linux)
869   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
870   vg_assert(canonical->what == SsComplete);
871   if (sr_isError(canonical->sres)) {
872      /* This isn't exactly right, in that really a Failure with res
873         not in the range 1 .. 4095 is unrepresentable in the
874         Linux-x86 scheme.  Oh well. */
875      gst->guest_EAX = - (Int)sr_Err(canonical->sres);
876   } else {
877      gst->guest_EAX = sr_Res(canonical->sres);
878   }
879   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
880             OFFSET_x86_EAX, sizeof(UWord) );
881
882#  elif defined(VGP_amd64_linux)
883   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
884   vg_assert(canonical->what == SsComplete);
885   if (sr_isError(canonical->sres)) {
886      /* This isn't exactly right, in that really a Failure with res
887         not in the range 1 .. 4095 is unrepresentable in the
888         Linux-amd64 scheme.  Oh well. */
889      gst->guest_RAX = - (Long)sr_Err(canonical->sres);
890   } else {
891      gst->guest_RAX = sr_Res(canonical->sres);
892   }
893   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
894             OFFSET_amd64_RAX, sizeof(UWord) );
895
896#  elif defined(VGP_ppc32_linux)
897   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
898   UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
899   vg_assert(canonical->what == SsComplete);
900   if (sr_isError(canonical->sres)) {
901      /* set CR0.SO */
902      LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
903      gst->guest_GPR3 = sr_Err(canonical->sres);
904   } else {
905      /* clear CR0.SO */
906      LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
907      gst->guest_GPR3 = sr_Res(canonical->sres);
908   }
909   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
910             OFFSET_ppc32_GPR3, sizeof(UWord) );
911   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
912             OFFSET_ppc32_CR0_0, sizeof(UChar) );
913
914#  elif defined(VGP_ppc64_linux)
915   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
916   UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
917   vg_assert(canonical->what == SsComplete);
918   if (sr_isError(canonical->sres)) {
919      /* set CR0.SO */
920      LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
921      gst->guest_GPR3 = sr_Err(canonical->sres);
922   } else {
923      /* clear CR0.SO */
924      LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
925      gst->guest_GPR3 = sr_Res(canonical->sres);
926   }
927   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
928             OFFSET_ppc64_GPR3, sizeof(UWord) );
929   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
930             OFFSET_ppc64_CR0_0, sizeof(UChar) );
931
932#  elif defined(VGP_arm_linux)
933   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
934   vg_assert(canonical->what == SsComplete);
935   if (sr_isError(canonical->sres)) {
936      /* This isn't exactly right, in that really a Failure with res
937         not in the range 1 .. 4095 is unrepresentable in the
938         Linux-arm scheme.  Oh well. */
939      gst->guest_R0 = - (Int)sr_Err(canonical->sres);
940   } else {
941      gst->guest_R0 = sr_Res(canonical->sres);
942   }
943   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
944             OFFSET_arm_R0, sizeof(UWord) );
945
946#elif defined(VGP_x86_darwin)
947   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
948   SysRes sres = canonical->sres;
949   vg_assert(canonical->what == SsComplete);
950   /* Unfortunately here we have to break abstraction and look
951      directly inside 'res', in order to decide what to do. */
952   switch (sres._mode) {
953      case SysRes_MACH: // int $0x81 = Mach, 32-bit result
954      case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
955         gst->guest_EAX = sres._wLO;
956         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
957                   OFFSET_x86_EAX, sizeof(UInt) );
958         break;
959      case SysRes_UNIX_OK:  // int $0x80 = Unix, 64-bit result
960      case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
961         gst->guest_EAX = sres._wLO;
962         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
963                   OFFSET_x86_EAX, sizeof(UInt) );
964         gst->guest_EDX = sres._wHI;
965         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
966                   OFFSET_x86_EDX, sizeof(UInt) );
967         LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
968                                      gst );
969         // GrP fixme sets defined for entire eflags, not just bit c
970         // DDD: this breaks exp-ptrcheck.
971         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
972                   offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
973         break;
974      default:
975         vg_assert(0);
976         break;
977   }
978
979#elif defined(VGP_amd64_darwin)
980   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
981   SysRes sres = canonical->sres;
982   vg_assert(canonical->what == SsComplete);
983   /* Unfortunately here we have to break abstraction and look
984      directly inside 'res', in order to decide what to do. */
985   switch (sres._mode) {
986      case SysRes_MACH: // syscall = Mach, 64-bit result
987      case SysRes_MDEP: // syscall = mdep, 64-bit result
988         gst->guest_RAX = sres._wLO;
989         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
990                   OFFSET_amd64_RAX, sizeof(ULong) );
991         break;
992      case SysRes_UNIX_OK:  // syscall = Unix, 128-bit result
993      case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
994         gst->guest_RAX = sres._wLO;
995         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
996                   OFFSET_amd64_RAX, sizeof(ULong) );
997         gst->guest_RDX = sres._wHI;
998         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
999                   OFFSET_amd64_RDX, sizeof(ULong) );
1000         LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
1001                                        gst );
1002         // GrP fixme sets defined for entire rflags, not just bit c
1003         // DDD: this breaks exp-ptrcheck.
1004         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1005                   offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
1006         break;
1007      default:
1008         vg_assert(0);
1009         break;
1010   }
1011
1012#  elif defined(VGP_s390x_linux)
1013   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
1014   vg_assert(canonical->what == SsComplete);
1015   if (sr_isError(canonical->sres)) {
1016      gst->guest_r2 = - (Long)sr_Err(canonical->sres);
1017   } else {
1018      gst->guest_r2 = sr_Res(canonical->sres);
1019   }
1020
1021#  elif defined(VGP_mips32_linux)
1022   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
1023   vg_assert(canonical->what == SsComplete);
1024   if (sr_isError(canonical->sres)) {
1025      gst->guest_r2 = (Int)sr_Err(canonical->sres);
1026      gst->guest_r7 = (Int)sr_Err(canonical->sres);
1027   } else {
1028      gst->guest_r2 = sr_Res(canonical->sres);
1029      gst->guest_r3 = sr_ResEx(canonical->sres);
1030      gst->guest_r7 = (Int)sr_Err(canonical->sres);
1031   }
1032   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1033             OFFSET_mips32_r2, sizeof(UWord) );
1034   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1035             OFFSET_mips32_r3, sizeof(UWord) );
1036   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1037             OFFSET_mips32_r7, sizeof(UWord) );
1038
1039#  else
1040#    error "putSyscallStatusIntoGuestState: unknown arch"
1041#  endif
1042}
1043
1044
1045/* Tell me the offsets in the guest state of the syscall params, so
1046   that the scalar argument checkers don't have to have this info
1047   hardwired. */
1048
static
void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
{
   /* Fills in, per platform: o_* = offsets into the guest state of
      the syscall number and register-passed args; s_* = offsets
      (multiples of the word size) for args passed on the client's
      stack (used on darwin and mips32); uu_* = deliberately
      impossible values for arg slots the platform never uses. */
#if defined(VGP_x86_linux)
   layout->o_sysno  = OFFSET_x86_EAX;
   layout->o_arg1   = OFFSET_x86_EBX;
   layout->o_arg2   = OFFSET_x86_ECX;
   layout->o_arg3   = OFFSET_x86_EDX;
   layout->o_arg4   = OFFSET_x86_ESI;
   layout->o_arg5   = OFFSET_x86_EDI;
   layout->o_arg6   = OFFSET_x86_EBP;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_amd64_linux)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_R10;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc32_linux)
   layout->o_sysno  = OFFSET_ppc32_GPR0;
   layout->o_arg1   = OFFSET_ppc32_GPR3;
   layout->o_arg2   = OFFSET_ppc32_GPR4;
   layout->o_arg3   = OFFSET_ppc32_GPR5;
   layout->o_arg4   = OFFSET_ppc32_GPR6;
   layout->o_arg5   = OFFSET_ppc32_GPR7;
   layout->o_arg6   = OFFSET_ppc32_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc64_linux)
   layout->o_sysno  = OFFSET_ppc64_GPR0;
   layout->o_arg1   = OFFSET_ppc64_GPR3;
   layout->o_arg2   = OFFSET_ppc64_GPR4;
   layout->o_arg3   = OFFSET_ppc64_GPR5;
   layout->o_arg4   = OFFSET_ppc64_GPR6;
   layout->o_arg5   = OFFSET_ppc64_GPR7;
   layout->o_arg6   = OFFSET_ppc64_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm_linux)
   layout->o_sysno  = OFFSET_arm_R7;
   layout->o_arg1   = OFFSET_arm_R0;
   layout->o_arg2   = OFFSET_arm_R1;
   layout->o_arg3   = OFFSET_arm_R2;
   layout->o_arg4   = OFFSET_arm_R3;
   layout->o_arg5   = OFFSET_arm_R4;
   layout->o_arg6   = OFFSET_arm_R5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_mips32_linux)
   layout->o_sysno  = OFFSET_mips32_r2;
   layout->o_arg1   = OFFSET_mips32_r4;
   layout->o_arg2   = OFFSET_mips32_r5;
   layout->o_arg3   = OFFSET_mips32_r6;
   layout->o_arg4   = OFFSET_mips32_r7;
   /* args 5 and 6 overflow onto the stack on mips32 */
   layout->s_arg5   = sizeof(UWord) * 4;
   layout->s_arg6   = sizeof(UWord) * 5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_x86_darwin)
   layout->o_sysno  = OFFSET_x86_EAX;
   // syscall parameters are on stack in C convention
   layout->s_arg1   = sizeof(UWord) * 1;
   layout->s_arg2   = sizeof(UWord) * 2;
   layout->s_arg3   = sizeof(UWord) * 3;
   layout->s_arg4   = sizeof(UWord) * 4;
   layout->s_arg5   = sizeof(UWord) * 5;
   layout->s_arg6   = sizeof(UWord) * 6;
   layout->s_arg7   = sizeof(UWord) * 7;
   layout->s_arg8   = sizeof(UWord) * 8;

#elif defined(VGP_amd64_darwin)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_RCX;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   /* args 7 and 8 overflow onto the stack */
   layout->s_arg7   = sizeof(UWord) * 1;
   layout->s_arg8   = sizeof(UWord) * 2;

#elif defined(VGP_s390x_linux)
   layout->o_sysno  = OFFSET_s390x_SYSNO;
   layout->o_arg1   = OFFSET_s390x_r2;
   layout->o_arg2   = OFFSET_s390x_r3;
   layout->o_arg3   = OFFSET_s390x_r4;
   layout->o_arg4   = OFFSET_s390x_r5;
   layout->o_arg5   = OFFSET_s390x_r6;
   layout->o_arg6   = OFFSET_s390x_r7;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */
#else
#  error "getSyscallLayout: unknown arch"
#endif
}
1155
1156
1157/* ---------------------------------------------------------------------
1158   The main driver logic
1159   ------------------------------------------------------------------ */
1160
1161/* Finding the handlers for a given syscall, or faking up one
1162   when no handler is found. */
1163
1164static
1165void bad_before ( ThreadId              tid,
1166                  SyscallArgLayout*     layout,
1167                  /*MOD*/SyscallArgs*   args,
1168                  /*OUT*/SyscallStatus* status,
1169                  /*OUT*/UWord*         flags )
1170{
1171   VG_(dmsg)("WARNING: unhandled syscall: %s\n",
1172      VG_SYSNUM_STRING_EXTRA(args->sysno));
1173   if (VG_(clo_verbosity) > 1) {
1174      VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1175   }
1176   VG_(dmsg)("You may be able to write your own handler.\n");
1177   VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
1178   VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
1179   VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");
1180
1181   SET_STATUS_Failure(VKI_ENOSYS);
1182}
1183
/* Dummy table entry returned by get_syscall_entry when no real
   wrapper exists: its pre-handler (bad_before) fails the syscall
   with ENOSYS, and it has no post-handler. */
static SyscallTableEntry bad_sys =
   { bad_before, NULL };
1186
1187static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
1188{
1189   const SyscallTableEntry* sys = NULL;
1190
1191#  if defined(VGO_linux)
1192   sys = ML_(get_linux_syscall_entry)( syscallno );
1193
1194#  elif defined(VGO_darwin)
1195   Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
1196
1197   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
1198   case VG_DARWIN_SYSCALL_CLASS_UNIX:
1199      if (idx >= 0 && idx < ML_(syscall_table_size) &&
1200          ML_(syscall_table)[idx].before != NULL)
1201         sys = &ML_(syscall_table)[idx];
1202         break;
1203   case VG_DARWIN_SYSCALL_CLASS_MACH:
1204      if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
1205          ML_(mach_trap_table)[idx].before != NULL)
1206         sys = &ML_(mach_trap_table)[idx];
1207         break;
1208   case VG_DARWIN_SYSCALL_CLASS_MDEP:
1209      if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
1210          ML_(mdep_trap_table)[idx].before != NULL)
1211         sys = &ML_(mdep_trap_table)[idx];
1212         break;
1213   default:
1214      vg_assert(0);
1215      break;
1216   }
1217
1218#  else
1219#    error Unknown OS
1220#  endif
1221
1222   return sys == NULL  ? &bad_sys  : sys;
1223}
1224
1225
1226/* Add and remove signals from mask so that we end up telling the
1227   kernel the state we actually want rather than what the client
1228   wants. */
1229static void sanitize_client_sigmask(vki_sigset_t *mask)
1230{
1231   VG_(sigdelset)(mask, VKI_SIGKILL);
1232   VG_(sigdelset)(mask, VKI_SIGSTOP);
1233   VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
1234}
1235
/* Per-thread record of the syscall currently in flight (if any). */
typedef
   struct {
      SyscallArgs   orig_args;   // args exactly as read from the guest state
      SyscallArgs   args;        // args as possibly modified by the pre-handler
      SyscallStatus status;      // current stage: SsIdle/SsHandToKernel/SsComplete
      UWord         flags;       // Sf* flags set by the pre-handler
   }
   SyscallInfo;

SyscallInfo syscallInfo[VG_N_THREADS];
1246
1247
1248/* The scheduler needs to be able to zero out these records after a
1249   fork, hence this is exported from m_syswrap. */
1250void VG_(clear_syscallInfo) ( Int tid )
1251{
1252   vg_assert(tid >= 0 && tid < VG_N_THREADS);
1253   VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
1254   syscallInfo[tid].status.what = SsIdle;
1255}
1256
1257static void ensure_initialised ( void )
1258{
1259   Int i;
1260   static Bool init_done = False;
1261   if (init_done)
1262      return;
1263   init_done = True;
1264   for (i = 0; i < VG_N_THREADS; i++) {
1265      VG_(clear_syscallInfo)( i );
1266   }
1267}
1268
1269/* --- This is the main function of this file. --- */
1270
1271void VG_(client_syscall) ( ThreadId tid, UInt trc )
1272{
1273   Word                     sysno;
1274   ThreadState*             tst;
1275   const SyscallTableEntry* ent;
1276   SyscallArgLayout         layout;
1277   SyscallInfo*             sci;
1278
1279   ensure_initialised();
1280
1281   vg_assert(VG_(is_valid_tid)(tid));
1282   vg_assert(tid >= 1 && tid < VG_N_THREADS);
1283   vg_assert(VG_(is_running_thread)(tid));
1284
1285   tst = VG_(get_ThreadState)(tid);
1286
1287   /* BEGIN ensure root thread's stack is suitably mapped */
1288   /* In some rare circumstances, we may do the syscall without the
1289      bottom page of the stack being mapped, because the stack pointer
1290      was moved down just a few instructions before the syscall
1291      instruction, and there have been no memory references since
1292      then, that would cause a call to VG_(extend_stack) to have
1293      happened.
1294
1295      In native execution that's OK: the kernel automagically extends
1296      the stack's mapped area down to cover the stack pointer (or sp -
1297      redzone, really).  In simulated normal execution that's OK too,
1298      since any signals we get from accessing below the mapped area of
1299      the (guest's) stack lead us to VG_(extend_stack), where we
1300      simulate the kernel's stack extension logic.  But that leaves
1301      the problem of entering a syscall with the SP unmapped.  Because
1302      the kernel doesn't know that the segment immediately above SP is
1303      supposed to be a grow-down segment, it causes the syscall to
1304      fail, and thereby causes a divergence between native behaviour
1305      (syscall succeeds) and simulated behaviour (syscall fails).
1306
1307      This is quite a rare failure mode.  It has only been seen
1308      affecting calls to sys_readlink on amd64-linux, and even then it
1309      requires a certain code sequence around the syscall to trigger
1310      it.  Here is one:
1311
1312      extern int my_readlink ( const char* path );
1313      asm(
1314      ".text\n"
1315      ".globl my_readlink\n"
1316      "my_readlink:\n"
1317      "\tsubq    $0x1008,%rsp\n"
1318      "\tmovq    %rdi,%rdi\n"              // path is in rdi
1319      "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
1320      "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
1321      "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
1322      "\tsyscall\n"
1323      "\taddq    $0x1008,%rsp\n"
1324      "\tret\n"
1325      ".previous\n"
1326      );
1327
1328      For more details, see bug #156404
1329      (https://bugs.kde.org/show_bug.cgi?id=156404).
1330
1331      The fix is actually very simple.  We simply need to call
1332      VG_(extend_stack) for this thread, handing it the lowest
1333      possible valid address for stack (sp - redzone), to ensure the
1334      pages all the way down to that address, are mapped.  Because
1335      this is a potentially expensive and frequent operation, we
1336      filter in two ways:
1337
1338      First, only the main thread (tid=1) has a growdown stack.  So
1339      ignore all others.  It is conceivable, although highly unlikely,
1340      that the main thread exits, and later another thread is
1341      allocated tid=1, but that's harmless, I believe;
1342      VG_(extend_stack) will do nothing when applied to a non-root
1343      thread.
1344
1345      Secondly, first call VG_(am_find_nsegment) directly, to see if
1346      the page holding (sp - redzone) is mapped correctly.  If so, do
1347      nothing.  This is almost always the case.  VG_(extend_stack)
1348      calls VG_(am_find_nsegment) twice, so this optimisation -- and
1349      that's all it is -- more or less halves the number of calls to
1350      VG_(am_find_nsegment) required.
1351
1352      TODO: the test "seg->kind == SkAnonC" is really inadequate,
1353      because although it tests whether the segment is mapped
1354      _somehow_, it doesn't check that it has the right permissions
1355      (r,w, maybe x) ?  We could test that here, but it will also be
1356      necessary to fix the corresponding test in VG_(extend_stack).
1357
1358      All this guff is of course Linux-specific.  Hence the ifdef.
1359   */
1360#  if defined(VGO_linux)
1361   if (tid == 1/*ROOT THREAD*/) {
1362      Addr     stackMin   = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
1363      NSegment const* seg = VG_(am_find_nsegment)(stackMin);
1364      if (seg && seg->kind == SkAnonC) {
1365         /* stackMin is already mapped.  Nothing to do. */
1366      } else {
1367         (void)VG_(extend_stack)( stackMin,
1368                                  tst->client_stack_szB );
1369      }
1370   }
1371#  endif
1372   /* END ensure root thread's stack is suitably mapped */
1373
1374   /* First off, get the syscall args and number.  This is a
1375      platform-dependent action. */
1376
1377   sci = & syscallInfo[tid];
1378   vg_assert(sci->status.what == SsIdle);
1379
1380   getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );
1381
1382   /* Copy .orig_args to .args.  The pre-handler may modify .args, but
1383      we want to keep the originals too, just in case. */
1384   sci->args = sci->orig_args;
1385
1386   /* Save the syscall number in the thread state in case the syscall
1387      is interrupted by a signal. */
1388   sysno = sci->orig_args.sysno;
1389
1390   /* It's sometimes useful, as a crude debugging hack, to get a
1391      stack trace at each (or selected) syscalls. */
1392   if (0 && sysno == __NR_ioctl) {
1393      VG_(umsg)("\nioctl:\n");
1394      VG_(get_and_pp_StackTrace)(tid, 10);
1395      VG_(umsg)("\n");
1396   }
1397
1398#  if defined(VGO_darwin)
1399   /* Record syscall class.  But why?  Because the syscall might be
1400      interrupted by a signal, and in the signal handler (which will
1401      be m_signals.async_signalhandler) we will need to build a SysRes
1402      reflecting the syscall return result.  In order to do that we
1403      need to know the syscall class.  Hence stash it in the guest
1404      state of this thread.  This madness is not needed on Linux
1405      because it only has a single syscall return convention and so
1406      there is no ambiguity involved in converting the post-signal
1407      machine state into a SysRes. */
1408   tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
1409#  endif
1410
1411   /* The default what-to-do-next thing is hand the syscall to the
1412      kernel, so we pre-set that here.  Set .sres to something
1413      harmless looking (is irrelevant because .what is not
1414      SsComplete.) */
1415   sci->status.what = SsHandToKernel;
1416   sci->status.sres = VG_(mk_SysRes_Error)(0);
1417   sci->flags       = 0;
1418
1419   /* Fetch the syscall's handlers.  If no handlers exist for this
1420      syscall, we are given dummy handlers which force an immediate
1421      return with ENOSYS. */
1422   ent = get_syscall_entry(sysno);
1423
1424   /* Fetch the layout information, which tells us where in the guest
1425      state the syscall args reside.  This is a platform-dependent
1426      action.  This info is needed so that the scalar syscall argument
1427      checks (PRE_REG_READ calls) know which bits of the guest state
1428      they need to inspect. */
1429   getSyscallArgLayout( &layout );
1430
1431   /* Make sure the tmp signal mask matches the real signal mask;
1432      sigsuspend may change this. */
1433   vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
1434
1435   /* Right, we're finally ready to Party.  Call the pre-handler and
1436      see what we get back.  At this point:
1437
1438        sci->status.what  is Unset (we don't know yet).
1439        sci->orig_args    contains the original args.
1440        sci->args         is the same as sci->orig_args.
1441        sci->flags        is zero.
1442   */
1443
1444   PRINT("SYSCALL[%d,%d](%s) ",
1445      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));
1446
1447   /* Do any pre-syscall actions */
1448   if (VG_(needs).syscall_wrapper) {
1449      UWord tmpv[8];
1450      tmpv[0] = sci->orig_args.arg1;
1451      tmpv[1] = sci->orig_args.arg2;
1452      tmpv[2] = sci->orig_args.arg3;
1453      tmpv[3] = sci->orig_args.arg4;
1454      tmpv[4] = sci->orig_args.arg5;
1455      tmpv[5] = sci->orig_args.arg6;
1456      tmpv[6] = sci->orig_args.arg7;
1457      tmpv[7] = sci->orig_args.arg8;
1458      VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
1459                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
1460   }
1461
1462   vg_assert(ent);
1463   vg_assert(ent->before);
1464   (ent->before)( tid,
1465                  &layout,
1466                  &sci->args, &sci->status, &sci->flags );
1467
1468   /* The pre-handler may have modified:
1469         sci->args
1470         sci->status
1471         sci->flags
1472      All else remains unchanged.
1473      Although the args may be modified, pre handlers are not allowed
1474      to change the syscall number.
1475   */
1476   /* Now we proceed according to what the pre-handler decided. */
1477   vg_assert(sci->status.what == SsHandToKernel
1478             || sci->status.what == SsComplete);
1479   vg_assert(sci->args.sysno == sci->orig_args.sysno);
1480
1481   if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
1482      /* The pre-handler completed the syscall itself, declaring
1483         success. */
1484      if (sci->flags & SfNoWriteResult) {
1485         PRINT(" --> [pre-success] NoWriteResult");
1486      } else {
1487         PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
1488               (ULong)sr_ResHI(sci->status.sres),
1489               (ULong)sr_Res(sci->status.sres));
1490      }
1491      /* In this case the allowable flags are to ask for a signal-poll
1492         and/or a yield after the call.  Changing the args isn't
1493         allowed. */
1494      vg_assert(0 == (sci->flags
1495                      & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
1496      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1497   }
1498
1499   else
1500   if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
1501      /* The pre-handler decided to fail syscall itself. */
1502      PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
1503      /* In this case, the pre-handler is also allowed to ask for the
1504         post-handler to be run anyway.  Changing the args is not
1505         allowed. */
1506      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1507      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1508   }
1509
1510   else
1511   if (sci->status.what != SsHandToKernel) {
1512      /* huh?! */
1513      vg_assert(0);
1514   }
1515
1516   else /* (sci->status.what == HandToKernel) */ {
1517      /* Ok, this is the usual case -- and the complicated one.  There
1518         are two subcases: sync and async.  async is the general case
1519         and is to be used when there is any possibility that the
1520         syscall might block [a fact that the pre-handler must tell us
1521         via the sci->flags field.]  Because the tidying-away /
1522         context-switch overhead of the async case could be large, if
1523         we are sure that the syscall will not block, we fast-track it
1524         by doing it directly in this thread, which is a lot
1525         simpler. */
1526
1527      /* Check that the given flags are allowable: MayBlock, PollAfter
1528         and PostOnFail are ok. */
1529      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1530
1531      if (sci->flags & SfMayBlock) {
1532
1533         /* Syscall may block, so run it asynchronously */
1534         vki_sigset_t mask;
1535
1536         PRINT(" --> [async] ... \n");
1537
1538         mask = tst->sig_mask;
1539         sanitize_client_sigmask(&mask);
1540
1541         /* Gack.  More impedance matching.  Copy the possibly
1542            modified syscall args back into the guest state. */
1543         /* JRS 2009-Mar-16: if the syscall args are possibly modified,
1544            then this assertion is senseless:
1545              vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1546            The case that exposed it was sys_posix_spawn on Darwin,
1547            which heavily modifies its arguments but then lets the call
1548            go through anyway, with SfToBlock set, hence we end up here. */
1549         putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
1550
1551         /* Drop the bigLock */
1552         VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
1553         /* Urr.  We're now in a race against other threads trying to
1554            acquire the bigLock.  I guess that doesn't matter provided
1555            that do_syscall_for_client only touches thread-local
1556            state. */
1557
1558         /* Do the call, which operates directly on the guest state,
1559            not on our abstracted copies of the args/result. */
1560         do_syscall_for_client(sysno, tst, &mask);
1561
1562         /* do_syscall_for_client may not return if the syscall was
1563            interrupted by a signal.  In that case, flow of control is
1564            first to m_signals.async_sighandler, which calls
1565            VG_(fixup_guest_state_after_syscall_interrupted), which
1566            fixes up the guest state, and possibly calls
1567            VG_(post_syscall).  Once that's done, control drops back
1568            to the scheduler.  */
1569
1570         /* Darwin: do_syscall_for_client may not return if the
1571            syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
1572            responded by starting the thread at wqthread_hijack(reuse=1)
1573            (to run another workqueue item). In that case, wqthread_hijack
1574            calls ML_(wqthread_continue), which is similar to
1575            VG_(fixup_guest_state_after_syscall_interrupted). */
1576
1577         /* Reacquire the lock */
1578         VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
1579
1580         /* Even more impedance matching.  Extract the syscall status
1581            from the guest state. */
1582         getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
1583         vg_assert(sci->status.what == SsComplete);
1584
1585         /* Be decorative, if required. */
1586         if (VG_(clo_trace_syscalls)) {
1587            Bool failed = sr_isError(sci->status.sres);
1588            if (failed) {
1589               PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
1590                     VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
1591                     (ULong)sr_Err(sci->status.sres));
1592            } else {
1593               PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
1594                     "Success(0x%llx:0x%llx)",
1595                     VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
1596                     (ULong)sr_ResHI(sci->status.sres),
1597                     (ULong)sr_Res(sci->status.sres) );
1598            }
1599         }
1600
1601      } else {
1602
1603         /* run the syscall directly */
1604         /* The pre-handler may have modified the syscall args, but
1605            since we're passing values in ->args directly to the
1606            kernel, there's no point in flushing them back to the
1607            guest state.  Indeed doing so could be construed as
1608            incorrect. */
1609         SysRes sres
1610            = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
1611                                     sci->args.arg3, sci->args.arg4,
1612                                     sci->args.arg5, sci->args.arg6,
1613                                     sci->args.arg7, sci->args.arg8 );
1614         sci->status = convert_SysRes_to_SyscallStatus(sres);
1615
1616         /* Be decorative, if required. */
1617         if (VG_(clo_trace_syscalls)) {
1618            Bool failed = sr_isError(sci->status.sres);
1619            if (failed) {
1620               PRINT("[sync] --> Failure(0x%llx)",
1621                     (ULong)sr_Err(sci->status.sres) );
1622            } else {
1623               PRINT("[sync] --> Success(0x%llx:0x%llx)",
1624                     (ULong)sr_ResHI(sci->status.sres),
1625                     (ULong)sr_Res(sci->status.sres) );
1626            }
1627         }
1628      }
1629   }
1630
1631   vg_assert(sci->status.what == SsComplete);
1632
1633   vg_assert(VG_(is_running_thread)(tid));
1634
1635   /* Dump the syscall result back in the guest state.  This is
1636      a platform-specific action. */
1637   if (!(sci->flags & SfNoWriteResult))
1638      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
1639
1640   /* Situation now:
1641      - the guest state is now correctly modified following the syscall
1642      - modified args, original args and syscall status are still
1643        available in the syscallInfo[] entry for this syscall.
1644
1645      Now go on to do the post-syscall actions (read on down ..)
1646   */
1647   PRINT(" ");
1648   VG_(post_syscall)(tid);
1649   PRINT("\n");
1650}
1651
1652
1653/* Perform post syscall actions.  The expected state on entry is
1654   precisely as at the end of VG_(client_syscall), that is:
1655
1656   - guest state up to date following the syscall
1657   - modified args, original args and syscall status are still
1658     available in the syscallInfo[] entry for this syscall.
1659   - syscall status matches what's in the guest state.
1660
1661   There are two ways to get here: the normal way -- being called by
1662   VG_(client_syscall), and the unusual way, from
1663   VG_(fixup_guest_state_after_syscall_interrupted).
1664   Darwin: there's a third way, ML_(wqthread_continue).
1665*/
1666void VG_(post_syscall) (ThreadId tid)
1667{
1668   SyscallInfo*             sci;
1669   const SyscallTableEntry* ent;
1670   SyscallStatus            test_status;
1671   ThreadState*             tst;
1672   Word sysno;
1673
1674   /* Preliminaries */
1675   vg_assert(VG_(is_valid_tid)(tid));
1676   vg_assert(tid >= 1 && tid < VG_N_THREADS);
1677   vg_assert(VG_(is_running_thread)(tid));
1678
1679   tst = VG_(get_ThreadState)(tid);
1680   sci = & syscallInfo[tid];
1681
1682   /* m_signals.sigvgkill_handler might call here even when not in
1683      a syscall. */
1684   if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
1685      sci->status.what = SsIdle;
1686      return;
1687   }
1688
1689   /* Validate current syscallInfo entry.  In particular we require
1690      that the current .status matches what's actually in the guest
1691      state.  At least in the normal case where we have actually
1692      previously written the result into the guest state. */
1693   vg_assert(sci->status.what == SsComplete);
1694
1695   getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
1696   if (!(sci->flags & SfNoWriteResult))
1697      vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
1698   /* Failure of the above assertion on Darwin can indicate a problem
1699      in the syscall wrappers that pre-fail or pre-succeed the
1700      syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
1701      when they really should call SET_STATUS_from_SysRes.  The former
1702      create a UNIX-class syscall result on Darwin, which may not be
1703      correct for the syscall; if that's the case then this assertion
1704      fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
1705      non-Darwin platforms this assertion is should never fail, and this
1706      comment is completely irrelevant. */
1707   /* Ok, looks sane */
1708
1709   /* Get the system call number.  Because the pre-handler isn't
1710      allowed to mess with it, it should be the same for both the
1711      original and potentially-modified args. */
1712   vg_assert(sci->args.sysno == sci->orig_args.sysno);
1713   sysno = sci->args.sysno;
1714   ent = get_syscall_entry(sysno);
1715
1716   /* pre: status == Complete (asserted above) */
1717   /* Consider either success or failure.  Now run the post handler if:
1718      - it exists, and
1719      - Success or (Failure and PostOnFail is set)
1720   */
1721   if (ent->after
1722       && ((!sr_isError(sci->status.sres))
1723           || (sr_isError(sci->status.sres)
1724               && (sci->flags & SfPostOnFail) ))) {
1725
1726      (ent->after)( tid, &sci->args, &sci->status );
1727   }
1728
1729   /* Because the post handler might have changed the status (eg, the
1730      post-handler for sys_open can change the result from success to
1731      failure if the kernel supplied a fd that it doesn't like), once
1732      again dump the syscall result back in the guest state.*/
1733   if (!(sci->flags & SfNoWriteResult))
1734      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
1735
1736   /* Do any post-syscall actions required by the tool. */
1737   if (VG_(needs).syscall_wrapper) {
1738      UWord tmpv[8];
1739      tmpv[0] = sci->orig_args.arg1;
1740      tmpv[1] = sci->orig_args.arg2;
1741      tmpv[2] = sci->orig_args.arg3;
1742      tmpv[3] = sci->orig_args.arg4;
1743      tmpv[4] = sci->orig_args.arg5;
1744      tmpv[5] = sci->orig_args.arg6;
1745      tmpv[6] = sci->orig_args.arg7;
1746      tmpv[7] = sci->orig_args.arg8;
1747      VG_TDICT_CALL(tool_post_syscall, tid,
1748                    sysno,
1749                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
1750                    sci->status.sres);
1751   }
1752
1753   /* The syscall is done. */
1754   vg_assert(sci->status.what == SsComplete);
1755   sci->status.what = SsIdle;
1756
1757   /* The pre/post wrappers may have concluded that pending signals
1758      might have been created, and will have set SfPollAfter to
1759      request a poll for them once the syscall is done. */
1760   if (sci->flags & SfPollAfter)
1761      VG_(poll_signals)(tid);
1762
1763   /* Similarly, the wrappers might have asked for a yield
1764      afterwards. */
1765   if (sci->flags & SfYieldAfter)
1766      VG_(vg_yield)();
1767}
1768
1769
1770/* ---------------------------------------------------------------------
1771   Dealing with syscalls which get interrupted by a signal:
1772   VG_(fixup_guest_state_after_syscall_interrupted)
1773   ------------------------------------------------------------------ */
1774
1775/* Syscalls done on behalf of the client are finally handed off to the
1776   kernel in VG_(client_syscall) above, either by calling
1777   do_syscall_for_client (the async case), or by calling
1778   VG_(do_syscall6) (the sync case).
1779
1780   If the syscall is not interrupted by a signal (it may block and
1781   later unblock, but that's irrelevant here) then those functions
1782   eventually return and so control is passed to VG_(post_syscall).
1783   NB: not sure if the sync case can actually get interrupted, as it
1784   operates with all signals masked.
1785
1786   However, the syscall may get interrupted by an async-signal.  In
1787   that case do_syscall_for_client/VG_(do_syscall6) do not
1788   return.  Instead we wind up in m_signals.async_sighandler.  We need
1789   to fix up the guest state to make it look like the syscall was
1790   interrupted for guest.  So async_sighandler calls here, and this
1791   does the fixup.  Note that from here we wind up calling
1792   VG_(post_syscall) too.
1793*/
1794
1795
1796/* These are addresses within ML_(do_syscall_for_client_WRK).  See
1797   syscall-$PLAT.S for details.
1798*/
1799#if defined(VGO_linux)
1800  extern const Addr ML_(blksys_setup);
1801  extern const Addr ML_(blksys_restart);
1802  extern const Addr ML_(blksys_complete);
1803  extern const Addr ML_(blksys_committed);
1804  extern const Addr ML_(blksys_finished);
1805#elif defined(VGO_darwin)
  /* Darwin requires extra ugliness */
1807  extern const Addr ML_(blksys_setup_MACH);
1808  extern const Addr ML_(blksys_restart_MACH);
1809  extern const Addr ML_(blksys_complete_MACH);
1810  extern const Addr ML_(blksys_committed_MACH);
1811  extern const Addr ML_(blksys_finished_MACH);
1812  extern const Addr ML_(blksys_setup_MDEP);
1813  extern const Addr ML_(blksys_restart_MDEP);
1814  extern const Addr ML_(blksys_complete_MDEP);
1815  extern const Addr ML_(blksys_committed_MDEP);
1816  extern const Addr ML_(blksys_finished_MDEP);
1817  extern const Addr ML_(blksys_setup_UNIX);
1818  extern const Addr ML_(blksys_restart_UNIX);
1819  extern const Addr ML_(blksys_complete_UNIX);
1820  extern const Addr ML_(blksys_committed_UNIX);
1821  extern const Addr ML_(blksys_finished_UNIX);
1822#else
1823# error "Unknown OS"
1824#endif
1825
1826
/* Back up guest state to restart a system call.  For each platform,
   this moves the guest program counter back over the syscall
   instruction that was just (partially) executed, then asserts that
   the bytes at the backed-up PC really are that platform's syscall
   instruction -- a failure here means the caller asked us to restart
   something that was not a syscall. */

void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
{
#if defined(VGP_x86_linux)
   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;

      if (p[0] != 0xcd || p[1] != 0x80)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);

      vg_assert(p[0] == 0xcd && p[1] == 0x80);
   }

#elif defined(VGP_amd64_linux)
   arch->vex.guest_RIP -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0F 05
   */
   {
      UChar *p = (UChar *)arch->vex.guest_RIP;

      if (p[0] != 0x0F || p[1] != 0x05)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_RIP, p[0], p[1]);

      vg_assert(p[0] == 0x0F && p[1] == 0x05);
   }

#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
   }

#elif defined(VGP_arm_linux)
   /* R15T's bottom bit distinguishes Thumb (1) from ARM (0) mode. */
   if (arch->vex.guest_R15T & 1) {
      // Thumb mode.  SVC is encoded as
      //   1101 1111 imm8
      // where imm8 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
      // The -1 below strips the Thumb bit to get the real insn address.
      UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
      Bool   valid = p[0] == 0 && p[1] == 0xDF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (Thumb) syscall that is not syscall "
                      "at %#llx %02x %02x\n",
                      arch->vex.guest_R15T - 1ULL, p[0], p[1]);
      }
      vg_assert(valid);
      // FIXME: NOTE, this really isn't right.  We need to back up
      // ITSTATE to what it was before the SVC instruction, but we
      // don't know what it was.  At least assert that it is now
      // zero, because if it is nonzero then it must also have
      // been nonzero for the SVC itself, which means it was
      // conditional.  Urk.
      vg_assert(arch->vex.guest_ITSTATE == 0);
   } else {
      // ARM mode.  SVC is encoded as
      //   cond 1111 imm24
      // where imm24 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
      UChar* p     = (UChar*)arch->vex.guest_R15T;
      // Little-endian byte order: imm24 in p[0..2], cond+1111 in p[3].
      Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
                     && (p[3] & 0xF) == 0xF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (ARM) syscall that is not syscall "
                      "at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]);
      }
      vg_assert(valid);
   }

#elif defined(VGP_x86_darwin)
   /* On Darwin we saved the pre-syscall IP, so just restore it rather
      than computing a fixed offset backwards. */
   arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80   (Unix-class)
      int $0x81 == CD 81   (Mach-class)
      int $0x82 == CD 82   (mdep-class)
      sysenter  == 0F 34
   */
   {
       UChar *p = (UChar *)arch->vex.guest_EIP;
       Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
                  || (p[0] == 0xCD && p[1] == 0x81)
                  || (p[0] == 0xCD && p[1] == 0x82)
                  || (p[0] == 0x0F && p[1] == 0x34);
       if (!ok)
           VG_(message)(Vg_DebugMsg,
                        "?! restarting over syscall at %#x %02x %02x\n",
                        arch->vex.guest_EIP, p[0], p[1]);
       vg_assert(ok);
   }

#elif defined(VGP_amd64_darwin)
   // DDD: #warning GrP fixme amd64 restart unimplemented
   vg_assert(0);

#elif defined(VGP_s390x_linux)
   arch->vex.guest_IA -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0A <num>
      (only the 0A opcode byte is checked; the second byte is the
      syscall number and may legitimately be anything) */
   {
      UChar *p = (UChar *)arch->vex.guest_IA;
      if (p[0] != 0x0A)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_IA, p[0], p[1]);

      vg_assert(p[0] == 0x0A);
   }

#elif defined(VGP_mips32_linux)

   arch->vex.guest_PC -= 4;             // sizeof(mips instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      little endian
      syscall == 0C 00 00 00
      big endian
      syscall == 00 00 00 0C
   */
   {
      UChar *p = (UChar *)(arch->vex.guest_PC);
#     if defined (VG_LITTLEENDIAN)
      if (p[0] != 0x0c || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x00)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x %02x %02x\n",
                      arch->vex.guest_PC, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x0c && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x00);
#     elif defined (VG_BIGENDIAN)
      if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x0c)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x %02x %02x\n",
                      arch->vex.guest_PC, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x0c);
#     else
#        error "Unknown endianness"
#     endif
   }

#else
#  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
#endif
}
2009
2010
2011/*
2012   Fix up the guest state when a syscall is interrupted by a signal
2013   and so has been forced to return 'sysret'.
2014
2015   To do this, we determine the precise state of the syscall by
2016   looking at the (real) IP at the time the signal happened.  The
2017   syscall sequence looks like:
2018
2019     1. unblock signals
2020     2. perform syscall
2021     3. save result to guest state (EAX, RAX, R3+CR0.SO, R0, V0)
2022     4. re-block signals
2023
2024   If a signal
2025   happens at      Then     Why?
2026   [1-2)           restart  nothing has happened (restart syscall)
2027   [2]             restart  syscall hasn't started, or kernel wants to restart
2028   [2-3)           save     syscall complete, but results not saved
   [3-4)           nothing  syscall complete, results saved
2030
2031   Sometimes we never want to restart an interrupted syscall (because
2032   sigaction says not to), so we only restart if "restart" is True.
2033
2034   This will also call VG_(post_syscall) if the syscall has actually
2035   completed (either because it was interrupted, or because it
2036   actually finished).  It will not call VG_(post_syscall) if the
2037   syscall is set up for restart, which means that the pre-wrapper may
2038   get called multiple times.
2039*/
2040
void
VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
                                                  Addr     ip,
                                                  SysRes   sres,
                                                  Bool     restart)
{
   /* Note that we don't know the syscall number here, since (1) in
      general there's no reliable way to get hold of it short of
      stashing it in the guest state before the syscall, and (2) in
      any case we don't need to know it for the actions done by this
      routine.

      Furthermore, 'sres' is only used in the case where the syscall
      is complete, but the result has not been committed to the guest
      state yet.  In any other situation it will be meaningless and
      therefore ignored. */

   ThreadState*     tst;
   SyscallStatus    canonical;
   ThreadArchState* th_regs;
   SyscallInfo*     sci;

   /* Compute some Booleans indicating which range of
      ML_(do_syscall_for_client_WRK) the interrupted IP falls in.
      The ranges are delimited by the ML_(blksys_*) labels exported
      from syscall-$PLAT.S. */
   Bool outside_range,
        in_setup_to_restart,      // [1,2) in the .S files
        at_restart,               // [2]   in the .S files
        in_complete_to_committed, // [3,4) in the .S files
        in_committed_to_finished; // [4,5) in the .S files

#  if defined(VGO_linux)
   outside_range
      = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
   in_setup_to_restart
      = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
   at_restart
      = ip == ML_(blksys_restart);
   in_complete_to_committed
      = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
   in_committed_to_finished
      = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
#  elif defined(VGO_darwin)
   /* Darwin has three separate syscall dispatch paths (Mach, mdep,
      UNIX), each with its own set of labels; test all three. */
   outside_range
      =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
      && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
      && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
   in_setup_to_restart
      =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
      || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
      || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
   at_restart
      =  (ip == ML_(blksys_restart_MACH))
      || (ip == ML_(blksys_restart_MDEP))
      || (ip == ML_(blksys_restart_UNIX));
   in_complete_to_committed
      =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
      || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
      || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
   in_committed_to_finished
      =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
      || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
      || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
   /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
#  else
#    error "Unknown OS"
#  endif

   if (VG_(clo_trace_signals))
      VG_(message)( Vg_DebugMsg,
                    "interrupted_syscall: tid=%d, ip=0x%llx, "
                    "restart=%s, sres.isErr=%s, sres.val=%lld\n",
                    (Int)tid,
                    (ULong)ip,
                    restart ? "True" : "False",
                    sr_isError(sres) ? "True" : "False",
                    (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   th_regs = &tst->arch;
   sci     = & syscallInfo[tid];

   /* Figure out what the state of the syscall was by examining the
      (real) IP at the time of the signal, and act accordingly. */
   if (outside_range) {
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  not in syscall at all: hmm, very suspicious\n" );
      /* Looks like we weren't in a syscall at all.  Hmm.  Even so,
         the syscall machinery must have been started up, hence the
         assertion below. */
      vg_assert(sci->status.what != SsIdle);
      return;
   }

   /* We should not be here unless this thread had first started up
      the machinery for a syscall by calling VG_(client_syscall).
      Hence: */
   vg_assert(sci->status.what != SsIdle);

   /* now, do one of four fixup actions, depending on where the IP has
      got to. */

   if (in_setup_to_restart) {
      /* syscall hasn't even started; go around again */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
      vg_assert(sci->status.what == SsHandToKernel);
      ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   }

   else
   if (at_restart) {
      /* We're either about to run the syscall, or it was interrupted
         and the kernel restarted it.  Restart if asked, otherwise
         EINTR it. */
      if (restart) {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
         ML_(fixup_guest_state_to_restart_syscall)(th_regs);
      } else {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
         /* Fabricate an EINTR failure result and commit it, exactly
            as if the kernel had returned it. */
         canonical = convert_SysRes_to_SyscallStatus(
                        VG_(mk_SysRes_Error)( VKI_EINTR )
                     );
         if (!(sci->flags & SfNoWriteResult))
            putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
         sci->status = canonical;
         VG_(post_syscall)(tid);
      }
   }

   else
   if (in_complete_to_committed) {
      /* Syscall complete, but result hasn't been written back yet.
         Write the SysRes we were supplied with back to the guest
         state. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed, but uncommitted: committing\n");
      canonical = convert_SysRes_to_SyscallStatus( sres );
      if (!(sci->flags & SfNoWriteResult))
         putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
      sci->status = canonical;
      VG_(post_syscall)(tid);
   }

   else
   if (in_committed_to_finished) {
      /* Result committed, but the signal mask has not been restored;
         we expect our caller (the signal handler) will have fixed
         this up.  Just pick the already-committed status back out of
         the guest state. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed and committed: nothing to do\n");
      getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
      vg_assert(sci->status.what == SsComplete);
      VG_(post_syscall)(tid);
   }

   else
      VG_(core_panic)("?? strange syscall interrupt state?");

   /* In all cases, the syscall is now finished (even if we called
      ML_(fixup_guest_state_to_restart_syscall), since that just
      re-positions the guest's IP for another go at it).  So we need
      to record that fact. */
   sci->status.what = SsIdle;
}
2211
2212
2213#if defined(VGO_darwin)
2214// Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
2215// This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
2216// This longjmps back to the scheduler.
2217void ML_(wqthread_continue_NORETURN)(ThreadId tid)
2218{
2219   ThreadState*     tst;
2220   SyscallInfo*     sci;
2221
2222   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");
2223
2224   PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
2225         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));
2226
2227   vg_assert(VG_(is_valid_tid)(tid));
2228   vg_assert(tid >= 1 && tid < VG_N_THREADS);
2229   vg_assert(VG_(is_running_thread)(tid));
2230
2231   tst     = VG_(get_ThreadState)(tid);
2232   sci     = & syscallInfo[tid];
2233   vg_assert(sci->status.what != SsIdle);
2234   vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall
2235
2236   // Pretend the syscall completed normally, but don't touch the thread state.
2237   sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
2238   sci->flags |= SfNoWriteResult;
2239   VG_(post_syscall)(tid);
2240
2241   sci->status.what = SsIdle;
2242
2243   vg_assert(tst->sched_jmpbuf_valid);
2244   VG_MINIMAL_LONGJMP(tst->sched_jmpbuf);
2245
2246   /* NOTREACHED */
2247   vg_assert(0);
2248}
2249#endif
2250
2251
2252/* ---------------------------------------------------------------------
2253   A place to store the where-to-call-when-really-done pointer
2254   ------------------------------------------------------------------ */
2255
// When the final thread is done, where shall I call to shutdown the
// system cleanly?  Is set once at startup (in m_main) and never
// changes after that.  Is basically a pointer to the exit
// continuation.  This is all just a nasty hack to avoid calling
// directly from m_syswrap to m_main at exit, since that would cause
// m_main to become part of a module cycle, which is silly.
// NULL until m_main installs the continuation at startup.
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
       (ThreadId,VgSchedReturnCode)
   = NULL;
2265
2266/*--------------------------------------------------------------------*/
2267/*--- end                                                          ---*/
2268/*--------------------------------------------------------------------*/
2269