1
2/*--------------------------------------------------------------------*/
3/*--- Doing syscalls.                                  m_syscall.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2000-2010 Julian Seward
11      jseward@acm.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "pub_core_basics.h"
32#include "pub_core_libcassert.h"
33#include "pub_core_vki.h"
34#include "pub_core_vkiscnums.h"
35#include "pub_core_syscall.h"
36
37/* ---------------------------------------------------------------------
38   Building syscall return values.
39   ------------------------------------------------------------------ */
40
41#if defined(VGO_linux)
42
43/* Make a SysRes value from a syscall return value.  This is
44   Linux-specific.
45
46   From:
47   http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/sysdeps/unix/sysv/
48   linux/i386/sysdep.h?
49   rev=1.28&content-type=text/x-cvsweb-markup&cvsroot=glibc
50
51   Linux uses a negative return value to indicate syscall errors,
52   unlike most Unices, which use the condition codes' carry flag.
53
54   Since version 2.1 the return value of a system call might be
55   negative even if the call succeeded.  E.g., the 'lseek' system call
56   might return a large offset.  Therefore we must not anymore test
57   for < 0, but test for a real error by making sure the value in %eax
58   is a real error number.  Linus said he will make sure the no
59   syscall returns a value in -1 .. -4095 as a valid result so we can
60   safely test with -4095.
61*/
62
63SysRes VG_(mk_SysRes_x86_linux) ( Int val ) {
64   SysRes res;
65   res._isError = val >= -4095 && val <= -1;
66   if (res._isError) {
67      res._val = (UInt)(-val);
68   } else {
69      res._val = (UInt)val;
70   }
71   return res;
72}
73
74/* Similarly .. */
75SysRes VG_(mk_SysRes_amd64_linux) ( Long val ) {
76   SysRes res;
77   res._isError = val >= -4095 && val <= -1;
78   if (res._isError) {
79      res._val = (ULong)(-val);
80   } else {
81      res._val = (ULong)val;
82   }
83   return res;
84}
85
86/* PPC uses the CR7.SO bit to flag an error (CR0 in IBM-speak) */
87/* Note this must be in the bottom bit of the second arg */
88SysRes VG_(mk_SysRes_ppc32_linux) ( UInt val, UInt cr0so ) {
89   SysRes res;
90   res._isError = (cr0so & 1) != 0;
91   res._val     = val;
92   return res;
93}
94
95/* As per ppc32 version, cr0.so must be in l.s.b. of 2nd arg */
96SysRes VG_(mk_SysRes_ppc64_linux) ( ULong val, ULong cr0so ) {
97   SysRes res;
98   res._isError = (cr0so & 1) != 0;
99   res._val     = val;
100   return res;
101}
102
103SysRes VG_(mk_SysRes_arm_linux) ( Int val ) {
104   SysRes res;
105   res._isError = val >= -4095 && val <= -1;
106   if (res._isError) {
107      res._val = (UInt)(-val);
108   } else {
109      res._val = (UInt)val;
110   }
111   return res;
112}
113
114/* Generic constructors. */
115SysRes VG_(mk_SysRes_Error) ( UWord err ) {
116   SysRes r;
117   r._isError = True;
118   r._val     = err;
119   return r;
120}
121
122SysRes VG_(mk_SysRes_Success) ( UWord res ) {
123   SysRes r;
124   r._isError = False;
125   r._val     = res;
126   return r;
127}
128
129
130#elif defined(VGO_aix5)
131
132/* AIX scheme: we have to record both 'res' (r3) and 'err' (r4).  If
133   'err' is nonzero then the call has failed, but it could still be
134   that AIX userspace will ignore 'err' and instead consult 'res' to
135   determine if the call failed.  So we have to record both. */
136SysRes VG_(mk_SysRes_ppc32_aix5) ( UInt res, UInt err ) {
137   SysRes r;
138   r.res     = res;
139   r.err     = err;
140   r.isError = r.err != 0;
141   return r;
142}
143
144SysRes VG_(mk_SysRes_ppc64_aix5) ( ULong res, ULong err ) {
145   SysRes r;
146   r.res     = res;
147   r.err     = err;
148   r.isError = r.err != 0;
149   return r;
150}
151
152/* Generic constructors. */
153SysRes VG_(mk_SysRes_Error) ( UWord err ) {
154   SysRes r;
155   r._res     = 0;
156   r._err     = err;
157   r._isError = True;
158   return r;
159}
160
161SysRes VG_(mk_SysRes_Success) ( UWord res ) {
162   SysRes r;
163   r._res     = res;
164   r._err     = 0;
165   r._isError = False;
166   return r;
167}
168
169
170#elif defined(VGO_darwin)
171
172/* Darwin: Some syscalls return a double-word result. */
173SysRes VG_(mk_SysRes_x86_darwin) ( UChar scclass, Bool isErr,
174                                   UInt wHI, UInt wLO )
175{
176   SysRes res;
177   res._wHI  = 0;
178   res._wLO  = 0;
179   res._mode = 0; /* invalid */
180   vg_assert(isErr == False || isErr == True);
181   vg_assert(sizeof(UWord) == sizeof(UInt));
182   switch (scclass) {
183      case VG_DARWIN_SYSCALL_CLASS_UNIX:
184         res._wLO  = wLO;
185         res._wHI  = wHI;
186         res._mode = isErr ? SysRes_UNIX_ERR : SysRes_UNIX_OK;
187         break;
188      case VG_DARWIN_SYSCALL_CLASS_MACH:
189         vg_assert(!isErr);
190         vg_assert(wHI == 0);
191         res._wLO  = wLO;
192         res._mode = SysRes_MACH;
193         break;
194      case VG_DARWIN_SYSCALL_CLASS_MDEP:
195         vg_assert(!isErr);
196         vg_assert(wHI == 0);
197         res._wLO  = wLO;
198         res._mode = SysRes_MDEP;
199         break;
200      default:
201         vg_assert(0);
202   }
203   return res;
204}
205
206SysRes VG_(mk_SysRes_amd64_darwin) ( UChar scclass, Bool isErr,
207                                     ULong wHI, ULong wLO )
208{
209   SysRes res;
210   res._wHI  = 0;
211   res._wLO  = 0;
212   res._mode = 0; /* invalid */
213   vg_assert(isErr == False || isErr == True);
214   vg_assert(sizeof(UWord) == sizeof(ULong));
215   switch (scclass) {
216      case VG_DARWIN_SYSCALL_CLASS_UNIX:
217         res._wLO  = wLO;
218         res._wHI  = wHI;
219         res._mode = isErr ? SysRes_UNIX_ERR : SysRes_UNIX_OK;
220         break;
221      case VG_DARWIN_SYSCALL_CLASS_MACH:
222         vg_assert(!isErr);
223         vg_assert(wHI == 0);
224         res._wLO  = wLO;
225         res._mode = SysRes_MACH;
226         break;
227      case VG_DARWIN_SYSCALL_CLASS_MDEP:
228         vg_assert(!isErr);
229         vg_assert(wHI == 0);
230         res._wLO  = wLO;
231         res._mode = SysRes_MDEP;
232         break;
233      default:
234         vg_assert(0);
235   }
236   return res;
237}
238
239/* Generic constructors.  We assume (without checking if this makes
240   any sense, from the caller's point of view) that these are for the
241   UNIX style of syscall. */
242SysRes VG_(mk_SysRes_Error) ( UWord err ) {
243   SysRes r;
244   r._wHI  = 0;
245   r._wLO  = err;
246   r._mode = SysRes_UNIX_ERR;
247   return r;
248}
249
250SysRes VG_(mk_SysRes_Success) ( UWord res ) {
251   SysRes r;
252   r._wHI  = 0;
253   r._wLO  = res;
254   r._mode = SysRes_UNIX_OK;
255   return r;
256}
257
258
259#else
260#  error "Unknown OS"
261#endif
262
263
264/* ---------------------------------------------------------------------
265   VG_(do_syscall): A function for doing syscalls.
266   ------------------------------------------------------------------ */
267
268#if defined(VGP_x86_linux)
269/* x86-linux worker called by VG_(do_syscall).

   Incoming args (syscall number + up to 6 args) come on the stack
   (i.e. the C calling convention).

   The syscall number goes in %eax.  The args are passed to the syscall in
   the regs %ebx, %ecx, %edx, %esi, %edi, %ebp, i.e. the kernel's syscall
   calling convention.

   %eax gets the return value.  Not sure which registers the kernel
   clobbers, so we preserve all the callee-save regs (%esi, %edi, %ebx,
   %ebp).

   In the loads below, the "16+" term accounts for the four registers
   pushed on entry; the rest of the displacement selects the incoming
   argument.
*/
280extern UWord do_syscall_WRK (
281          UWord syscall_no,
282          UWord a1, UWord a2, UWord a3,
283          UWord a4, UWord a5, UWord a6
284       );
285asm(
286".text\n"
287"do_syscall_WRK:\n"
288"	push	%esi\n"                 /* save callee-save regs */
289"	push	%edi\n"
290"	push	%ebx\n"
291"	push	%ebp\n"
292"	movl	16+ 4(%esp),%eax\n"     /* syscall_no */
293"	movl	16+ 8(%esp),%ebx\n"     /* a1 */
294"	movl	16+12(%esp),%ecx\n"     /* a2 */
295"	movl	16+16(%esp),%edx\n"     /* a3 */
296"	movl	16+20(%esp),%esi\n"     /* a4 */
297"	movl	16+24(%esp),%edi\n"     /* a5 */
298"	movl	16+28(%esp),%ebp\n"     /* a6 */
299"	int	$0x80\n"                /* do the syscall; result in %eax */
300"	popl	%ebp\n"                 /* restore in reverse push order */
301"	popl	%ebx\n"
302"	popl	%edi\n"
303"	popl	%esi\n"
304"	ret\n"
305".previous\n"
306);
307
308#elif defined(VGP_amd64_linux)
309/* amd64-linux worker called by VG_(do_syscall).

   Incoming args (syscall number + up to 6 args) come in %rdi, %rsi,
   %rdx, %rcx, %r8, %r9, and the last one on the stack (i.e. the C
   calling convention).

   The syscall number goes in %rax.  The args are passed to the syscall in
   the regs %rdi, %rsi, %rdx, %r10, %r8, %r9 (yes, really %r10, not %rcx),
   i.e. the kernel's syscall calling convention.

   %rax gets the return value.  %rcx and %r11 are clobbered by the syscall;
   no matter, they are caller-save (the syscall clobbers no callee-save
   regs, so we don't have to do any register saving/restoring).
*/
321extern UWord do_syscall_WRK (
322          UWord syscall_no,
323          UWord a1, UWord a2, UWord a3,
324          UWord a4, UWord a5, UWord a6
325       );
326asm(
327".text\n"
328"do_syscall_WRK:\n"
329        /* Convert function calling convention --> syscall calling
           convention.  Each move shifts one incoming value into the
           register the kernel expects it in. */
331"	movq	%rdi, %rax\n"           /* syscall_no */
332"	movq	%rsi, %rdi\n"           /* a1 */
333"	movq	%rdx, %rsi\n"           /* a2 */
334"	movq	%rcx, %rdx\n"           /* a3 */
335"	movq	%r8,  %r10\n"           /* a4 */
336"	movq	%r9,  %r8\n"            /* a5 */
337"	movq    8(%rsp), %r9\n"	 /* last arg from stack */
338"	syscall\n"                      /* result in %rax */
339"	ret\n"
340".previous\n"
341);
342
343#elif defined(VGP_ppc32_linux)
344/* ppc32-linux worker called by VG_(do_syscall).

   Incoming args (syscall number + up to 6 args) come in %r3:%r9.

   The syscall number goes in %r0.  The args are passed to the syscall in
   the regs %r3:%r8, i.e. the kernel's syscall calling convention.

   The %cr0.so bit flags an error.
   We return the syscall return value in %r3, and the %cr0.so in
   the lowest bit of %r4.
   We return a ULong, of which %r3 is the high word, and %r4 the low.
   No callee-save regs are clobbered, so no saving/restoring is needed.
*/
355extern ULong do_syscall_WRK (
356          UWord syscall_no,
357          UWord a1, UWord a2, UWord a3,
358          UWord a4, UWord a5, UWord a6
359       );
360asm(
361".text\n"
362"do_syscall_WRK:\n"
363"        mr      0,3\n"         /* syscall_no -> r0 */
364"        mr      3,4\n"         /* a1 .. a6: shift each down one register */
365"        mr      4,5\n"
366"        mr      5,6\n"
367"        mr      6,7\n"
368"        mr      7,8\n"
369"        mr      8,9\n"
370"        sc\n"                  /* syscall: sets %cr0.so on error         */
371"        mfcr    4\n"           /* %cr -> low word of return var          */
372"        rlwinm  4,4,4,31,31\n" /* rotate flag bit so to lsb, and mask it */
373"        blr\n"                 /* and return                             */
374".previous\n"
375);
376
377#elif defined(VGP_ppc64_linux)
378/* ppc64-linux worker called by VG_(do_syscall).

   Due to the need to return 65 bits of result, this is completely
   different from the ppc32 case.  The single arg register points to a
   7-word block containing the syscall # and the 6 args.  The syscall
   result proper is put in [0] of the block, and %cr0.so is in the
   bottom bit of [1].  The block is therefore both input and output:
   entries [0] and [1] are overwritten. */
383extern void do_syscall_WRK ( ULong* argblock );
384asm(
385".align   2\n"
386".globl   do_syscall_WRK\n"
387".section \".opd\",\"aw\"\n"
388".align   3\n"
389"do_syscall_WRK:\n"
390".quad    .do_syscall_WRK,.TOC.@tocbase,0\n"
391".previous\n"
392".type    .do_syscall_WRK,@function\n"
393".globl   .do_syscall_WRK\n"
394".do_syscall_WRK:\n"
395"        std  3,-16(1)\n"  /* stash arg */
396"        ld   8, 48(3)\n"  /* sc arg 6 */
397"        ld   7, 40(3)\n"  /* sc arg 5 */
398"        ld   6, 32(3)\n"  /* sc arg 4 */
399"        ld   5, 24(3)\n"  /* sc arg 3 */
400"        ld   4, 16(3)\n"  /* sc arg 2 */
401"        ld   0,  0(3)\n"  /* sc number */
402"        ld   3,  8(3)\n"  /* sc arg 1 (loaded last: overwrites the block ptr) */
403"        sc\n"             /* result in r3 and cr0.so */
404"        ld   5,-16(1)\n"  /* reacquire argblock ptr (r5 is caller-save) */
405"        std  3,0(5)\n"    /* argblock[0] = r3 */
406"        mfcr 3\n"         /* fetch the condition register */
407"        srwi 3,3,28\n"    /* shift its top 4 bits down to the bottom */
408"        andi. 3,3,1\n"    /* keep only the lowest of them */
409"        std  3,8(5)\n"    /* argblock[1] = cr0.s0 & 1 */
410"        blr\n"
411);
412
413#elif defined(VGP_arm_linux)
414/* arm-linux worker called by VG_(do_syscall).

   I think the conventions are:
   args  in r0 r1 r2 r3 r4 r5
   sysno in r7
   return value in r0, w/ same conventions as x86-linux, viz r0 in
   -4095 .. -1 is an error value (this is the range that
   VG_(mk_SysRes_arm_linux) tests for).  All other values are success
   values.

   Note the C-level argument order: the six syscall args come first and
   the syscall number last.  The first four args are used from r0..r3
   as they arrive; a5, a6 and syscall_no are loaded from the stack.
*/
421extern UWord do_syscall_WRK (
422          UWord a1, UWord a2, UWord a3,
423          UWord a4, UWord a5, UWord a6,
424          UWord syscall_no
425       );
426asm(
427".text\n"
428"do_syscall_WRK:\n"
429"         push    {r4, r5, r7}\n"   /* save the regs we are about to overwrite */
430"         ldr     r4, [sp, #12]\n"  /* a5: #12 skips the 3 words just pushed */
431"         ldr     r5, [sp, #16]\n"  /* a6 */
432"         ldr     r7, [sp, #20]\n"  /* syscall_no */
433"         svc     0x0\n"            /* do the syscall; result in r0 */
434"         pop     {r4, r5, r7}\n"
435"         bx      lr\n"
436".previous\n"
437);
438
439#elif defined(VGP_ppc32_aix5)
/* ppc32-aix5 worker called by VG_(do_syscall).

   Performs syscall 'sysno' with up to 8 args.  On return, *res_r3
   holds what the kernel left in r3 (the result) and *res_r4 what it
   left in r4 (the error code).  The args are marshalled through a
   local array, which the asm also uses to hand the two results back. */
440static void do_syscall_WRK ( UWord* res_r3, UWord* res_r4,
441                             UWord sysno,
442                             UWord a1, UWord a2, UWord a3,
443                             UWord a4, UWord a5, UWord a6,
444                             UWord a7, UWord a8 )
445{
446   /* Syscalls on AIX are very similar to function calls:
      - up to 8 args in r3-r10
      - syscall number in r2
      - kernel resumes at 'lr', so must set it appropriately beforehand
      - r3 holds the result and r4 any applicable error code
      See http://www.cs.utexas.edu/users/cart/publications/tr00-04.ps
      and also 'man truss'.
   */
454   /* For some reason gcc-3.3.2 doesn't preserve r31 across the asm
      even though we state it to be trashed.  So use r27 instead. */
456   UWord args[9];
457   args[0] = sysno;
458   args[1] = a1; args[2] = a2;
459   args[3] = a3; args[4] = a4;
460   args[5] = a5; args[6] = a6;
461   args[7] = a7; args[8] = a8;
462
463   __asm__ __volatile__(
464
465      // establish base ptr (r28 := &args[0])
466      "mr   28,%0\n\t"
467
468      // save r2, lr
469      "mr   27,2\n\t" // save r2 in r27
470      "mflr 30\n\t"   // save lr in r30
471
472      // set syscall number and args (4-byte slots of args[])
473      "lwz   2,  0(28)\n\t"
474      "lwz   3,  4(28)\n\t"
475      "lwz   4,  8(28)\n\t"
476      "lwz   5, 12(28)\n\t"
477      "lwz   6, 16(28)\n\t"
478      "lwz   7, 20(28)\n\t"
479      "lwz   8, 24(28)\n\t"
480      "lwz   9, 28(28)\n\t"
481      "lwz  10, 32(28)\n\t"
482
483      // set bit 3 of CR1 otherwise AIX 5.1 returns to the
484      // wrong address after the sc instruction
485      "crorc 6,6,6\n\t"
486
487      // set up LR to point just after the sc insn: the bl captures
      // the address of the mflr below, and adding 16 steps over
      // mflr, addi, mtlr and sc
488      ".long 0x48000005\n\t" // "bl here+4" -- lr := & next insn
489      "mflr 29\n\t"
490      "addi 29,29,16\n\t"
491      "mtlr 29\n\t"
492
493      // do it!
494      "sc\n\t"
495
496      // result is now in r3; save it in args[0]
497      "stw  3,0(28)\n\t"
498      // error code in r4; save it in args[1]
499      "stw  4,4(28)\n\t"
500
501      // restore
502      "mr   2,27\n\t"
503      "mtlr 30\n\t"
504
505      : /*out*/
506      : /*in*/  "b" (&args[0])
507      : /*trash*/
508           /*temps*/    "r31","r30","r29","r28","r27",
509           /*args*/     "r3","r4","r5","r6","r7","r8","r9","r10",
510           /*paranoia*/ "memory","cc","r0","r1","r11","r12","r13",
511                        "xer","ctr","cr0","cr1","cr2","cr3",
512                        "cr4","cr5","cr6","cr7"
513   );
514
   /* The asm overwrote args[0] and args[1] with r3 and r4. */
515   *res_r3 = args[0];
516   *res_r4 = args[1];
517}
518
519#elif defined(VGP_ppc64_aix5)
/* ppc64-aix5 worker called by VG_(do_syscall).

   Performs syscall 'sysno' with up to 8 args.  On return, *res_r3
   holds what the kernel left in r3 (the result) and *res_r4 what it
   left in r4 (the error code).  The args are marshalled through a
   local array, which the asm also uses to hand the two results back. */
520static void do_syscall_WRK ( UWord* res_r3, UWord* res_r4,
521                             UWord sysno,
522                             UWord a1, UWord a2, UWord a3,
523                             UWord a4, UWord a5, UWord a6,
524                             UWord a7, UWord a8 )
525{
526   /* Same scheme as ppc32-aix5, but with 8-byte loads/stores and
      8-byte slots in args[]. */
527   UWord args[9];
528   args[0] = sysno;
529   args[1] = a1; args[2] = a2;
530   args[3] = a3; args[4] = a4;
531   args[5] = a5; args[6] = a6;
532   args[7] = a7; args[8] = a8;
533
534   __asm__ __volatile__(
535
536      // establish base ptr (r28 := &args[0])
537      "mr   28,%0\n\t"
538
539      // save r2, lr
540      "mr   27,2\n\t" // save r2 in r27
541      "mflr 30\n\t"   // save lr in r30
542
543      // set syscall number and args
544      "ld    2,  0(28)\n\t"
545      "ld    3,  8(28)\n\t"
546      "ld    4, 16(28)\n\t"
547      "ld    5, 24(28)\n\t"
548      "ld    6, 32(28)\n\t"
549      "ld    7, 40(28)\n\t"
550      "ld    8, 48(28)\n\t"
551      "ld    9, 56(28)\n\t"
552      "ld   10, 64(28)\n\t"
553
554      // set bit 3 of CR1 otherwise AIX 5.1 returns to the
555      // wrong address after the sc instruction
556      "crorc 6,6,6\n\t"
557
558      // set up LR to point just after the sc insn: the bl captures
      // the address of the mflr below, and adding 16 steps over
      // mflr, addi, mtlr and sc
559      ".long 0x48000005\n\t" // "bl here+4" -- lr := & next insn
560      "mflr 29\n\t"
561      "addi 29,29,16\n\t"
562      "mtlr 29\n\t"
563
564      // do it!
565      "sc\n\t"
566
567      // result is now in r3; save it in args[0]
568      "std  3,0(28)\n\t"
569      // error code in r4; save it in args[1]
570      "std  4,8(28)\n\t"
571
572      // restore
573      "mr   2,27\n\t"
574      "mtlr 30\n\t"
575
576      : /*out*/
577      : /*in*/  "b" (&args[0])
578      : /*trash*/
579           /*temps*/    "r31","r30","r29","r28","r27",
580           /*args*/     "r3","r4","r5","r6","r7","r8","r9","r10",
581           /*paranoia*/ "memory","cc","r0","r1","r11","r12","r13",
582                        "xer","ctr","cr0","cr1","cr2","cr3",
583                        "cr4","cr5","cr6","cr7"
584   );
585
   /* The asm overwrote args[0] and args[1] with r3 and r4. */
586   *res_r3 = args[0];
587   *res_r4 = args[1];
588}
589
590#elif defined(VGP_x86_darwin)
591
592/* Incoming args (syscall number + up to 8 args) come in on the stack
593
594   The kernel's syscall calling convention is:
595   * the syscall number goes in eax
596   * the args are passed to the syscall on the stack,
597     pushed onto the stack R->L (that is, the usual x86
598     calling conventions, with the leftmost arg at the lowest
599     address)
600   Call instruction:
601   * UNIX: sysenter
602   * UNIX: int $0x80
603   * MACH: int $0x81
604   * MDEP: int $0x82
605   Note that the call type can be determined from the syscall number;
606   there is no need to inspect the actual instruction.  Although obviously
607   the instruction must match.
608   Return value:
609   * MACH,MDEP: the return value comes back in eax
610   * UNIX: the return value comes back in edx:eax (hi32:lo32)
611   Error:
612   * MACH,MDEP: no error is returned
613   * UNIX: the carry flag indicates success or failure
614
615   nb here, sizeof(UWord) == sizeof(UInt)
616*/
617
/* x86-darwin worker for UNIX-class syscalls, called by VG_(do_syscall).
   The syscall args a1..a8 are deliberately the first C arguments; the
   asm never touches them, so the kernel sees them on the stack exactly
   where the caller put them.  The syscall number and the error-flag
   pointer follow.  Returns the 64-bit result; *errflag is set to 0 on
   success and 1 on failure. */
618__private_extern__ ULong
619do_syscall_unix_WRK ( UWord a1, UWord a2, UWord a3, /* 4(esp)..12(esp) */
620                      UWord a4, UWord a5, UWord a6, /* 16(esp)..24(esp) */
621                      UWord a7, UWord a8, /* 28(esp)..32(esp) */
622                      UWord syscall_no, /* 36(esp) */
623                      /*OUT*/UInt* errflag /* 40(esp) */ );
624// Unix syscall: 64-bit return in edx:eax, with LSB in eax
625// error indicated by carry flag: clear=good, set=bad
626asm(".private_extern _do_syscall_unix_WRK\n"
627    "_do_syscall_unix_WRK:\n"
628    "        movl    40(%esp), %ecx   \n"  /* assume syscall success */
629    "        movl    $0, (%ecx)       \n"
630    "        movl    36(%esp), %eax   \n"  /* load syscall_no */
631    "        int     $0x80            \n"
632    "        jnc     1f               \n"  /* jump if success */
633    "        movl    40(%esp), %ecx   \n"  /* syscall failed - set *errflag */
634    "        movl    $1, (%ecx)       \n"
635    "    1:  ret                      \n"
636    );
637
/* x86-darwin worker for MACH-class traps, called by VG_(do_syscall).
   The args a1..a8 are left on the stack where the caller put them; the
   asm only loads the trap number.  Returns the 32-bit result. */
638__private_extern__ UInt
639do_syscall_mach_WRK ( UWord a1, UWord a2, UWord a3, /* 4(esp)..12(esp) */
640                      UWord a4, UWord a5, UWord a6, /* 16(esp)..24(esp) */
641                      UWord a7, UWord a8, /* 28(esp)..32(esp) */
642                      UWord syscall_no /* 36(esp) */ );
643// Mach trap: 32-bit result in %eax, no error flag
644asm(".private_extern _do_syscall_mach_WRK\n"
645    "_do_syscall_mach_WRK:\n"
646    "        movl    36(%esp), %eax   \n"  /* load syscall_no */
647    "        int     $0x81            \n"
648    "        ret                      \n"
649    );
650
/* x86-darwin worker for MDEP-class traps, called by VG_(do_syscall).
   The args a1..a8 are left on the stack where the caller put them; the
   asm only loads the trap number.  Returns the 32-bit result. */
651__private_extern__ UInt
652do_syscall_mdep_WRK ( UWord a1, UWord a2, UWord a3, /* 4(esp)..12(esp) */
653                      UWord a4, UWord a5, UWord a6, /* 16(esp)..24(esp) */
654                      UWord a7, UWord a8, /* 28(esp)..32(esp) */
655                      UWord syscall_no /* 36(esp) */ );
656// mdep trap: 32-bit result in %eax, no error flag
657asm(
658    ".private_extern _do_syscall_mdep_WRK\n"
659    "_do_syscall_mdep_WRK:\n"
660    "        movl    36(%esp), %eax   \n"  /* load syscall_no */
661    "        int     $0x82            \n"
662    "        ret                      \n"
663    );
664
665
666#elif defined(VGP_amd64_darwin)
667
668/* Incoming args (syscall number + up to 8 args) come in registers and stack
669
670   The kernel's syscall calling convention is:
671   * the syscall number goes in rax
672   * the args are passed to the syscall in registers and the stack
673   * the call instruction is 'syscall'
674   Return value:
675   * MACH,MDEP: the return value comes back in rax
676   * UNIX: the return value comes back in rdx:rax (hi64:lo64)
677   Error:
678   * MACH,MDEP: no error is returned
679   * UNIX: the carry flag indicates success or failure
680
681   nb here, sizeof(UWord) == sizeof(ULong)
682*/
683
/* amd64-darwin worker for UNIX-class syscalls, called by
   VG_(do_syscall).  Returns the first result word (rax).  The second
   result word (rdx) is written to *res2, and *errflag is set to 0 on
   success and 1 on failure. */
684__private_extern__ UWord
685do_syscall_unix_WRK ( UWord a1, UWord a2, UWord a3, /* rdi, rsi, rdx */
686                      UWord a4, UWord a5, UWord a6, /* rcx, r8,  r9 */
687                      UWord a7, UWord a8,           /* 8(rsp), 16(rsp) */
688                      UWord syscall_no,             /* 24(rsp) */
689                      /*OUT*/ULong* errflag,        /* 32(rsp) */
690                      /*OUT*/ULong* res2 );         /* 40(rsp) */
691// Unix syscall: 128-bit return in rax:rdx, with LSB in rax
692// error indicated by carry flag: clear=good, set=bad
693asm(".private_extern _do_syscall_unix_WRK\n"
694    "_do_syscall_unix_WRK:\n"
695    "        movq    %rcx, %r10       \n"  /* pass rcx in r10 instead */
696    "        movq    32(%rsp), %rax   \n"  /* assume syscall success */
697    "        movq    $0, (%rax)       \n"
698    "        movq    24(%rsp), %rax   \n"  /* load syscall_no */
699    "        syscall                  \n"
700    "        jnc     1f               \n"  /* jump if success */
701    "        movq    32(%rsp), %rcx   \n"  /* syscall failed - set *errflag */
702    "        movq    $1, (%rcx)       \n"
703    "    1:  movq    40(%rsp), %rcx   \n"  /* save 2nd result word */
704    "        movq    %rdx, (%rcx)     \n"
705    "        retq                     \n"  /* return 1st result word */
706    );
707
/* amd64-darwin worker for traps that report no error flag.
   VG_(do_syscall) uses it for both the MACH and the MDEP class.
   Returns the 64-bit result. */
708__private_extern__ UWord
709do_syscall_mach_WRK ( UWord a1, UWord a2, UWord a3, /* rdi, rsi, rdx */
710                      UWord a4, UWord a5, UWord a6, /* rcx, r8,  r9 */
711                      UWord a7, UWord a8,           /* 8(rsp), 16(rsp) */
712                      UWord syscall_no );           /* 24(rsp) */
713// Mach trap: 64-bit result, no error flag
714asm(".private_extern _do_syscall_mach_WRK\n"
715    "_do_syscall_mach_WRK:\n"
716    "        movq    %rcx, %r10       \n"  /* pass rcx in r10 instead */
717    "        movq    24(%rsp), %rax   \n"  /* load syscall_no */
718    "        syscall                  \n"
719    "        retq                     \n"
720    );
721
722#else
723#  error Unknown platform
724#endif
725
726
727/* Finally, the generic code.  This sends the call to the right
728   helper. */
729
730SysRes VG_(do_syscall) ( UWord sysno, UWord a1, UWord a2, UWord a3,
731                                      UWord a4, UWord a5, UWord a6,
732                                      UWord a7, UWord a8 )
733{
734#  if defined(VGP_x86_linux)
735   UWord val = do_syscall_WRK(sysno,a1,a2,a3,a4,a5,a6);
736   return VG_(mk_SysRes_x86_linux)( val );
737
738#  elif defined(VGP_amd64_linux)
739   UWord val = do_syscall_WRK(sysno,a1,a2,a3,a4,a5,a6);
740   return VG_(mk_SysRes_amd64_linux)( val );
741
742#  elif defined(VGP_ppc32_linux)
743   ULong ret     = do_syscall_WRK(sysno,a1,a2,a3,a4,a5,a6);
744   UInt  val     = (UInt)(ret>>32);
745   UInt  cr0so   = (UInt)(ret);
746   return VG_(mk_SysRes_ppc32_linux)( val, cr0so );
747
748#  elif defined(VGP_ppc64_linux)
749   ULong argblock[7];
750   argblock[0] = sysno;
751   argblock[1] = a1;
752   argblock[2] = a2;
753   argblock[3] = a3;
754   argblock[4] = a4;
755   argblock[5] = a5;
756   argblock[6] = a6;
757   do_syscall_WRK( &argblock[0] );
758   return VG_(mk_SysRes_ppc64_linux)( argblock[0], argblock[1] );
759
760#  elif defined(VGP_arm_linux)
761   UWord val = do_syscall_WRK(a1,a2,a3,a4,a5,a6,sysno);
762   return VG_(mk_SysRes_arm_linux)( val );
763
764#  elif defined(VGP_ppc32_aix5)
765   UWord res;
766   UWord err;
767   do_syscall_WRK( &res, &err,
768		   sysno, a1, a2, a3, a4, a5, a6, a7, a8);
769   /* Try to set the error number to zero if the syscall hasn't
770      really failed. */
771   if (sysno == __NR_AIX5_kread
772       || sysno == __NR_AIX5_kwrite) {
773      if (res != (UWord)-1L)
774         err = 0;
775   }
776   else if (sysno == __NR_AIX5_sigprocmask
777            || sysno == __NR_AIX5__sigpending) {
778      if (res == 0)
779         err = 0;
780   }
781   return VG_(mk_SysRes_ppc32_aix5)( res, err );
782
783#  elif defined(VGP_ppc64_aix5)
784   UWord res;
785   UWord err;
786   do_syscall_WRK( &res, &err,
787		   sysno, a1, a2, a3, a4, a5, a6, a7, a8);
788   /* Try to set the error number to zero if the syscall hasn't
789      really failed. */
790   if (sysno == __NR_AIX5_kread
791       || sysno == __NR_AIX5_kwrite) {
792      if (res != (UWord)-1L)
793         err = 0;
794   }
795   else if (sysno == __NR_AIX5_sigprocmask
796            || sysno == __NR_AIX5__sigpending) {
797      if (res == 0)
798         err = 0;
799   }
800   return VG_(mk_SysRes_ppc64_aix5)( res, err );
801
802#  elif defined(VGP_x86_darwin)
803   UInt  wLO = 0, wHI = 0, err = 0;
804   ULong u64;
805   UChar scclass = VG_DARWIN_SYSNO_CLASS(sysno);
806   switch (scclass) {
807      case VG_DARWIN_SYSCALL_CLASS_UNIX:
808         u64 = do_syscall_unix_WRK(a1,a2,a3,a4,a5,a6,a7,a8,
809                                   VG_DARWIN_SYSNO_FOR_KERNEL(sysno), &err);
810         wLO = (UInt)u64;
811         wHI = (UInt)(u64 >> 32);
812         break;
813      case VG_DARWIN_SYSCALL_CLASS_MACH:
814         wLO = do_syscall_mach_WRK(a1,a2,a3,a4,a5,a6,a7,a8,
815                                   VG_DARWIN_SYSNO_FOR_KERNEL(sysno));
816         err = 0;
817         break;
818      case VG_DARWIN_SYSCALL_CLASS_MDEP:
819         wLO = do_syscall_mdep_WRK(a1,a2,a3,a4,a5,a6,a7,a8,
820                                   VG_DARWIN_SYSNO_FOR_KERNEL(sysno));
821         err = 0;
822         break;
823      default:
824         vg_assert(0);
825         break;
826   }
827   return VG_(mk_SysRes_x86_darwin)( scclass, err ? True : False, wHI, wLO );
828
829#  elif defined(VGP_amd64_darwin)
830   ULong wLO = 0, wHI = 0, err = 0;
831   UChar scclass = VG_DARWIN_SYSNO_CLASS(sysno);
832   switch (scclass) {
833      case VG_DARWIN_SYSCALL_CLASS_UNIX:
834         wLO = do_syscall_unix_WRK(a1,a2,a3,a4,a5,a6,a7,a8,
835                                   VG_DARWIN_SYSNO_FOR_KERNEL(sysno), &err, &wHI);
836         break;
837      case VG_DARWIN_SYSCALL_CLASS_MACH:
838      case VG_DARWIN_SYSCALL_CLASS_MDEP:
839         wLO = do_syscall_mach_WRK(a1,a2,a3,a4,a5,a6,a7,a8,
840                                   VG_DARWIN_SYSNO_FOR_KERNEL(sysno));
841         err = 0;
842         break;
843      default:
844         vg_assert(0);
845         break;
846   }
847   return VG_(mk_SysRes_amd64_darwin)( scclass, err ? True : False, wHI, wLO );
848
849#else
850#  error Unknown platform
851#endif
852}
853
854/* ---------------------------------------------------------------------
855   Names of errors.
856   ------------------------------------------------------------------ */
857
858/* Return a string which gives the name of an error value.  Note,
859   unlike the standard C syserror fn, the returned string is not
860   malloc-allocated or writable -- treat it as a constant.
861   TODO: implement this properly. */
862
863const HChar* VG_(strerror) ( UWord errnum )
864{
865   switch (errnum) {
866      case VKI_EPERM:       return "Operation not permitted";
867      case VKI_ENOENT:      return "No such file or directory";
868      case VKI_ESRCH:       return "No such process";
869      case VKI_EINTR:       return "Interrupted system call";
870      case VKI_EBADF:       return "Bad file number";
871      case VKI_EAGAIN:      return "Try again";
872      case VKI_ENOMEM:      return "Out of memory";
873      case VKI_EACCES:      return "Permission denied";
874      case VKI_EFAULT:      return "Bad address";
875      case VKI_EEXIST:      return "File exists";
876      case VKI_EINVAL:      return "Invalid argument";
877      case VKI_EMFILE:      return "Too many open files";
878      case VKI_ENOSYS:      return "Function not implemented";
879      case VKI_EOVERFLOW:   return "Value too large for defined data type";
880#     if defined(VKI_ERESTARTSYS)
881      case VKI_ERESTARTSYS: return "ERESTARTSYS";
882#     endif
883      default:              return "VG_(strerror): unknown error";
884   }
885}
886
887
888/*--------------------------------------------------------------------*/
889/*--- end                                                        ---*/
890/*--------------------------------------------------------------------*/
891