
/*--------------------------------------------------------------------*/
/*--- Platform-specific syscalls stuff.        syswrap-x86-linux.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_x86_linux)

/* TODO/FIXME jrs 20050207: assignments to the syscall return result
   in interrupted_syscall() need to be reviewed.  They don't seem
   to assign the shadow state.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_vkiscnums.h"
#include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
#include "pub_core_threadstate.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_debuglog.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_libcsignal.h"
#include "pub_core_mallocfree.h"
#include "pub_core_options.h"
#include "pub_core_scheduler.h"
#include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
#include "pub_core_signals.h"
#include "pub_core_syscall.h"
#include "pub_core_syswrap.h"
#include "pub_core_tooliface.h"
#include "pub_core_stacks.h"        // VG_(register_stack)

#include "priv_types_n_macros.h"
#include "priv_syswrap-generic.h"    /* for decls of generic wrappers */
#include "priv_syswrap-linux.h"      /* for decls of linux-ish wrappers */
#include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
#include "priv_syswrap-main.h"


/* ---------------------------------------------------------------------
   clone() handling
   ------------------------------------------------------------------ */

/* Call f(arg1), but first switch stacks, using 'stack' as the new
   stack, and use 'retaddr' as f's return-to address.  Also, clear all
   the integer registers before entering f. */
__attribute__((noreturn))
void ML_(call_on_new_stack_0_1) ( Addr stack,
                                  Addr retaddr,
                                  void (*f)(Word),
                                  Word arg1 );
//  4(%esp) == stack
//  8(%esp) == retaddr
// 12(%esp) == f
// 16(%esp) == arg1
asm(
".text\n"
".globl vgModuleLocal_call_on_new_stack_0_1\n"
"vgModuleLocal_call_on_new_stack_0_1:\n"
"   movl %esp, %esi\n"     // remember old stack pointer
"   movl 4(%esi), %esp\n"  // set stack
"   pushl 16(%esi)\n"      // arg1 to stack
"   pushl  8(%esi)\n"      // retaddr to stack
"   pushl 12(%esi)\n"      // f to stack
"   movl $0, %eax\n"       // zero all GP regs
"   movl $0, %ebx\n"
"   movl $0, %ecx\n"
"   movl $0, %edx\n"
"   movl $0, %esi\n"
"   movl $0, %edi\n"
"   movl $0, %ebp\n"
"   ret\n"                 // jump to f
"   ud2\n"                 // should never get here
".previous\n"
);


/*
        Perform a clone system call.  clone is strange because it has
        fork()-like return-twice semantics, so it needs special
        handling here.

        Upon entry, we have:

            int (fn)(void*)     in  0+FSZ(%esp)
            void* child_stack   in  4+FSZ(%esp)
            int flags           in  8+FSZ(%esp)
            void* arg           in 12+FSZ(%esp)
            pid_t* child_tid    in 16+FSZ(%esp)
            pid_t* parent_tid   in 20+FSZ(%esp)
            void* tls_ptr       in 24+FSZ(%esp)

        System call requires:

            int    $__NR_clone  in %eax
            int    flags        in %ebx
            void*  child_stack  in %ecx
            pid_t* parent_tid   in %edx
            pid_t* child_tid    in %edi
            void*  tls_ptr      in %esi

        Returns an Int encoded in the linux-x86 way, not a SysRes.
 */
#define FSZ               "4+4+4+4" /* frame size = retaddr+ebx+edi+esi */
#define __NR_CLONE        VG_STRINGIFY(__NR_clone)
#define __NR_EXIT         VG_STRINGIFY(__NR_exit)

extern
Int do_syscall_clone_x86_linux ( Word (*fn)(void *),
                                 void* stack,
                                 Int   flags,
                                 void* arg,
                                 Int*  child_tid,
                                 Int*  parent_tid,
                                 vki_modify_ldt_t * );
asm(
".text\n"
".globl do_syscall_clone_x86_linux\n"
"do_syscall_clone_x86_linux:\n"
"        push    %ebx\n"
"        push    %edi\n"
"        push    %esi\n"

         /* set up child stack with function and arg */
"        movl     4+"FSZ"(%esp), %ecx\n"    /* syscall arg2: child stack */
"        movl    12+"FSZ"(%esp), %ebx\n"    /* fn arg */
"        movl     0+"FSZ"(%esp), %eax\n"    /* fn */
"        lea     -8(%ecx), %ecx\n"          /* make space on stack */
"        movl    %ebx, 4(%ecx)\n"           /*   fn arg */
"        movl    %eax, 0(%ecx)\n"           /*   fn */

         /* get other args to clone */
"        movl     8+"FSZ"(%esp), %ebx\n"    /* syscall arg1: flags */
"        movl    20+"FSZ"(%esp), %edx\n"    /* syscall arg3: parent tid * */
"        movl    16+"FSZ"(%esp), %edi\n"    /* syscall arg5: child tid * */
"        movl    24+"FSZ"(%esp), %esi\n"    /* syscall arg4: tls_ptr * */
"        movl    $"__NR_CLONE", %eax\n"
"        int     $0x80\n"                   /* clone() */
"        testl   %eax, %eax\n"              /* child if retval == 0 */
"        jnz     1f\n"

         /* CHILD - call thread function */
"        popl    %eax\n"
"        call    *%eax\n"                   /* call fn */

         /* exit with result */
"        movl    %eax, %ebx\n"              /* arg1: return value from fn */
"        movl    $"__NR_EXIT", %eax\n"
"        int     $0x80\n"

         /* Hm, exit returned */
"        ud2\n"

"1:\n"   /* PARENT or ERROR */
"        pop     %esi\n"
"        pop     %edi\n"
"        pop     %ebx\n"
"        ret\n"
".previous\n"
);

#undef FSZ
#undef __NR_CLONE
#undef __NR_EXIT


// forward declarations
static void setup_child ( ThreadArchState*, ThreadArchState*, Bool );
static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );

/*
   When a client clones, we need to keep track of the new thread.  This means:
   1. allocate a ThreadId+ThreadState+stack for the thread

   2. initialize the thread's new VCPU state

   3. create the thread using the same args as the client requested,
   but using the scheduler entrypoint for EIP, and a separate stack
   for ESP.
 */
static SysRes do_clone ( ThreadId ptid,
                         UInt flags, Addr esp,
                         Int* parent_tidptr,
                         Int* child_tidptr,
                         vki_modify_ldt_t *tlsinfo)
{
   static const Bool debug = False;

   ThreadId     ctid = VG_(alloc_ThreadState)();
   ThreadState* ptst = VG_(get_ThreadState)(ptid);
   ThreadState* ctst = VG_(get_ThreadState)(ctid);
   UWord*       stack;
   NSegment const* seg;
   SysRes       res;
   Int          eax;
   vki_sigset_t blockall, savedmask;

   VG_(sigfillset)(&blockall);

   vg_assert(VG_(is_running_thread)(ptid));
   vg_assert(VG_(is_valid_tid)(ctid));

   stack = (UWord*)ML_(allocstack)(ctid);
   if (stack == NULL) {
      res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
      goto out;
   }

   /* Copy register state

      Both parent and child return to the same place, and the code
      following the clone syscall works out which is which, so we
      don't need to worry about it.

      The parent gets the child's new tid returned from clone, but the
      child gets 0.

      If the clone call specifies a NULL esp for the new thread, then
      it actually gets a copy of the parent's esp.
   */
   /* Note: the clone call done by the Quadrics Elan3 driver specifies
      clone flags of 0xF00, and it seems to rely on the assumption
      that the child inherits a copy of the parent's GDT.
      setup_child takes care of setting that up. */
   setup_child( &ctst->arch, &ptst->arch, True );

   /* Make sys_clone appear to have returned Success(0) in the
      child. */
   ctst->arch.vex.guest_EAX = 0;

   if (esp != 0)
      ctst->arch.vex.guest_ESP = esp;

   ctst->os_state.parent = ptid;

   /* inherit signal mask */
   ctst->sig_mask     = ptst->sig_mask;
   ctst->tmp_sig_mask = ptst->sig_mask;

   /* Start the child with its threadgroup being the same as the
      parent's.  This is so that any exit_group calls that happen
      after the child is created but before it sets its
      os_state.threadgroup field for real (in thread_wrapper in
      syswrap-linux.c), really kill the new thread.  In other words, this avoids
      a race condition in which the thread is unkillable (via
      exit_group) because its threadgroup is not set.  The race window
      is probably only a few hundred or a few thousand cycles long.
      See #226116. */
   ctst->os_state.threadgroup = ptst->os_state.threadgroup;

   /* We don't really know where the client stack is, because it's
      allocated by the client.  The best we can do is look at the
      memory mappings and try to derive some useful information.  We
      assume that esp starts near its highest possible value, and can
      only go down to the start of the mmaped segment. */
   seg = VG_(am_find_nsegment)((Addr)esp);
   if (seg && seg->kind != SkResvn) {
      ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(esp);
      ctst->client_stack_szB = ctst->client_stack_highest_word - seg->start;

      VG_(register_stack)(seg->start, ctst->client_stack_highest_word);

      if (debug)
         VG_(printf)("tid %d: guessed client stack range %#lx-%#lx\n",
                     ctid, seg->start, VG_PGROUNDUP(esp));
   } else {
      VG_(message)(Vg_UserMsg,
                   "!? New thread %d starts with ESP(%#lx) unmapped\n",
                   ctid, esp);
      ctst->client_stack_szB  = 0;
   }

   /* Assume the clone will succeed, and tell any tool that wants to
      know that this thread has come into existence.  We cannot defer
      it beyond this point because sys_set_thread_area, just below,
      causes tCheck to assert by making references to the new ThreadId
      if we don't state the new thread exists prior to that point.
      If the clone fails, we'll send out a ll_exit notification for it
      at the out: label below, to clean up. */
   vg_assert(VG_(owns_BigLock_LL)(ptid));
   VG_TRACK ( pre_thread_ll_create, ptid, ctid );

   if (flags & VKI_CLONE_SETTLS) {
      if (debug)
         VG_(printf)("clone child has SETTLS: tls info at %p: idx=%d "
                     "base=%#lx limit=%x; esp=%#x fs=%x gs=%x\n",
                     tlsinfo, tlsinfo->entry_number,
                     tlsinfo->base_addr, tlsinfo->limit,
                     ptst->arch.vex.guest_ESP,
                     ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
      res = sys_set_thread_area(ctid, tlsinfo);
      if (sr_isError(res))
         goto out;
   }

   flags &= ~VKI_CLONE_SETTLS;

   /* start the thread with everything blocked */
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);

   /* Create the new thread */
   eax = do_syscall_clone_x86_linux(
            ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
            child_tidptr, parent_tidptr, NULL
         );
   res = VG_(mk_SysRes_x86_linux)( eax );

   VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);

  out:
   if (sr_isError(res)) {
      /* clone failed */
      VG_(cleanup_thread)(&ctst->arch);
      ctst->status = VgTs_Empty;
      /* oops.  Better tell the tool the thread exited in a hurry :-) */
      VG_TRACK( pre_thread_ll_exit, ctid );
   }

   return res;
}


/* ---------------------------------------------------------------------
   LDT/GDT simulation
   ------------------------------------------------------------------ */

/* Details of the LDT simulation
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   When a program runs natively, the linux kernel allows each *thread*
   in it to have its own LDT.  Almost all programs never do this --
   it's wildly unportable, after all -- and so the kernel never
   allocates the structure, which is just as well as an LDT occupies
   64k of memory (8192 entries of size 8 bytes).

   A thread may choose to modify its LDT entries, by doing the
   __NR_modify_ldt syscall.  In such a situation the kernel will then
   allocate an LDT structure for it.  Each LDT entry is basically a
   (base, limit) pair.  A virtual address in a specific segment is
   translated to a linear address by adding the segment's base value.
   In addition, the virtual address must not exceed the limit value.

   To use an LDT entry, a thread loads one of the segment registers
   (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
   .. 8191) it wants to use.  In fact, the required value is (index <<
   3) + 7, but that's not important right now.  Any normal instruction
   which includes an addressing mode can then be made relative to that
   LDT entry by prefixing the insn with a so-called segment-override
   prefix, a byte which indicates which of the 6 segment registers
   holds the LDT index.

   Now, a key constraint is that valgrind's address checks operate in
   terms of linear addresses.  So we have to explicitly translate
   virtual addrs into linear addrs, and that means doing a complete
   LDT simulation.

   Calls to modify_ldt are intercepted.  For each thread, we maintain
   an LDT (with the same normally-never-allocated optimisation that
   the kernel does).  This is updated as expected via calls to
   modify_ldt.

   When a thread does an amode calculation involving a segment
   override prefix, the relevant LDT entry for the thread is
   consulted.  It all works.

   There is a conceptual problem, which appears when switching back to
   native execution, either temporarily to pass syscalls to the
   kernel, or permanently, when debugging V.  Problem at such points
   is that it's pretty pointless to copy the simulated machine's
   segment registers to the real machine, because we'd also need to
   copy the simulated LDT into the real one, and that's prohibitively
   expensive.

   Fortunately it looks like no syscalls rely on the segment regs or
   LDT being correct, so we can get away with it.  Apart from that the
   simulation is pretty straightforward.  All 6 segment registers are
   tracked, although only %ds, %es, %fs and %gs are allowed as
   prefixes.  Perhaps it could be restricted even more than that -- I
   am not sure what is and isn't allowed in user-mode.
*/
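
/* Illustrative sketch only (kept under #if 0, never compiled): roughly
   how a virtual address used under a segment-override prefix maps to a
   linear address, given a simulated LDT as described above.  The
   LdtEnt.Bits field names are the ones sys_get_thread_area() below also
   uses; the helper itself is hypothetical and not called anywhere. */
#if 0
static Addr example_virt_to_linear ( VexGuestX86SegDescr* ldt,
                                     UInt seg_selector, Addr virt )
{
   UInt idx   = seg_selector >> 3;       /* selector is (index << 3) + 7 */
   UInt base  = (ldt[idx].LdtEnt.Bits.BaseHi  << 24)
              | (ldt[idx].LdtEnt.Bits.BaseMid << 16)
              |  ldt[idx].LdtEnt.Bits.BaseLow;
   UInt limit = (ldt[idx].LdtEnt.Bits.LimitHi << 16)
              |  ldt[idx].LdtEnt.Bits.LimitLow;
   if (ldt[idx].LdtEnt.Bits.Granularity)
      limit = (limit << 12) | 0xFFF;     /* limit is in pages, not bytes */
   vg_assert(virt <= limit);             /* must not exceed the limit */
   return (Addr)(base + virt);           /* linear = base + virtual */
}
#endif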

/* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
   the Linux kernel's logic (cut-n-paste of code in
   linux/kernel/ldt.c).  */

static
void translate_to_hw_format ( /* IN  */ vki_modify_ldt_t* inn,
                              /* OUT */ VexGuestX86SegDescr* out,
                                        Int oldmode )
{
   UInt entry_1, entry_2;
   vg_assert(8 == sizeof(VexGuestX86SegDescr));

   if (0)
      VG_(printf)("translate_to_hw_format: base %#lx, limit %d\n",
                  inn->base_addr, inn->limit );

   /* Allow LDTs to be cleared by the user. */
   if (inn->base_addr == 0 && inn->limit == 0) {
      if (oldmode ||
          (inn->contents == 0      &&
           inn->read_exec_only == 1   &&
           inn->seg_32bit == 0      &&
           inn->limit_in_pages == 0   &&
           inn->seg_not_present == 1   &&
           inn->useable == 0 )) {
         entry_1 = 0;
         entry_2 = 0;
         goto install;
      }
   }

   entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
             (inn->limit & 0x0ffff);
   entry_2 = (inn->base_addr & 0xff000000) |
             ((inn->base_addr & 0x00ff0000) >> 16) |
             (inn->limit & 0xf0000) |
             ((inn->read_exec_only ^ 1) << 9) |
             (inn->contents << 10) |
             ((inn->seg_not_present ^ 1) << 15) |
             (inn->seg_32bit << 22) |
             (inn->limit_in_pages << 23) |
             0x7000;
   if (!oldmode)
      entry_2 |= (inn->useable << 20);

   /* Install the new entry ...  */
  install:
   out->LdtEnt.Words.word1 = entry_1;
   out->LdtEnt.Words.word2 = entry_2;
}
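
/* Worked example of the encoding above (illustrative only; the values
   are made up).  For a 32-bit data segment with
      base_addr = 0x12345678, limit = 0xFFFFF, seg_32bit = 1,
      limit_in_pages = 1, contents = 0, read_exec_only = 0,
      seg_not_present = 0, useable = 1, oldmode = 0
   we get
      entry_1 = (0x5678 << 16) | 0xFFFF = 0x5678FFFF
      entry_2 = 0x12000000 | 0x34 | 0xF0000 | (1 << 9) | (1 << 15)
                | (1 << 22) | (1 << 23) | 0x7000 | (1 << 20)
              = 0x12DFF234
   which is the usual hardware descriptor layout: Present = 1, DPL = 3,
   S = 1, Type = read/write data, G = 1, D/B = 1, AVL = 1. */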

/* Create a zeroed-out GDT. */
static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
{
   Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
   return VG_(arena_calloc)(VG_AR_CORE, "di.syswrap-x86.azxG.1", nbytes, 1);
}

/* Create a zeroed-out LDT. */
static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
{
   Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
   return VG_(arena_calloc)(VG_AR_CORE, "di.syswrap-x86.azxL.1", nbytes, 1);
}

/* Free up an LDT or GDT allocated by the above fns. */
static void free_LDT_or_GDT ( VexGuestX86SegDescr* dt )
{
   vg_assert(dt);
   VG_(arena_free)(VG_AR_CORE, (void*)dt);
}

/* Copy contents between two existing LDTs. */
static void copy_LDT_from_to ( VexGuestX86SegDescr* src,
                               VexGuestX86SegDescr* dst )
{
   Int i;
   vg_assert(src);
   vg_assert(dst);
   for (i = 0; i < VEX_GUEST_X86_LDT_NENT; i++)
      dst[i] = src[i];
}

/* Copy contents between two existing GDTs. */
static void copy_GDT_from_to ( VexGuestX86SegDescr* src,
                               VexGuestX86SegDescr* dst )
{
   Int i;
   vg_assert(src);
   vg_assert(dst);
   for (i = 0; i < VEX_GUEST_X86_GDT_NENT; i++)
      dst[i] = src[i];
}

/* Free this thread's DTs, if it has any. */
static void deallocate_LGDTs_for_thread ( VexGuestX86State* vex )
{
   vg_assert(sizeof(HWord) == sizeof(void*));

   if (0)
      VG_(printf)("deallocate_LGDTs_for_thread: "
                  "ldt = 0x%lx, gdt = 0x%lx\n",
                  vex->guest_LDT, vex->guest_GDT );

   if (vex->guest_LDT != (HWord)NULL) {
      free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_LDT );
      vex->guest_LDT = (HWord)NULL;
   }

   if (vex->guest_GDT != (HWord)NULL) {
      free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_GDT );
      vex->guest_GDT = (HWord)NULL;
   }
}


/*
 * linux/kernel/ldt.c
 *
 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 */

/*
 * read_ldt() is not really atomic - this is not a problem since
 * synchronization of reads and writes done to the LDT has to be
 * assured by user-space anyway. Writes are atomic, to protect
 * the security checks done on new descriptors.
 */
static
SysRes read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
{
   SysRes res;
   UInt   i, size;
   UChar* ldt;

   if (0)
      VG_(printf)("read_ldt: tid = %d, ptr = %p, bytecount = %d\n",
                  tid, ptr, bytecount );

   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
   vg_assert(8 == sizeof(VexGuestX86SegDescr));

   ldt = (UChar*)(VG_(threads)[tid].arch.vex.guest_LDT);
   res = VG_(mk_SysRes_Success)( 0 );
   if (ldt == NULL)
      /* LDT not allocated, meaning all entries are null */
      goto out;

   size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
   if (size > bytecount)
      size = bytecount;

   res = VG_(mk_SysRes_Success)( size );
   for (i = 0; i < size; i++)
      ptr[i] = ldt[i];

  out:
   return res;
}


static
SysRes write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
{
   SysRes res;
   VexGuestX86SegDescr* ldt;
   vki_modify_ldt_t* ldt_info;

   if (0)
      VG_(printf)("write_ldt: tid = %d, ptr = %p, "
                  "bytecount = %d, oldmode = %d\n",
                  tid, ptr, bytecount, oldmode );

   vg_assert(8 == sizeof(VexGuestX86SegDescr));
   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));

   ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
   ldt_info = (vki_modify_ldt_t*)ptr;

   res = VG_(mk_SysRes_Error)( VKI_EINVAL );
   if (bytecount != sizeof(vki_modify_ldt_t))
      goto out;

   res = VG_(mk_SysRes_Error)( VKI_EINVAL );
   if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
      goto out;
   if (ldt_info->contents == 3) {
      if (oldmode)
         goto out;
      if (ldt_info->seg_not_present == 0)
         goto out;
   }

   /* If this thread doesn't have an LDT, we'd better allocate it
      now. */
   if (ldt == NULL) {
      ldt = alloc_zeroed_x86_LDT();
      VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
   }

   /* Install the new entry ...  */
   translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
   res = VG_(mk_SysRes_Success)( 0 );

  out:
   return res;
}


static SysRes sys_modify_ldt ( ThreadId tid,
                               Int func, void* ptr, UInt bytecount )
{
   SysRes ret = VG_(mk_SysRes_Error)( VKI_ENOSYS );

   switch (func) {
   case 0:
      ret = read_ldt(tid, ptr, bytecount);
      break;
   case 1:
      ret = write_ldt(tid, ptr, bytecount, 1);
      break;
   case 2:
      VG_(unimplemented)("sys_modify_ldt: func == 2");
      /* god knows what this is about */
      /* ret = read_default_ldt(ptr, bytecount); */
      /*UNREACHED*/
      break;
   case 0x11:
      ret = write_ldt(tid, ptr, bytecount, 0);
      break;
   }
   return ret;
}


static SysRes sys_set_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
{
   Int                  idx;
   VexGuestX86SegDescr* gdt;

   vg_assert(8 == sizeof(VexGuestX86SegDescr));
   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));

   if (info == NULL)
      return VG_(mk_SysRes_Error)( VKI_EFAULT );

   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;

   /* If the thread doesn't have a GDT, allocate it now. */
   if (!gdt) {
      gdt = alloc_zeroed_x86_GDT();
      VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
   }

   idx = info->entry_number;

   if (idx == -1) {
      /* Find and use the first free entry.  Don't allocate entry
         zero, because the hardware will never do that, and apparently
         doing so confuses some code (perhaps stuff running on
         Wine). */
      for (idx = 1; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
         if (gdt[idx].LdtEnt.Words.word1 == 0
             && gdt[idx].LdtEnt.Words.word2 == 0)
            break;
      }

      if (idx == VEX_GUEST_X86_GDT_NENT)
         return VG_(mk_SysRes_Error)( VKI_ESRCH );
   } else if (idx < 0 || idx == 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
      /* Similarly, reject attempts to use GDT[0]. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   translate_to_hw_format(info, &gdt[idx], 0);

   VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
             "set_thread_area(info->entry)",
             (Addr) & info->entry_number, sizeof(unsigned int) );
   info->entry_number = idx;
   VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
             (Addr) & info->entry_number, sizeof(unsigned int) );

   return VG_(mk_SysRes_Success)( 0 );
}


static SysRes sys_get_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
{
   Int idx;
   VexGuestX86SegDescr* gdt;

   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
   vg_assert(8 == sizeof(VexGuestX86SegDescr));

   if (info == NULL)
      return VG_(mk_SysRes_Error)( VKI_EFAULT );

   idx = info->entry_number;

   if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
      return VG_(mk_SysRes_Error)( VKI_EINVAL );

   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;

   /* If the thread doesn't have a GDT, allocate it now. */
   if (!gdt) {
      gdt = alloc_zeroed_x86_GDT();
      VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
   }

   info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
                     ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
                     gdt[idx].LdtEnt.Bits.BaseLow;
   info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
                   gdt[idx].LdtEnt.Bits.LimitLow;
   info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
   info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
   info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
   info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
   info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
   info->useable = gdt[idx].LdtEnt.Bits.Sys;
   info->reserved = 0;

   return VG_(mk_SysRes_Success)( 0 );
}

/* ---------------------------------------------------------------------
   More thread stuff
   ------------------------------------------------------------------ */

void VG_(cleanup_thread) ( ThreadArchState* arch )
{
   /* Release arch-specific resources held by this thread. */
   /* On x86, we have to dump the LDT and GDT. */
   deallocate_LGDTs_for_thread( &arch->vex );
}


static void setup_child ( /*OUT*/ ThreadArchState *child,
                          /*IN*/  ThreadArchState *parent,
                          Bool inherit_parents_GDT )
{
   /* We inherit our parent's guest state. */
   child->vex = parent->vex;
   child->vex_shadow1 = parent->vex_shadow1;
   child->vex_shadow2 = parent->vex_shadow2;

   /* We inherit our parent's LDT. */
   if (parent->vex.guest_LDT == (HWord)NULL) {
      /* We hope this is the common case. */
      child->vex.guest_LDT = (HWord)NULL;
   } else {
      /* No luck .. we have to take a copy of the parent's. */
      child->vex.guest_LDT = (HWord)alloc_zeroed_x86_LDT();
      copy_LDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_LDT,
                        (VexGuestX86SegDescr*)child->vex.guest_LDT );
   }

   /* Either we start with an empty GDT (the usual case) or inherit a
      copy of our parent's one (Quadrics Elan3 driver-style clone
      only). */
   child->vex.guest_GDT = (HWord)NULL;

   if (inherit_parents_GDT && parent->vex.guest_GDT != (HWord)NULL) {
      child->vex.guest_GDT = (HWord)alloc_zeroed_x86_GDT();
      copy_GDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_GDT,
                        (VexGuestX86SegDescr*)child->vex.guest_GDT );
   }
}


/* ---------------------------------------------------------------------
   PRE/POST wrappers for x86/Linux-specific syscalls
   ------------------------------------------------------------------ */

#define PRE(name)       DEFN_PRE_TEMPLATE(x86_linux, name)
#define POST(name)      DEFN_POST_TEMPLATE(x86_linux, name)

/* Add prototypes for the wrappers declared here, so that gcc doesn't
   harass us for not having prototypes.  Really this is a kludge --
   the right thing to do is to make these wrappers 'static' since they
   aren't visible outside this file, but that requires even more macro
   magic. */
DECL_TEMPLATE(x86_linux, sys_stat64);
DECL_TEMPLATE(x86_linux, sys_fstatat64);
DECL_TEMPLATE(x86_linux, sys_fstat64);
DECL_TEMPLATE(x86_linux, sys_lstat64);
DECL_TEMPLATE(x86_linux, sys_clone);
DECL_TEMPLATE(x86_linux, old_mmap);
DECL_TEMPLATE(x86_linux, sys_mmap2);
DECL_TEMPLATE(x86_linux, sys_sigreturn);
DECL_TEMPLATE(x86_linux, sys_rt_sigreturn);
DECL_TEMPLATE(x86_linux, sys_modify_ldt);
DECL_TEMPLATE(x86_linux, sys_set_thread_area);
DECL_TEMPLATE(x86_linux, sys_get_thread_area);
DECL_TEMPLATE(x86_linux, sys_ptrace);
DECL_TEMPLATE(x86_linux, sys_sigsuspend);
DECL_TEMPLATE(x86_linux, old_select);
DECL_TEMPLATE(x86_linux, sys_vm86old);
DECL_TEMPLATE(x86_linux, sys_vm86);
DECL_TEMPLATE(x86_linux, sys_syscall223);

PRE(old_select)
{
   /* struct sel_arg_struct {
      unsigned long n;
      fd_set *inp, *outp, *exp;
      struct timeval *tvp;
      };
   */
   PRE_REG_READ1(long, "old_select", struct sel_arg_struct *, args);
   PRE_MEM_READ( "old_select(args)", ARG1, 5*sizeof(UWord) );
   *flags |= SfMayBlock;
   {
      UInt* arg_struct = (UInt*)ARG1;
      UInt a1, a2, a3, a4, a5;

      a1 = arg_struct[0];
      a2 = arg_struct[1];
      a3 = arg_struct[2];
      a4 = arg_struct[3];
      a5 = arg_struct[4];

      PRINT("old_select ( %d, %#x, %#x, %#x, %#x )", a1,a2,a3,a4,a5);
      if (a2 != (Addr)NULL)
         PRE_MEM_READ( "old_select(readfds)",   a2, a1/8 /* __FD_SETSIZE/8 */ );
      if (a3 != (Addr)NULL)
         PRE_MEM_READ( "old_select(writefds)",  a3, a1/8 /* __FD_SETSIZE/8 */ );
      if (a4 != (Addr)NULL)
         PRE_MEM_READ( "old_select(exceptfds)", a4, a1/8 /* __FD_SETSIZE/8 */ );
      if (a5 != (Addr)NULL)
         PRE_MEM_READ( "old_select(timeout)", a5, sizeof(struct vki_timeval) );
   }
}

PRE(sys_clone)
{
   UInt cloneflags;
   Bool badarg = False;

   PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ2(int, "clone",
                 unsigned long, flags,
                 void *, child_stack);

   if (ARG1 & VKI_CLONE_PARENT_SETTID) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA3("clone", int *, parent_tidptr);
      }
      PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
      if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
                                             VKI_PROT_WRITE)) {
         badarg = True;
      }
   }
   if (ARG1 & VKI_CLONE_SETTLS) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA4("clone", vki_modify_ldt_t *, tlsinfo);
      }
      PRE_MEM_READ("clone(tlsinfo)", ARG4, sizeof(vki_modify_ldt_t));
      if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t),
                                             VKI_PROT_READ)) {
         badarg = True;
      }
   }
   if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA5("clone", int *, child_tidptr);
      }
      PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
      if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
                                             VKI_PROT_WRITE)) {
         badarg = True;
      }
   }

   if (badarg) {
      SET_STATUS_Failure( VKI_EFAULT );
      return;
   }

   cloneflags = ARG1;

   if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
      SET_STATUS_Failure( VKI_EINVAL );
      return;
   }

   /* Be ultra-paranoid and filter out any clone-variants we don't understand:
      - ??? specifies clone flags of 0x100011
      - ??? specifies clone flags of 0x1200011.
      - NPTL specifies clone flags of 0x7D0F00.
      - The Quadrics Elan3 driver specifies clone flags of 0xF00.
      - Newer Quadrics Elan3 drivers with NPTL support specify 0x410F00.
      Everything else is rejected.
   */
   if (
        1 ||
        /* 11 Nov 05: for the time being, disable this ultra-paranoia.
           The switch below probably does a good enough job. */
          (cloneflags == 0x100011 || cloneflags == 0x1200011
                                  || cloneflags == 0x7D0F00
                                  || cloneflags == 0x790F00
                                  || cloneflags == 0x3D0F00
                                  || cloneflags == 0x410F00
                                  || cloneflags == 0xF00
                                  || cloneflags == 0xF21)) {
     /* OK */
   }
   else {
      /* Nah.  We don't like it.  Go away. */
      goto reject;
   }

   /* Only look at the flags we really care about */
   switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
                         | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
   case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
      /* thread creation */
      SET_STATUS_from_SysRes(
         do_clone(tid,
                  ARG1,         /* flags */
                  (Addr)ARG2,   /* child ESP */
                  (Int *)ARG3,  /* parent_tidptr */
                  (Int *)ARG5,  /* child_tidptr */
                  (vki_modify_ldt_t *)ARG4)); /* set_tls */
      break;

   case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
      /* FALLTHROUGH - assume vfork == fork */
      cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);

   case 0: /* plain fork */
      SET_STATUS_from_SysRes(
         ML_(do_fork_clone)(tid,
                       cloneflags,      /* flags */
                       (Int *)ARG3,     /* parent_tidptr */
                       (Int *)ARG5));   /* child_tidptr */
      break;

   default:
   reject:
      /* should we just ENOSYS? */
      VG_(message)(Vg_UserMsg, "\n");
      VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
      VG_(message)(Vg_UserMsg, "\n");
      VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
      VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
      VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
      VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
      VG_(unimplemented)
         ("Valgrind does not support general clone().");
   }

   if (SUCCESS) {
      if (ARG1 & VKI_CLONE_PARENT_SETTID)
         POST_MEM_WRITE(ARG3, sizeof(Int));
      if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
         POST_MEM_WRITE(ARG5, sizeof(Int));

      /* Thread creation was successful; let the child have the chance
         to run */
      *flags |= SfYieldAfter;
   }
}

PRE(sys_sigreturn)
{
   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
      an explanation of what follows. */

   ThreadState* tst;
   PRINT("sys_sigreturn ( )");

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   /* Adjust esp to point to start of frame; skip back up over
      sigreturn sequence's "popl %eax" and handler ret addr */
   tst = VG_(get_ThreadState)(tid);
   tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
   /* XXX why does ESP change differ from rt_sigreturn case below? */

   /* This is only so that the EIP is (might be) useful to report if
      something goes wrong in the sigreturn */
   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);

   /* Restore register state from frame and remove it */
   VG_(sigframe_destroy)(tid, False);

   /* Tell the driver not to update the guest state with the "result",
      and set a bogus result to keep it happy. */
   *flags |= SfNoWriteResult;
   SET_STATUS_Success(0);

   /* Check to see if any signals arose as a result of this. */
   *flags |= SfPollAfter;
}

PRE(sys_rt_sigreturn)
{
   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
      an explanation of what follows. */

   ThreadState* tst;
   PRINT("sys_rt_sigreturn ( )");

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   /* Adjust esp to point to start of frame; skip back up over handler
      ret addr */
   tst = VG_(get_ThreadState)(tid);
   tst->arch.vex.guest_ESP -= sizeof(Addr);
   /* XXX why does ESP change differ from sigreturn case above? */

   /* This is only so that the EIP is (might be) useful to report if
      something goes wrong in the sigreturn */
   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);

   /* Restore register state from frame and remove it */
   VG_(sigframe_destroy)(tid, True);

   /* Tell the driver not to update the guest state with the "result",
      and set a bogus result to keep it happy. */
   *flags |= SfNoWriteResult;
   SET_STATUS_Success(0);

   /* Check to see if any signals arose as a result of this. */
   *flags |= SfPollAfter;
}

PRE(sys_modify_ldt)
{
   PRINT("sys_modify_ldt ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(int, "modify_ldt", int, func, void *, ptr,
                 unsigned long, bytecount);

   if (ARG1 == 0) {
      /* read the LDT into ptr */
      PRE_MEM_WRITE( "modify_ldt(ptr)", ARG2, ARG3 );
   }
   if (ARG1 == 1 || ARG1 == 0x11) {
      /* write the LDT with the entry pointed at by ptr */
      PRE_MEM_READ( "modify_ldt(ptr)", ARG2, sizeof(vki_modify_ldt_t) );
   }
   /* "do" the syscall ourselves; the kernel never sees it */
   SET_STATUS_from_SysRes( sys_modify_ldt( tid, ARG1, (void*)ARG2, ARG3 ) );

   if (ARG1 == 0 && SUCCESS && RES > 0) {
      POST_MEM_WRITE( ARG2, RES );
   }
}

PRE(sys_set_thread_area)
{
   PRINT("sys_set_thread_area ( %#lx )", ARG1);
   PRE_REG_READ1(int, "set_thread_area", struct user_desc *, u_info)
   PRE_MEM_READ( "set_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );

   /* "do" the syscall ourselves; the kernel never sees it */
   SET_STATUS_from_SysRes( sys_set_thread_area( tid, (void *)ARG1 ) );
}

PRE(sys_get_thread_area)
{
   PRINT("sys_get_thread_area ( %#lx )", ARG1);
   PRE_REG_READ1(int, "get_thread_area", struct user_desc *, u_info)
   PRE_MEM_WRITE( "get_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );

   /* "do" the syscall ourselves; the kernel never sees it */
   SET_STATUS_from_SysRes( sys_get_thread_area( tid, (void *)ARG1 ) );

   if (SUCCESS) {
      POST_MEM_WRITE( ARG1, sizeof(vki_modify_ldt_t) );
   }
}

// Parts of this are x86-specific, but the *PEEK* cases are generic.
//
// ARG3 is only used for pointers into the traced process's address
// space and for offsets into the traced process's struct
// user_regs_struct. It is never a pointer into this process's memory
// space, and we should therefore not check anything it points to.
PRE(sys_ptrace)
{
   PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4);
   PRE_REG_READ4(int, "ptrace",
                 long, request, long, pid, long, addr, long, data);
   switch (ARG1) {
   case VKI_PTRACE_PEEKTEXT:
   case VKI_PTRACE_PEEKDATA:
   case VKI_PTRACE_PEEKUSR:
      PRE_MEM_WRITE( "ptrace(peek)", ARG4,
                     sizeof (long));
      break;
   case VKI_PTRACE_GETREGS:
      PRE_MEM_WRITE( "ptrace(getregs)", ARG4,
                     sizeof (struct vki_user_regs_struct));
      break;
   case VKI_PTRACE_GETFPREGS:
      PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4,
                     sizeof (struct vki_user_i387_struct));
      break;
   case VKI_PTRACE_GETFPXREGS:
      PRE_MEM_WRITE( "ptrace(getfpxregs)", ARG4,
                     sizeof(struct vki_user_fxsr_struct) );
      break;
   case VKI_PTRACE_GET_THREAD_AREA:
      PRE_MEM_WRITE( "ptrace(get_thread_area)", ARG4,
                     sizeof(struct vki_user_desc) );
      break;
   case VKI_PTRACE_SETREGS:
      PRE_MEM_READ( "ptrace(setregs)", ARG4,
                     sizeof (struct vki_user_regs_struct));
      break;
   case VKI_PTRACE_SETFPREGS:
      PRE_MEM_READ( "ptrace(setfpregs)", ARG4,
                     sizeof (struct vki_user_i387_struct));
      break;
   case VKI_PTRACE_SETFPXREGS:
      PRE_MEM_READ( "ptrace(setfpxregs)", ARG4,
                     sizeof(struct vki_user_fxsr_struct) );
      break;
   case VKI_PTRACE_SET_THREAD_AREA:
      PRE_MEM_READ( "ptrace(set_thread_area)", ARG4,
                     sizeof(struct vki_user_desc) );
      break;
   case VKI_PTRACE_GETEVENTMSG:
      PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
      break;
   case VKI_PTRACE_GETSIGINFO:
      PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
      break;
   case VKI_PTRACE_SETSIGINFO:
      PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
      break;
   case VKI_PTRACE_GETREGSET:
      ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
      break;
   case VKI_PTRACE_SETREGSET:
      ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
      break;
   default:
      break;
   }
}

POST(sys_ptrace)
{
   switch (ARG1) {
   case VKI_PTRACE_PEEKTEXT:
   case VKI_PTRACE_PEEKDATA:
   case VKI_PTRACE_PEEKUSR:
      POST_MEM_WRITE( ARG4, sizeof (long));
      break;
   case VKI_PTRACE_GETREGS:
      POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
      break;
   case VKI_PTRACE_GETFPREGS:
      POST_MEM_WRITE( ARG4, sizeof (struct vki_user_i387_struct));
      break;
   case VKI_PTRACE_GETFPXREGS:
      POST_MEM_WRITE( ARG4, sizeof(struct vki_user_fxsr_struct) );
      break;
   case VKI_PTRACE_GET_THREAD_AREA:
      POST_MEM_WRITE( ARG4, sizeof(struct vki_user_desc) );
      break;
   case VKI_PTRACE_GETEVENTMSG:
      POST_MEM_WRITE( ARG4, sizeof(unsigned long));
      break;
   case VKI_PTRACE_GETSIGINFO:
      /* XXX: This is a simplification. Different parts of the
       * siginfo_t are valid depending on the type of signal.
       */
      POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
      break;
   case VKI_PTRACE_GETREGSET:
      ML_(linux_POST_getregset)(tid, ARG3, ARG4);
      break;
   default:
      break;
   }
}

PRE(old_mmap)
{
   /* struct mmap_arg_struct {
         unsigned long addr;
         unsigned long len;
         unsigned long prot;
         unsigned long flags;
         unsigned long fd;
         unsigned long offset;
   }; */
   UWord a1, a2, a3, a4, a5, a6;
   SysRes r;

   UWord* args = (UWord*)ARG1;
   PRE_REG_READ1(long, "old_mmap", struct mmap_arg_struct *, args);
   PRE_MEM_READ( "old_mmap(args)", (Addr)args, 6*sizeof(UWord) );

   a1 = args[1-1];
   a2 = args[2-1];
   a3 = args[3-1];
   a4 = args[4-1];
   a5 = args[5-1];
   a6 = args[6-1];

   PRINT("old_mmap ( %#lx, %llu, %ld, %ld, %ld, %ld )",
         a1, (ULong)a2, a3, a4, a5, a6 );

   r = ML_(generic_PRE_sys_mmap)( tid, a1, a2, a3, a4, a5, (Off64T)a6 );
   SET_STATUS_from_SysRes(r);
}

PRE(sys_mmap2)
{
   SysRes r;

   // Exactly like old_mmap() except:
   //  - all 6 args are passed in regs, rather than in a memory-block.
   //  - the file offset is specified in pagesize units rather than bytes,
   //    so that it can be used for files bigger than 2^32 bytes.
   // pagesize or 4K-size units in offset?  For ppc32/64-linux, this is
   // 4K-sized.  Assert that the page size is 4K here for safety.
   vg_assert(VKI_PAGE_SIZE == 4096);
   PRINT("sys_mmap2 ( %#lx, %llu, %ld, %ld, %ld, %ld )",
         ARG1, (ULong)ARG2, ARG3, ARG4, ARG5, ARG6 );
   PRE_REG_READ6(long, "mmap2",
                 unsigned long, start, unsigned long, length,
                 unsigned long, prot,  unsigned long, flags,
                 unsigned long, fd,    unsigned long, offset);

   r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
                                       4096 * (Off64T)ARG6 );
   SET_STATUS_from_SysRes(r);
}

// XXX: lstat64/fstat64/stat64 are generic, but not necessarily
// applicable to every architecture -- I think only to 32-bit archs.
// We're going to need something like linux/core_os32.h for such
// things, eventually, I think.  --njn
PRE(sys_lstat64)
{
   PRINT("sys_lstat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
   PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
   PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
}

POST(sys_lstat64)
{
   vg_assert(SUCCESS);
   if (RES == 0) {
      POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   }
}

PRE(sys_stat64)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_stat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
   PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
   PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
}

POST(sys_stat64)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
}

PRE(sys_fstatat64)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx )",ARG1,ARG2,(char*)ARG2,ARG3);
   PRE_REG_READ3(long, "fstatat64",
                 int, dfd, char *, file_name, struct stat64 *, buf);
   PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
   PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
}

POST(sys_fstatat64)
{
   POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
}

PRE(sys_fstat64)
{
   PRINT("sys_fstat64 ( %ld, %#lx )",ARG1,ARG2);
   PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
   PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
}

POST(sys_fstat64)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
}

/* NB: arm-linux has a clone of this one, and ppc32-linux has an almost
   identical version. */
PRE(sys_sigsuspend)
{
   /* The C library interface to sigsuspend just takes a pointer to
      a signal mask, but this system call has three arguments: the first
      two don't appear to be used by the kernel and are always passed as
      zero by glibc, and the third is the first word of the signal mask,
      so only 32 signals are supported.

      In fact glibc normally uses rt_sigsuspend if it is available, as
      that takes a pointer to the signal mask and so supports more signals.
    */
   *flags |= SfMayBlock;
   PRINT("sys_sigsuspend ( %ld, %ld, %ld )", ARG1,ARG2,ARG3 );
   PRE_REG_READ3(int, "sigsuspend",
                 int, history0, int, history1,
                 vki_old_sigset_t, mask);
}

PRE(sys_vm86old)
{
   PRINT("sys_vm86old ( %#lx )", ARG1);
   PRE_REG_READ1(int, "vm86old", struct vm86_struct *, info);
   PRE_MEM_WRITE( "vm86old(info)", ARG1, sizeof(struct vki_vm86_struct));
}

POST(sys_vm86old)
{
   POST_MEM_WRITE( ARG1, sizeof(struct vki_vm86_struct));
}

PRE(sys_vm86)
{
   PRINT("sys_vm86 ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(int, "vm86", unsigned long, fn, struct vm86plus_struct *, v86);
   if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
      PRE_MEM_WRITE( "vm86(v86)", ARG2, sizeof(struct vki_vm86plus_struct));
}

POST(sys_vm86)
{
   if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
      POST_MEM_WRITE( ARG2, sizeof(struct vki_vm86plus_struct));
}


/* ---------------------------------------------------------------
   PRE/POST wrappers for x86/Linux-variant specific syscalls
   ------------------------------------------------------------ */

PRE(sys_syscall223)
{
   Int err;

   /* 223 is used by sys_bproc.  If we're not on a declared bproc
      variant, fail in the usual way. */

   if (!VG_(strstr)(VG_(clo_kernel_variant), "bproc")) {
      PRINT("non-existent syscall! (syscall 223)");
      PRE_REG_READ0(long, "ni_syscall(223)");
      SET_STATUS_Failure( VKI_ENOSYS );
      return;
   }

   err = ML_(linux_variant_PRE_sys_bproc)( ARG1, ARG2, ARG3,
                                           ARG4, ARG5, ARG6 );
   if (err) {
      SET_STATUS_Failure( err );
      return;
   }
   /* Let it go through. */
   *flags |= SfMayBlock; /* who knows?  play safe. */
}

POST(sys_syscall223)
{
   ML_(linux_variant_POST_sys_bproc)( ARG1, ARG2, ARG3,
                                      ARG4, ARG5, ARG6 );
}

#undef PRE
#undef POST


/* ---------------------------------------------------------------------
   The x86/Linux syscall table
   ------------------------------------------------------------------ */

/* Add an x86-linux specific wrapper to a syscall table. */
#define PLAX_(sysno, name)    WRAPPER_ENTRY_X_(x86_linux, sysno, name)
#define PLAXY(sysno, name)    WRAPPER_ENTRY_XY(x86_linux, sysno, name)


// This table maps from __NR_xxx syscall numbers (from
// linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
// wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
//
// For those syscalls not handled by Valgrind, the annotation indicates the
// syscall's arch/OS combination, e.g. */* (generic), */Linux (Linux only),
// ?/? (unknown).

static SyscallTableEntry syscall_table[] = {
//zz    //   (restart_syscall)                             // 0
   GENX_(__NR_exit,              sys_exit),           // 1
   GENX_(__NR_fork,              sys_fork),           // 2
   GENXY(__NR_read,              sys_read),           // 3
   GENX_(__NR_write,             sys_write),          // 4

   GENXY(__NR_open,              sys_open),           // 5
   GENXY(__NR_close,             sys_close),          // 6
   GENXY(__NR_waitpid,           sys_waitpid),        // 7
   GENXY(__NR_creat,             sys_creat),          // 8
   GENX_(__NR_link,              sys_link),           // 9

   GENX_(__NR_unlink,            sys_unlink),         // 10
   GENX_(__NR_execve,            sys_execve),         // 11
   GENX_(__NR_chdir,             sys_chdir),          // 12
   GENXY(__NR_time,              sys_time),           // 13
   GENX_(__NR_mknod,             sys_mknod),          // 14

   GENX_(__NR_chmod,             sys_chmod),          // 15
//zz    LINX_(__NR_lchown,            sys_lchown16),       // 16
   GENX_(__NR_break,             sys_ni_syscall),     // 17
//zz    //   (__NR_oldstat,           sys_stat),           // 18 (obsolete)
   LINX_(__NR_lseek,             sys_lseek),          // 19

   GENX_(__NR_getpid,            sys_getpid),         // 20
   LINX_(__NR_mount,             sys_mount),          // 21
   LINX_(__NR_umount,            sys_oldumount),      // 22
   LINX_(__NR_setuid,            sys_setuid16),       // 23 ## P
   LINX_(__NR_getuid,            sys_getuid16),       // 24 ## P

   LINX_(__NR_stime,             sys_stime),          // 25 * (SVr4,SVID,X/OPEN)
   PLAXY(__NR_ptrace,            sys_ptrace),         // 26
   GENX_(__NR_alarm,             sys_alarm),          // 27
//zz    //   (__NR_oldfstat,          sys_fstat),          // 28 * L -- obsolete
   GENX_(__NR_pause,             sys_pause),          // 29

   LINX_(__NR_utime,             sys_utime),          // 30
   GENX_(__NR_stty,              sys_ni_syscall),     // 31
   GENX_(__NR_gtty,              sys_ni_syscall),     // 32
   GENX_(__NR_access,            sys_access),         // 33
   GENX_(__NR_nice,              sys_nice),           // 34

   GENX_(__NR_ftime,             sys_ni_syscall),     // 35
   GENX_(__NR_sync,              sys_sync),           // 36
   GENX_(__NR_kill,              sys_kill),           // 37
   GENX_(__NR_rename,            sys_rename),         // 38
   GENX_(__NR_mkdir,             sys_mkdir),          // 39

   GENX_(__NR_rmdir,             sys_rmdir),          // 40
   GENXY(__NR_dup,               sys_dup),            // 41
   LINXY(__NR_pipe,              sys_pipe),           // 42
   GENXY(__NR_times,             sys_times),          // 43
   GENX_(__NR_prof,              sys_ni_syscall),     // 44
//zz
   GENX_(__NR_brk,               sys_brk),            // 45
   LINX_(__NR_setgid,            sys_setgid16),       // 46
   LINX_(__NR_getgid,            sys_getgid16),       // 47
//zz    //   (__NR_signal,            sys_signal),         // 48 */* (ANSI C)
   LINX_(__NR_geteuid,           sys_geteuid16),      // 49

   LINX_(__NR_getegid,           sys_getegid16),      // 50
   GENX_(__NR_acct,              sys_acct),           // 51
   LINX_(__NR_umount2,           sys_umount),         // 52
   GENX_(__NR_lock,              sys_ni_syscall),     // 53
   LINXY(__NR_ioctl,             sys_ioctl),          // 54

   LINXY(__NR_fcntl,             sys_fcntl),          // 55
   GENX_(__NR_mpx,               sys_ni_syscall),     // 56
   GENX_(__NR_setpgid,           sys_setpgid),        // 57
   GENX_(__NR_ulimit,            sys_ni_syscall),     // 58
//zz    //   (__NR_oldolduname,       sys_olduname),       // 59 Linux -- obsolete
//zz
   GENX_(__NR_umask,             sys_umask),          // 60
   GENX_(__NR_chroot,            sys_chroot),         // 61
//zz    //   (__NR_ustat,             sys_ustat)           // 62 SVr4 -- deprecated
   GENXY(__NR_dup2,              sys_dup2),           // 63
   GENX_(__NR_getppid,           sys_getppid),        // 64

   GENX_(__NR_getpgrp,           sys_getpgrp),        // 65
   GENX_(__NR_setsid,            sys_setsid),         // 66
   LINXY(__NR_sigaction,         sys_sigaction),      // 67
//zz    //   (__NR_sgetmask,          sys_sgetmask),       // 68 */* (ANSI C)
//zz    //   (__NR_ssetmask,          sys_ssetmask),       // 69 */* (ANSI C)
//zz
   LINX_(__NR_setreuid,          sys_setreuid16),     // 70
   LINX_(__NR_setregid,          sys_setregid16),     // 71
   PLAX_(__NR_sigsuspend,        sys_sigsuspend),     // 72
   LINXY(__NR_sigpending,        sys_sigpending),     // 73
   GENX_(__NR_sethostname,       sys_sethostname),    // 74
//zz
   GENX_(__NR_setrlimit,         sys_setrlimit),      // 75
   GENXY(__NR_getrlimit,         sys_old_getrlimit),  // 76
   GENXY(__NR_getrusage,         sys_getrusage),      // 77
   GENXY(__NR_gettimeofday,      sys_gettimeofday),   // 78
   GENX_(__NR_settimeofday,      sys_settimeofday),   // 79

   LINXY(__NR_getgroups,         sys_getgroups16),    // 80
   LINX_(__NR_setgroups,         sys_setgroups16),    // 81
   PLAX_(__NR_select,            old_select),         // 82
   GENX_(__NR_symlink,           sys_symlink),        // 83
//zz    //   (__NR_oldlstat,          sys_lstat),          // 84 -- obsolete
//zz
   GENX_(__NR_readlink,          sys_readlink),       // 85
//zz    //   (__NR_uselib,            sys_uselib),         // 86 */Linux
//zz    //   (__NR_swapon,            sys_swapon),         // 87 */Linux
//zz    //   (__NR_reboot,            sys_reboot),         // 88 */Linux
//zz    //   (__NR_readdir,           old_readdir),        // 89 -- superseded
//zz
   PLAX_(__NR_mmap,              old_mmap),           // 90
   GENXY(__NR_munmap,            sys_munmap),         // 91
   GENX_(__NR_truncate,          sys_truncate),       // 92
   GENX_(__NR_ftruncate,         sys_ftruncate),      // 93
   GENX_(__NR_fchmod,            sys_fchmod),         // 94

   LINX_(__NR_fchown,            sys_fchown16),       // 95
   GENX_(__NR_getpriority,       sys_getpriority),    // 96
   GENX_(__NR_setpriority,       sys_setpriority),    // 97
   GENX_(__NR_profil,            sys_ni_syscall),     // 98
   GENXY(__NR_statfs,            sys_statfs),         // 99

   GENXY(__NR_fstatfs,           sys_fstatfs),        // 100
   LINX_(__NR_ioperm,            sys_ioperm),         // 101
   LINXY(__NR_socketcall,        sys_socketcall),     // 102 x86/Linux-only
   LINXY(__NR_syslog,            sys_syslog),         // 103
   GENXY(__NR_setitimer,         sys_setitimer),      // 104

   GENXY(__NR_getitimer,         sys_getitimer),      // 105
   GENXY(__NR_stat,              sys_newstat),        // 106
   GENXY(__NR_lstat,             sys_newlstat),       // 107
   GENXY(__NR_fstat,             sys_newfstat),       // 108
//zz    //   (__NR_olduname,          sys_uname),          // 109 -- obsolete
//zz
   GENX_(__NR_iopl,              sys_iopl),           // 110
   LINX_(__NR_vhangup,           sys_vhangup),        // 111
   GENX_(__NR_idle,              sys_ni_syscall),     // 112
   PLAXY(__NR_vm86old,           sys_vm86old),        // 113 x86/Linux-only
   GENXY(__NR_wait4,             sys_wait4),          // 114
//zz
//zz    //   (__NR_swapoff,           sys_swapoff),        // 115 */Linux
   LINXY(__NR_sysinfo,           sys_sysinfo),        // 116
   LINXY(__NR_ipc,               sys_ipc),            // 117
   GENX_(__NR_fsync,             sys_fsync),          // 118
   PLAX_(__NR_sigreturn,         sys_sigreturn),      // 119 ?/Linux

   PLAX_(__NR_clone,             sys_clone),          // 120
//zz    //   (__NR_setdomainname,     sys_setdomainname),  // 121 */*(?)
   GENXY(__NR_uname,             sys_newuname),       // 122
   PLAX_(__NR_modify_ldt,        sys_modify_ldt),     // 123
1564   LINXY(__NR_adjtimex,          sys_adjtimex),       // 124
1565
1566   GENXY(__NR_mprotect,          sys_mprotect),       // 125
1567   LINXY(__NR_sigprocmask,       sys_sigprocmask),    // 126
1568//zz    // Nb: create_module() was removed 2.4-->2.6
1569   GENX_(__NR_create_module,     sys_ni_syscall),     // 127
1570   LINX_(__NR_init_module,       sys_init_module),    // 128
1571   LINX_(__NR_delete_module,     sys_delete_module),  // 129
1572//zz
1573//zz    // Nb: get_kernel_syms() was removed 2.4-->2.6
1574   GENX_(__NR_get_kernel_syms,   sys_ni_syscall),     // 130
1575   LINX_(__NR_quotactl,          sys_quotactl),       // 131
1576   GENX_(__NR_getpgid,           sys_getpgid),        // 132
1577   GENX_(__NR_fchdir,            sys_fchdir),         // 133
1578//zz    //   (__NR_bdflush,           sys_bdflush),        // 134 */Linux
1579//zz
1580//zz    //   (__NR_sysfs,             sys_sysfs),          // 135 SVr4
1581   LINX_(__NR_personality,       sys_personality),    // 136
1582   GENX_(__NR_afs_syscall,       sys_ni_syscall),     // 137
1583   LINX_(__NR_setfsuid,          sys_setfsuid16),     // 138
1584   LINX_(__NR_setfsgid,          sys_setfsgid16),     // 139
1585
1586   LINXY(__NR__llseek,           sys_llseek),         // 140
1587   GENXY(__NR_getdents,          sys_getdents),       // 141
1588   GENX_(__NR__newselect,        sys_select),         // 142
1589   GENX_(__NR_flock,             sys_flock),          // 143
1590   GENX_(__NR_msync,             sys_msync),          // 144
1591
1592   GENXY(__NR_readv,             sys_readv),          // 145
1593   GENX_(__NR_writev,            sys_writev),         // 146
1594   GENX_(__NR_getsid,            sys_getsid),         // 147
1595   GENX_(__NR_fdatasync,         sys_fdatasync),      // 148
1596   LINXY(__NR__sysctl,           sys_sysctl),         // 149
1597
1598   GENX_(__NR_mlock,             sys_mlock),          // 150
1599   GENX_(__NR_munlock,           sys_munlock),        // 151
1600   GENX_(__NR_mlockall,          sys_mlockall),       // 152
1601   LINX_(__NR_munlockall,        sys_munlockall),     // 153
1602   LINXY(__NR_sched_setparam,    sys_sched_setparam), // 154
1603
1604   LINXY(__NR_sched_getparam,         sys_sched_getparam),        // 155
1605   LINX_(__NR_sched_setscheduler,     sys_sched_setscheduler),    // 156
1606   LINX_(__NR_sched_getscheduler,     sys_sched_getscheduler),    // 157
1607   LINX_(__NR_sched_yield,            sys_sched_yield),           // 158
1608   LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159
1609
1610   LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
1611   LINXY(__NR_sched_rr_get_interval,  sys_sched_rr_get_interval), // 161
1612   GENXY(__NR_nanosleep,         sys_nanosleep),      // 162
1613   GENX_(__NR_mremap,            sys_mremap),         // 163
1614   LINX_(__NR_setresuid,         sys_setresuid16),    // 164
1615
1616   LINXY(__NR_getresuid,         sys_getresuid16),    // 165
1617   PLAXY(__NR_vm86,              sys_vm86),           // 166 x86/Linux-only
1618   GENX_(__NR_query_module,      sys_ni_syscall),     // 167
1619   GENXY(__NR_poll,              sys_poll),           // 168
1620//zz    //   (__NR_nfsservctl,        sys_nfsservctl),     // 169 */Linux
1621//zz
1622   LINX_(__NR_setresgid,         sys_setresgid16),    // 170
1623   LINXY(__NR_getresgid,         sys_getresgid16),    // 171
1624   LINXY(__NR_prctl,             sys_prctl),          // 172
1625   PLAX_(__NR_rt_sigreturn,      sys_rt_sigreturn),   // 173 x86/Linux only?
1626   LINXY(__NR_rt_sigaction,      sys_rt_sigaction),   // 174
1627
1628   LINXY(__NR_rt_sigprocmask,    sys_rt_sigprocmask), // 175
1629   LINXY(__NR_rt_sigpending,     sys_rt_sigpending),  // 176
1630   LINXY(__NR_rt_sigtimedwait,   sys_rt_sigtimedwait),// 177
1631   LINXY(__NR_rt_sigqueueinfo,   sys_rt_sigqueueinfo),// 178
1632   LINX_(__NR_rt_sigsuspend,     sys_rt_sigsuspend),  // 179
1633
1634   GENXY(__NR_pread64,           sys_pread64),        // 180
1635   GENX_(__NR_pwrite64,          sys_pwrite64),       // 181
1636   LINX_(__NR_chown,             sys_chown16),        // 182
1637   GENXY(__NR_getcwd,            sys_getcwd),         // 183
1638   LINXY(__NR_capget,            sys_capget),         // 184
1639
1640   LINX_(__NR_capset,            sys_capset),         // 185
1641   GENXY(__NR_sigaltstack,       sys_sigaltstack),    // 186
1642   LINXY(__NR_sendfile,          sys_sendfile),       // 187
1643   GENXY(__NR_getpmsg,           sys_getpmsg),        // 188
1644   GENX_(__NR_putpmsg,           sys_putpmsg),        // 189
1645
1646   // Nb: we treat vfork as fork
1647   GENX_(__NR_vfork,             sys_fork),           // 190
1648   GENXY(__NR_ugetrlimit,        sys_getrlimit),      // 191
1649   PLAX_(__NR_mmap2,             sys_mmap2),          // 192
1650   GENX_(__NR_truncate64,        sys_truncate64),     // 193
1651   GENX_(__NR_ftruncate64,       sys_ftruncate64),    // 194
1652
1653   PLAXY(__NR_stat64,            sys_stat64),         // 195
1654   PLAXY(__NR_lstat64,           sys_lstat64),        // 196
1655   PLAXY(__NR_fstat64,           sys_fstat64),        // 197
1656   GENX_(__NR_lchown32,          sys_lchown),         // 198
1657   GENX_(__NR_getuid32,          sys_getuid),         // 199
1658
1659   GENX_(__NR_getgid32,          sys_getgid),         // 200
1660   GENX_(__NR_geteuid32,         sys_geteuid),        // 201
1661   GENX_(__NR_getegid32,         sys_getegid),        // 202
1662   GENX_(__NR_setreuid32,        sys_setreuid),       // 203
1663   GENX_(__NR_setregid32,        sys_setregid),       // 204
1664
1665   GENXY(__NR_getgroups32,       sys_getgroups),      // 205
1666   GENX_(__NR_setgroups32,       sys_setgroups),      // 206
1667   GENX_(__NR_fchown32,          sys_fchown),         // 207
1668   LINX_(__NR_setresuid32,       sys_setresuid),      // 208
1669   LINXY(__NR_getresuid32,       sys_getresuid),      // 209
1670
1671   LINX_(__NR_setresgid32,       sys_setresgid),      // 210
1672   LINXY(__NR_getresgid32,       sys_getresgid),      // 211
1673   GENX_(__NR_chown32,           sys_chown),          // 212
1674   GENX_(__NR_setuid32,          sys_setuid),         // 213
1675   GENX_(__NR_setgid32,          sys_setgid),         // 214
1676
1677   LINX_(__NR_setfsuid32,        sys_setfsuid),       // 215
1678   LINX_(__NR_setfsgid32,        sys_setfsgid),       // 216
1679//zz    //   (__NR_pivot_root,        sys_pivot_root),     // 217 */Linux
1680   GENXY(__NR_mincore,           sys_mincore),        // 218
1681   GENX_(__NR_madvise,           sys_madvise),        // 219
1682
1683   GENXY(__NR_getdents64,        sys_getdents64),     // 220
1684   LINXY(__NR_fcntl64,           sys_fcntl64),        // 221
1685   GENX_(222,                    sys_ni_syscall),     // 222
1686   PLAXY(223,                    sys_syscall223),     // 223 // sys_bproc?
1687   LINX_(__NR_gettid,            sys_gettid),         // 224
1688
1689   LINX_(__NR_readahead,         sys_readahead),      // 225 */Linux
1690   LINX_(__NR_setxattr,          sys_setxattr),       // 226
1691   LINX_(__NR_lsetxattr,         sys_lsetxattr),      // 227
1692   LINX_(__NR_fsetxattr,         sys_fsetxattr),      // 228
1693   LINXY(__NR_getxattr,          sys_getxattr),       // 229
1694
1695   LINXY(__NR_lgetxattr,         sys_lgetxattr),      // 230
1696   LINXY(__NR_fgetxattr,         sys_fgetxattr),      // 231
1697   LINXY(__NR_listxattr,         sys_listxattr),      // 232
1698   LINXY(__NR_llistxattr,        sys_llistxattr),     // 233
1699   LINXY(__NR_flistxattr,        sys_flistxattr),     // 234
1700
1701   LINX_(__NR_removexattr,       sys_removexattr),    // 235
1702   LINX_(__NR_lremovexattr,      sys_lremovexattr),   // 236
1703   LINX_(__NR_fremovexattr,      sys_fremovexattr),   // 237
1704   LINXY(__NR_tkill,             sys_tkill),          // 238 */Linux
1705   LINXY(__NR_sendfile64,        sys_sendfile64),     // 239
1706
1707   LINXY(__NR_futex,             sys_futex),             // 240
1708   LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
1709   LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
1710   PLAX_(__NR_set_thread_area,   sys_set_thread_area),   // 243
1711   PLAX_(__NR_get_thread_area,   sys_get_thread_area),   // 244
1712
1713   LINXY(__NR_io_setup,          sys_io_setup),       // 245
1714   LINX_(__NR_io_destroy,        sys_io_destroy),     // 246
1715   LINXY(__NR_io_getevents,      sys_io_getevents),   // 247
1716   LINX_(__NR_io_submit,         sys_io_submit),      // 248
1717   LINXY(__NR_io_cancel,         sys_io_cancel),      // 249
1718
1719   LINX_(__NR_fadvise64,         sys_fadvise64),      // 250 */(Linux?)
1720   GENX_(251,                    sys_ni_syscall),     // 251
1721   LINX_(__NR_exit_group,        sys_exit_group),     // 252
1722   LINXY(__NR_lookup_dcookie,    sys_lookup_dcookie), // 253
1723   LINXY(__NR_epoll_create,      sys_epoll_create),   // 254
1724
1725   LINX_(__NR_epoll_ctl,         sys_epoll_ctl),         // 255
1726   LINXY(__NR_epoll_wait,        sys_epoll_wait),        // 256
1727//zz    //   (__NR_remap_file_pages,  sys_remap_file_pages),  // 257 */Linux
1728   LINX_(__NR_set_tid_address,   sys_set_tid_address),   // 258
1729   LINXY(__NR_timer_create,      sys_timer_create),      // 259
1730
1731   LINXY(__NR_timer_settime,     sys_timer_settime),  // (timer_create+1)
1732   LINXY(__NR_timer_gettime,     sys_timer_gettime),  // (timer_create+2)
1733   LINX_(__NR_timer_getoverrun,  sys_timer_getoverrun),//(timer_create+3)
1734   LINX_(__NR_timer_delete,      sys_timer_delete),   // (timer_create+4)
1735   LINX_(__NR_clock_settime,     sys_clock_settime),  // (timer_create+5)
1736
1737   LINXY(__NR_clock_gettime,     sys_clock_gettime),  // (timer_create+6)
1738   LINXY(__NR_clock_getres,      sys_clock_getres),   // (timer_create+7)
1739   LINXY(__NR_clock_nanosleep,   sys_clock_nanosleep),// (timer_create+8) */*
1740   GENXY(__NR_statfs64,          sys_statfs64),       // 268
1741   GENXY(__NR_fstatfs64,         sys_fstatfs64),      // 269
1742
1743   LINX_(__NR_tgkill,            sys_tgkill),         // 270 */Linux
1744   GENX_(__NR_utimes,            sys_utimes),         // 271
1745   LINX_(__NR_fadvise64_64,      sys_fadvise64_64),   // 272 */(Linux?)
1746   GENX_(__NR_vserver,           sys_ni_syscall),     // 273
1747   LINX_(__NR_mbind,             sys_mbind),          // 274 ?/?
1748
1749   LINXY(__NR_get_mempolicy,     sys_get_mempolicy),  // 275 ?/?
1750   LINX_(__NR_set_mempolicy,     sys_set_mempolicy),  // 276 ?/?
1751   LINXY(__NR_mq_open,           sys_mq_open),        // 277
1752   LINX_(__NR_mq_unlink,         sys_mq_unlink),      // (mq_open+1)
1753   LINX_(__NR_mq_timedsend,      sys_mq_timedsend),   // (mq_open+2)
1754
1755   LINXY(__NR_mq_timedreceive,   sys_mq_timedreceive),// (mq_open+3)
1756   LINX_(__NR_mq_notify,         sys_mq_notify),      // (mq_open+4)
1757   LINXY(__NR_mq_getsetattr,     sys_mq_getsetattr),  // (mq_open+5)
1758   GENX_(__NR_sys_kexec_load,    sys_ni_syscall),     // 283
1759   LINXY(__NR_waitid,            sys_waitid),         // 284
1760
1761   GENX_(285,                    sys_ni_syscall),     // 285
1762   LINX_(__NR_add_key,           sys_add_key),        // 286
1763   LINX_(__NR_request_key,       sys_request_key),    // 287
1764   LINXY(__NR_keyctl,            sys_keyctl),         // 288
1765   LINX_(__NR_ioprio_set,        sys_ioprio_set),     // 289
1766
1767   LINX_(__NR_ioprio_get,        sys_ioprio_get),     // 290
1768   LINX_(__NR_inotify_init,	 sys_inotify_init),   // 291
1769   LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
1770   LINX_(__NR_inotify_rm_watch,	 sys_inotify_rm_watch), // 293
1771//   LINX_(__NR_migrate_pages,	 sys_migrate_pages),    // 294
1772
1773   LINXY(__NR_openat,		 sys_openat),           // 295
1774   LINX_(__NR_mkdirat,		 sys_mkdirat),          // 296
1775   LINX_(__NR_mknodat,		 sys_mknodat),          // 297
1776   LINX_(__NR_fchownat,		 sys_fchownat),         // 298
1777   LINX_(__NR_futimesat,	 sys_futimesat),        // 299
1778
1779   PLAXY(__NR_fstatat64,	 sys_fstatat64),        // 300
1780   LINX_(__NR_unlinkat,		 sys_unlinkat),         // 301
1781   LINX_(__NR_renameat,		 sys_renameat),         // 302
1782   LINX_(__NR_linkat,		 sys_linkat),           // 303
1783   LINX_(__NR_symlinkat,	 sys_symlinkat),        // 304
1784
1785   LINX_(__NR_readlinkat,	 sys_readlinkat),       // 305
1786   LINX_(__NR_fchmodat,		 sys_fchmodat),         // 306
1787   LINX_(__NR_faccessat,	 sys_faccessat),        // 307
1788   LINX_(__NR_pselect6,		 sys_pselect6),         // 308
1789   LINXY(__NR_ppoll,		 sys_ppoll),            // 309
1790
1791//   LINX_(__NR_unshare,		 sys_unshare),          // 310
1792   LINX_(__NR_set_robust_list,	 sys_set_robust_list),  // 311
1793   LINXY(__NR_get_robust_list,	 sys_get_robust_list),  // 312
1794   LINX_(__NR_splice,            sys_splice),           // 313
1795   LINX_(__NR_sync_file_range,   sys_sync_file_range),  // 314
1796
1797   LINX_(__NR_tee,               sys_tee),              // 315
1798   LINXY(__NR_vmsplice,          sys_vmsplice),         // 316
1799   LINXY(__NR_move_pages,        sys_move_pages),       // 317
1800   LINXY(__NR_getcpu,            sys_getcpu),           // 318
1801   LINXY(__NR_epoll_pwait,       sys_epoll_pwait),      // 319
1802
1803   LINX_(__NR_utimensat,         sys_utimensat),        // 320
1804   LINXY(__NR_signalfd,          sys_signalfd),         // 321
1805   LINXY(__NR_timerfd_create,    sys_timerfd_create),   // 322
1806   LINXY(__NR_eventfd,           sys_eventfd),          // 323
1807   LINX_(__NR_fallocate,         sys_fallocate),        // 324
1808
1809   LINXY(__NR_timerfd_settime,   sys_timerfd_settime),  // 325
1810   LINXY(__NR_timerfd_gettime,   sys_timerfd_gettime),  // 326
1811   LINXY(__NR_signalfd4,         sys_signalfd4),        // 327
1812   LINXY(__NR_eventfd2,          sys_eventfd2),         // 328
1813   LINXY(__NR_epoll_create1,     sys_epoll_create1),     // 329
1814
1815   LINXY(__NR_dup3,              sys_dup3),             // 330
1816   LINXY(__NR_pipe2,             sys_pipe2),            // 331
1817   LINXY(__NR_inotify_init1,     sys_inotify_init1),    // 332
1818   LINXY(__NR_preadv,            sys_preadv),           // 333
1819   LINX_(__NR_pwritev,           sys_pwritev),          // 334
1820
1821   LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 335
1822   LINXY(__NR_perf_event_open,   sys_perf_event_open),  // 336
1823   LINXY(__NR_recvmmsg,          sys_recvmmsg),         // 337
1824   LINXY(__NR_fanotify_init,     sys_fanotify_init),    // 338
1825   LINX_(__NR_fanotify_mark,     sys_fanotify_mark),    // 339
1826
1827   LINXY(__NR_prlimit64,         sys_prlimit64),        // 340
1828   LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),// 341
1829   LINXY(__NR_open_by_handle_at, sys_open_by_handle_at),// 342
1830   LINXY(__NR_clock_adjtime,     sys_clock_adjtime),    // 343
1831//   LINX_(__NR_syncfs,            sys_ni_syscall),       // 344
1832
1833   LINXY(__NR_sendmmsg,          sys_sendmmsg),         // 345
1834//   LINX_(__NR_setns,             sys_ni_syscall),       // 346
1835   LINXY(__NR_process_vm_readv,  sys_process_vm_readv), // 347
1836   LINX_(__NR_process_vm_writev, sys_process_vm_writev) // 348
1837};
1838
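/* Map a syscall number to its table entry.  Returns NULL if 'sysno' is
   beyond the end of the table or if the slot has no wrapper registered
   (its 'before' hook is NULL), i.e. the syscall is unhandled here. */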
SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
{
   const UInt syscall_table_size
      = sizeof(syscall_table) / sizeof(syscall_table[0]);

   /* Is it in the contiguous initial section of the table? */
   if (sysno < syscall_table_size) {
      SyscallTableEntry* sys = &syscall_table[sysno];
      if (sys->before == NULL)
         return NULL; /* no entry */
      else
         return sys;
   }

   /* Can't find a wrapper */
   return NULL;
}
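
/* Minimal usage sketch (not part of this file, illustrative only): the
   generic syswrap front end is expected to consult this table around each
   client syscall, roughly along these lines.  The exact dispatch logic
   lives elsewhere (syswrap-main), so treat the names below as assumptions:

      SyscallTableEntry* ent = ML_(get_linux_syscall_entry)(sysno);
      if (ent == NULL) {
         // no wrapper known for this syscall number: report/ignore it
      } else {
         // run ent->before(...), then the syscall itself, and finally
         // ent->after(...) if the entry defines one
      }
*/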

#endif // defined(VGP_x86_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
