1
2/*--------------------------------------------------------------------*/
3/*--- Platform-specific syscalls stuff.        syswrap-x86-linux.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2000-2015 Nicholas Nethercote
11      njn@valgrind.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#if defined(VGP_x86_linux)
32
33/* TODO/FIXME jrs 20050207: assignments to the syscall return result
34   in interrupted_syscall() need to be reviewed.  They don't seem
35   to assign the shadow state.
36*/
37
38#include "pub_core_basics.h"
39#include "pub_core_vki.h"
40#include "pub_core_vkiscnums.h"
41#include "pub_core_threadstate.h"
42#include "pub_core_aspacemgr.h"
43#include "pub_core_debuglog.h"
44#include "pub_core_libcbase.h"
45#include "pub_core_libcassert.h"
46#include "pub_core_libcprint.h"
47#include "pub_core_libcproc.h"
48#include "pub_core_libcsignal.h"
49#include "pub_core_mallocfree.h"
50#include "pub_core_options.h"
51#include "pub_core_scheduler.h"
52#include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
53#include "pub_core_signals.h"
54#include "pub_core_syscall.h"
55#include "pub_core_syswrap.h"
56#include "pub_core_tooliface.h"
57
58#include "priv_types_n_macros.h"
59#include "priv_syswrap-generic.h"    /* for decls of generic wrappers */
60#include "priv_syswrap-linux.h"      /* for decls of linux-ish wrappers */
61#include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
62#include "priv_syswrap-main.h"
63
64
65/* ---------------------------------------------------------------------
66   clone() handling
67   ------------------------------------------------------------------ */
68
69/* Call f(arg1), but first switch stacks, using 'stack' as the new
70   stack, and use 'retaddr' as f's return-to address.  Also, clear all
71   the integer registers before entering f.*/
72__attribute__((noreturn))
73void ML_(call_on_new_stack_0_1) ( Addr stack,
74			          Addr retaddr,
75			          void (*f)(Word),
76                                  Word arg1 );
77//  4(%esp) == stack
78//  8(%esp) == retaddr
79// 12(%esp) == f
80// 16(%esp) == arg1
81asm(
82".text\n"
83".globl vgModuleLocal_call_on_new_stack_0_1\n"
84"vgModuleLocal_call_on_new_stack_0_1:\n"
85"   movl %esp, %esi\n"     // remember old stack pointer
86"   movl 4(%esi), %esp\n"  // set stack, assume %esp is now 16-byte aligned
87"   subl $12, %esp\n"      // skip 12 bytes
88"   pushl 16(%esi)\n"      // arg1 to stack, %esp is 16-byte aligned
89"   pushl  8(%esi)\n"      // retaddr to stack
90"   pushl 12(%esi)\n"      // f to stack
91"   movl $0, %eax\n"       // zero all GP regs
92"   movl $0, %ebx\n"
93"   movl $0, %ecx\n"
94"   movl $0, %edx\n"
95"   movl $0, %esi\n"
96"   movl $0, %edi\n"
97"   movl $0, %ebp\n"
98"   ret\n"                 // jump to f
99"   ud2\n"                 // should never get here
100".previous\n"
101);
102
103
104/*
105        Perform a clone system call.  clone is strange because it has
106        fork()-like return-twice semantics, so it needs special
107        handling here.
108
109        Upon entry, we have:
110
111            int (fn)(void*)     in  0+FSZ(%esp)
112            void* child_stack   in  4+FSZ(%esp)
113            int flags           in  8+FSZ(%esp)
114            void* arg           in 12+FSZ(%esp)
115            pid_t* child_tid    in 16+FSZ(%esp)
116            pid_t* parent_tid   in 20+FSZ(%esp)
117            void* tls_ptr       in 24+FSZ(%esp)
118
119        System call requires:
120
121            int    $__NR_clone  in %eax
122            int    flags        in %ebx
123            void*  child_stack  in %ecx
124            pid_t* parent_tid   in %edx
125            pid_t* child_tid    in %edi
126            void*  tls_ptr      in %esi
127
128	Returns an Int encoded in the linux-x86 way, not a SysRes.
129 */
130#define FSZ               "4+4+4+4" /* frame size = retaddr+ebx+edi+esi */
131#define __NR_CLONE        VG_STRINGIFY(__NR_clone)
132#define __NR_EXIT         VG_STRINGIFY(__NR_exit)
133
134extern
135Int do_syscall_clone_x86_linux ( Word (*fn)(void *),
136                                 void* stack,
137                                 Int   flags,
138                                 void* arg,
139                                 Int*  child_tid,
140                                 Int*  parent_tid,
141                                 vki_modify_ldt_t * );
142asm(
143".text\n"
144".globl do_syscall_clone_x86_linux\n"
145"do_syscall_clone_x86_linux:\n"
146"        push    %ebx\n"
147"        push    %edi\n"
148"        push    %esi\n"
149
150         /* set up child stack with function and arg */
151"        movl     4+"FSZ"(%esp), %ecx\n"    /* syscall arg2: child stack */
152"        movl    12+"FSZ"(%esp), %ebx\n"    /* fn arg */
153"        movl     0+"FSZ"(%esp), %eax\n"    /* fn */
154"        andl    $-16, %ecx\n"              /* align to 16-byte */
155"        lea     -20(%ecx), %ecx\n"         /* allocate 16*n+4 bytes on stack */
156"        movl    %ebx, 4(%ecx)\n"           /*   fn arg */
157"        movl    %eax, 0(%ecx)\n"           /*   fn */
158
159         /* get other args to clone */
160"        movl     8+"FSZ"(%esp), %ebx\n"    /* syscall arg1: flags */
161"        movl    20+"FSZ"(%esp), %edx\n"    /* syscall arg3: parent tid * */
162"        movl    16+"FSZ"(%esp), %edi\n"    /* syscall arg5: child tid * */
163"        movl    24+"FSZ"(%esp), %esi\n"    /* syscall arg4: tls_ptr * */
164"        movl    $"__NR_CLONE", %eax\n"
165"        int     $0x80\n"                   /* clone() */
166"        testl   %eax, %eax\n"              /* child if retval == 0 */
167"        jnz     1f\n"
168
169         /* CHILD - call thread function */
170"        popl    %eax\n"                    /* child %esp is 16-byte aligned */
171"        call    *%eax\n"                   /* call fn */
172
173         /* exit with result */
174"        movl    %eax, %ebx\n"              /* arg1: return value from fn */
175"        movl    $"__NR_EXIT", %eax\n"
176"        int     $0x80\n"
177
178         /* Hm, exit returned */
179"        ud2\n"
180
181"1:\n"   /* PARENT or ERROR */
182"        pop     %esi\n"
183"        pop     %edi\n"
184"        pop     %ebx\n"
185"        ret\n"
186".previous\n"
187);
188
189#undef FSZ
190#undef __NR_CLONE
191#undef __NR_EXIT
192
193
194// forward declarations
195static void setup_child ( ThreadArchState*, ThreadArchState*, Bool );
196static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );
197
198/*
199   When a client clones, we need to keep track of the new thread.  This means:
200   1. allocate a ThreadId+ThreadState+stack for the thread
201
202   2. initialize the thread's new VCPU state
203
204   3. create the thread using the same args as the client requested,
205   but using the scheduler entrypoint for EIP, and a separate stack
206   for ESP.
207 */
208static SysRes do_clone ( ThreadId ptid,
209                         UInt flags, Addr esp,
210                         Int* parent_tidptr,
211                         Int* child_tidptr,
212                         vki_modify_ldt_t *tlsinfo)
213{
214   static const Bool debug = False;
215
216   ThreadId     ctid = VG_(alloc_ThreadState)();
217   ThreadState* ptst = VG_(get_ThreadState)(ptid);
218   ThreadState* ctst = VG_(get_ThreadState)(ctid);
219   UWord*       stack;
220   SysRes       res;
221   Int          eax;
222   vki_sigset_t blockall, savedmask;
223
224   VG_(sigfillset)(&blockall);
225
226   vg_assert(VG_(is_running_thread)(ptid));
227   vg_assert(VG_(is_valid_tid)(ctid));
228
229   stack = (UWord*)ML_(allocstack)(ctid);
230   if (stack == NULL) {
231      res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
232      goto out;
233   }
234
235   /* Copy register state
236
237      Both parent and child return to the same place, and the code
238      following the clone syscall works out which is which, so we
239      don't need to worry about it.
240
241      The parent gets the child's new tid returned from clone, but the
242      child gets 0.
243
244      If the clone call specifies a NULL esp for the new thread, then
245      it actually gets a copy of the parent's esp.
246   */
247   /* Note: the clone call done by the Quadrics Elan3 driver specifies
248      clone flags of 0xF00, and it seems to rely on the assumption
249      that the child inherits a copy of the parent's GDT.
250      setup_child takes care of setting that up. */
251   setup_child( &ctst->arch, &ptst->arch, True );
252
253   /* Make sys_clone appear to have returned Success(0) in the
254      child. */
255   ctst->arch.vex.guest_EAX = 0;
256
257   if (esp != 0)
258      ctst->arch.vex.guest_ESP = esp;
259
260   ctst->os_state.parent = ptid;
261
262   /* inherit signal mask */
263   ctst->sig_mask     = ptst->sig_mask;
264   ctst->tmp_sig_mask = ptst->sig_mask;
265
266   /* Start the child with its threadgroup being the same as the
267      parent's.  This is so that any exit_group calls that happen
268      after the child is created but before it sets its
269      os_state.threadgroup field for real (in thread_wrapper in
270      syswrap-linux.c), really kill the new thread.  a.k.a this avoids
271      a race condition in which the thread is unkillable (via
272      exit_group) because its threadgroup is not set.  The race window
273      is probably only a few hundred or a few thousand cycles long.
274      See #226116. */
275   ctst->os_state.threadgroup = ptst->os_state.threadgroup;
276
277   ML_(guess_and_register_stack) (esp, ctst);
278
279   /* Assume the clone will succeed, and tell any tool that wants to
280      know that this thread has come into existence.  We cannot defer
281      it beyond this point because sys_set_thread_area, just below,
282      causes tCheck to assert by making references to the new ThreadId
283      if we don't state the new thread exists prior to that point.
284      If the clone fails, we'll send out a ll_exit notification for it
285      at the out: label below, to clean up. */
286   vg_assert(VG_(owns_BigLock_LL)(ptid));
287   VG_TRACK ( pre_thread_ll_create, ptid, ctid );
288
289   if (flags & VKI_CLONE_SETTLS) {
290      if (debug)
291	 VG_(printf)("clone child has SETTLS: tls info at %p: idx=%u "
292                     "base=%#lx limit=%x; esp=%#x fs=%x gs=%x\n",
293		     tlsinfo, tlsinfo->entry_number,
294                     tlsinfo->base_addr, tlsinfo->limit,
295		     ptst->arch.vex.guest_ESP,
296		     ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
297      res = sys_set_thread_area(ctid, tlsinfo);
298      if (sr_isError(res))
299	 goto out;
300   }
301
302   flags &= ~VKI_CLONE_SETTLS;
303
304   /* start the thread with everything blocked */
305   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);
306
307   /* Create the new thread */
308   eax = do_syscall_clone_x86_linux(
309            ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
310            child_tidptr, parent_tidptr, NULL
311         );
312   res = VG_(mk_SysRes_x86_linux)( eax );
313
314   VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);
315
316  out:
317   if (sr_isError(res)) {
318      /* clone failed */
319      VG_(cleanup_thread)(&ctst->arch);
320      ctst->status = VgTs_Empty;
321      /* oops.  Better tell the tool the thread exited in a hurry :-) */
322      VG_TRACK( pre_thread_ll_exit, ctid );
323   }
324
325   return res;
326}
327
328
329/* ---------------------------------------------------------------------
330   LDT/GDT simulation
331   ------------------------------------------------------------------ */
332
333/* Details of the LDT simulation
334   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
335
336   When a program runs natively, the linux kernel allows each *thread*
337   in it to have its own LDT.  Almost all programs never do this --
338   it's wildly unportable, after all -- and so the kernel never
339   allocates the structure, which is just as well as an LDT occupies
340   64k of memory (8192 entries of size 8 bytes).
341
342   A thread may choose to modify its LDT entries, by doing the
343   __NR_modify_ldt syscall.  In such a situation the kernel will then
344   allocate an LDT structure for it.  Each LDT entry is basically a
345   (base, limit) pair.  A virtual address in a specific segment is
346   translated to a linear address by adding the segment's base value.
347   In addition, the virtual address must not exceed the limit value.
348
349   To use an LDT entry, a thread loads one of the segment registers
350   (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
351   .. 8191) it wants to use.  In fact, the required value is (index <<
352   3) + 7, but that's not important right now.  Any normal instruction
353   which includes an addressing mode can then be made relative to that
354   LDT entry by prefixing the insn with a so-called segment-override
355   prefix, a byte which indicates which of the 6 segment registers
356   holds the LDT index.
357
358   Now, a key constraint is that valgrind's address checks operate in
359   terms of linear addresses.  So we have to explicitly translate
360   virtual addrs into linear addrs, and that means doing a complete
361   LDT simulation.
362
363   Calls to modify_ldt are intercepted.  For each thread, we maintain
364   an LDT (with the same normally-never-allocated optimisation that
365   the kernel does).  This is updated as expected via calls to
366   modify_ldt.
367
368   When a thread does an amode calculation involving a segment
369   override prefix, the relevant LDT entry for the thread is
370   consulted.  It all works.
371
372   There is a conceptual problem, which appears when switching back to
373   native execution, either temporarily to pass syscalls to the
374   kernel, or permanently, when debugging V.  Problem at such points
375   is that it's pretty pointless to copy the simulated machine's
376   segment registers to the real machine, because we'd also need to
377   copy the simulated LDT into the real one, and that's prohibitively
378   expensive.
379
380   Fortunately it looks like no syscalls rely on the segment regs or
381   LDT being correct, so we can get away with it.  Apart from that the
382   simulation is pretty straightforward.  All 6 segment registers are
383   tracked, although only %ds, %es, %fs and %gs are allowed as
384   prefixes.  Perhaps it could be restricted even more than that -- I
385   am not sure what is and isn't allowed in user-mode.
386*/
387
388/* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
389   the Linux kernel's logic (cut-n-paste of code in
390   linux/kernel/ldt.c).  */
391
392static
393void translate_to_hw_format ( /* IN  */ vki_modify_ldt_t* inn,
394                              /* OUT */ VexGuestX86SegDescr* out,
395                                        Int oldmode )
396{
397   UInt entry_1, entry_2;
398   vg_assert(8 == sizeof(VexGuestX86SegDescr));
399
400   if (0)
401      VG_(printf)("translate_to_hw_format: base %#lx, limit %u\n",
402                  inn->base_addr, inn->limit );
403
404   /* Allow LDTs to be cleared by the user. */
405   if (inn->base_addr == 0 && inn->limit == 0) {
406      if (oldmode ||
407          (inn->contents == 0      &&
408           inn->read_exec_only == 1   &&
409           inn->seg_32bit == 0      &&
410           inn->limit_in_pages == 0   &&
411           inn->seg_not_present == 1   &&
412           inn->useable == 0 )) {
413         entry_1 = 0;
414         entry_2 = 0;
415         goto install;
416      }
417   }
418
419   entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
420             (inn->limit & 0x0ffff);
421   entry_2 = (inn->base_addr & 0xff000000) |
422             ((inn->base_addr & 0x00ff0000) >> 16) |
423             (inn->limit & 0xf0000) |
424             ((inn->read_exec_only ^ 1) << 9) |
425             (inn->contents << 10) |
426             ((inn->seg_not_present ^ 1) << 15) |
427             (inn->seg_32bit << 22) |
428             (inn->limit_in_pages << 23) |
429             0x7000;
430   if (!oldmode)
431      entry_2 |= (inn->useable << 20);
432
433   /* Install the new entry ...  */
434  install:
435   out->LdtEnt.Words.word1 = entry_1;
436   out->LdtEnt.Words.word2 = entry_2;
437}
438
439/* Create a zeroed-out GDT. */
440static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
441{
442   Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
443   return VG_(calloc)("di.syswrap-x86.azxG.1", nbytes, 1);
444}
445
446/* Create a zeroed-out LDT. */
447static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
448{
449   Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
450   return VG_(calloc)("di.syswrap-x86.azxL.1", nbytes, 1);
451}
452
453/* Free up an LDT or GDT allocated by the above fns. */
454static void free_LDT_or_GDT ( VexGuestX86SegDescr* dt )
455{
456   vg_assert(dt);
457   VG_(free)(dt);
458}
459
460/* Copy contents between two existing LDTs. */
461static void copy_LDT_from_to ( VexGuestX86SegDescr* src,
462                               VexGuestX86SegDescr* dst )
463{
464   Int i;
465   vg_assert(src);
466   vg_assert(dst);
467   for (i = 0; i < VEX_GUEST_X86_LDT_NENT; i++)
468      dst[i] = src[i];
469}
470
471/* Copy contents between two existing GDTs. */
472static void copy_GDT_from_to ( VexGuestX86SegDescr* src,
473                               VexGuestX86SegDescr* dst )
474{
475   Int i;
476   vg_assert(src);
477   vg_assert(dst);
478   for (i = 0; i < VEX_GUEST_X86_GDT_NENT; i++)
479      dst[i] = src[i];
480}
481
482/* Free this thread's DTs, if it has any. */
483static void deallocate_LGDTs_for_thread ( VexGuestX86State* vex )
484{
485   vg_assert(sizeof(HWord) == sizeof(void*));
486
487   if (0)
488      VG_(printf)("deallocate_LGDTs_for_thread: "
489                  "ldt = 0x%lx, gdt = 0x%lx\n",
490                  vex->guest_LDT, vex->guest_GDT );
491
492   if (vex->guest_LDT != (HWord)NULL) {
493      free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_LDT );
494      vex->guest_LDT = (HWord)NULL;
495   }
496
497   if (vex->guest_GDT != (HWord)NULL) {
498      free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_GDT );
499      vex->guest_GDT = (HWord)NULL;
500   }
501}
502
503
504/*
505 * linux/kernel/ldt.c
506 *
507 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
508 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
509 */
510
511/*
512 * read_ldt() is not really atomic - this is not a problem since
513 * synchronization of reads and writes done to the LDT has to be
514 * assured by user-space anyway. Writes are atomic, to protect
515 * the security checks done on new descriptors.
516 */
517static
518SysRes read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
519{
520   SysRes res;
521   UInt   i, size;
522   UChar* ldt;
523
524   if (0)
525      VG_(printf)("read_ldt: tid = %u, ptr = %p, bytecount = %u\n",
526                  tid, ptr, bytecount );
527
528   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
529   vg_assert(8 == sizeof(VexGuestX86SegDescr));
530
531   ldt = (UChar*)(VG_(threads)[tid].arch.vex.guest_LDT);
532   res = VG_(mk_SysRes_Success)( 0 );
533   if (ldt == NULL)
534      /* LDT not allocated, meaning all entries are null */
535      goto out;
536
537   size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
538   if (size > bytecount)
539      size = bytecount;
540
541   res = VG_(mk_SysRes_Success)( size );
542   for (i = 0; i < size; i++)
543      ptr[i] = ldt[i];
544
545  out:
546   return res;
547}
548
549
550static
551SysRes write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
552{
553   SysRes res;
554   VexGuestX86SegDescr* ldt;
555   vki_modify_ldt_t* ldt_info;
556
557   if (0)
558      VG_(printf)("write_ldt: tid = %u, ptr = %p, "
559                  "bytecount = %u, oldmode = %d\n",
560                  tid, ptr, bytecount, oldmode );
561
562   vg_assert(8 == sizeof(VexGuestX86SegDescr));
563   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
564
565   ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
566   ldt_info = (vki_modify_ldt_t*)ptr;
567
568   res = VG_(mk_SysRes_Error)( VKI_EINVAL );
569   if (bytecount != sizeof(vki_modify_ldt_t))
570      goto out;
571
572   res = VG_(mk_SysRes_Error)( VKI_EINVAL );
573   if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
574      goto out;
575   if (ldt_info->contents == 3) {
576      if (oldmode)
577         goto out;
578      if (ldt_info->seg_not_present == 0)
579         goto out;
580   }
581
582   /* If this thread doesn't have an LDT, we'd better allocate it
583      now. */
584   if (ldt == NULL) {
585      ldt = alloc_zeroed_x86_LDT();
586      VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
587   }
588
589   /* Install the new entry ...  */
590   translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
591   res = VG_(mk_SysRes_Success)( 0 );
592
593  out:
594   return res;
595}
596
597
598static SysRes sys_modify_ldt ( ThreadId tid,
599                               Int func, void* ptr, UInt bytecount )
600{
601   SysRes ret = VG_(mk_SysRes_Error)( VKI_ENOSYS );
602
603   switch (func) {
604   case 0:
605      ret = read_ldt(tid, ptr, bytecount);
606      break;
607   case 1:
608      ret = write_ldt(tid, ptr, bytecount, 1);
609      break;
610   case 2:
611      VG_(unimplemented)("sys_modify_ldt: func == 2");
612      /* god knows what this is about */
613      /* ret = read_default_ldt(ptr, bytecount); */
614      /*UNREACHED*/
615      break;
616   case 0x11:
617      ret = write_ldt(tid, ptr, bytecount, 0);
618      break;
619   }
620   return ret;
621}
622
623
624static SysRes sys_set_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
625{
626   Int                  idx;
627   VexGuestX86SegDescr* gdt;
628
629   vg_assert(8 == sizeof(VexGuestX86SegDescr));
630   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
631
632   if (info == NULL)
633      return VG_(mk_SysRes_Error)( VKI_EFAULT );
634
635   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
636
637   /* If the thread doesn't have a GDT, allocate it now. */
638   if (!gdt) {
639      gdt = alloc_zeroed_x86_GDT();
640      VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
641   }
642
643   idx = info->entry_number;
644
645   if (idx == -1) {
646      /* Find and use the first free entry.  Don't allocate entry
647         zero, because the hardware will never do that, and apparently
648         doing so confuses some code (perhaps stuff running on
649         Wine). */
650      for (idx = 1; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
651         if (gdt[idx].LdtEnt.Words.word1 == 0
652             && gdt[idx].LdtEnt.Words.word2 == 0)
653            break;
654      }
655
656      if (idx == VEX_GUEST_X86_GDT_NENT)
657         return VG_(mk_SysRes_Error)( VKI_ESRCH );
658   } else if (idx < 0 || idx == 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
659      /* Similarly, reject attempts to use GDT[0]. */
660      return VG_(mk_SysRes_Error)( VKI_EINVAL );
661   }
662
663   translate_to_hw_format(info, &gdt[idx], 0);
664
665   VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
666             "set_thread_area(info->entry)",
667             (Addr) & info->entry_number, sizeof(unsigned int) );
668   info->entry_number = idx;
669   VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
670             (Addr) & info->entry_number, sizeof(unsigned int) );
671
672   return VG_(mk_SysRes_Success)( 0 );
673}
674
675
676static SysRes sys_get_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
677{
678   Int idx;
679   VexGuestX86SegDescr* gdt;
680
681   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
682   vg_assert(8 == sizeof(VexGuestX86SegDescr));
683
684   if (info == NULL)
685      return VG_(mk_SysRes_Error)( VKI_EFAULT );
686
687   idx = info->entry_number;
688
689   if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
690      return VG_(mk_SysRes_Error)( VKI_EINVAL );
691
692   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
693
694   /* If the thread doesn't have a GDT, allocate it now. */
695   if (!gdt) {
696      gdt = alloc_zeroed_x86_GDT();
697      VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
698   }
699
700   info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
701                     ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
702                     gdt[idx].LdtEnt.Bits.BaseLow;
703   info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
704                   gdt[idx].LdtEnt.Bits.LimitLow;
705   info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
706   info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
707   info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
708   info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
709   info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
710   info->useable = gdt[idx].LdtEnt.Bits.Sys;
711   info->reserved = 0;
712
713   return VG_(mk_SysRes_Success)( 0 );
714}
715
716/* ---------------------------------------------------------------------
717   More thread stuff
718   ------------------------------------------------------------------ */
719
720void VG_(cleanup_thread) ( ThreadArchState* arch )
721{
722   /* Release arch-specific resources held by this thread. */
723   /* On x86, we have to dump the LDT and GDT. */
724   deallocate_LGDTs_for_thread( &arch->vex );
725}
726
727
728static void setup_child ( /*OUT*/ ThreadArchState *child,
729                          /*IN*/  ThreadArchState *parent,
730                          Bool inherit_parents_GDT )
731{
732   /* We inherit our parent's guest state. */
733   child->vex = parent->vex;
734   child->vex_shadow1 = parent->vex_shadow1;
735   child->vex_shadow2 = parent->vex_shadow2;
736
737   /* We inherit our parent's LDT. */
738   if (parent->vex.guest_LDT == (HWord)NULL) {
739      /* We hope this is the common case. */
740      child->vex.guest_LDT = (HWord)NULL;
741   } else {
742      /* No luck .. we have to take a copy of the parent's. */
743      child->vex.guest_LDT = (HWord)alloc_zeroed_x86_LDT();
744      copy_LDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_LDT,
745                        (VexGuestX86SegDescr*)child->vex.guest_LDT );
746   }
747
748   /* Either we start with an empty GDT (the usual case) or inherit a
749      copy of our parents' one (Quadrics Elan3 driver -style clone
750      only). */
751   child->vex.guest_GDT = (HWord)NULL;
752
753   if (inherit_parents_GDT && parent->vex.guest_GDT != (HWord)NULL) {
754      child->vex.guest_GDT = (HWord)alloc_zeroed_x86_GDT();
755      copy_GDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_GDT,
756                        (VexGuestX86SegDescr*)child->vex.guest_GDT );
757   }
758}
759
760
761/* ---------------------------------------------------------------------
762   PRE/POST wrappers for x86/Linux-specific syscalls
763   ------------------------------------------------------------------ */
764
765#define PRE(name)       DEFN_PRE_TEMPLATE(x86_linux, name)
766#define POST(name)      DEFN_POST_TEMPLATE(x86_linux, name)
767
768/* Add prototypes for the wrappers declared here, so that gcc doesn't
769   harass us for not having prototypes.  Really this is a kludge --
770   the right thing to do is to make these wrappers 'static' since they
771   aren't visible outside this file, but that requires even more macro
772   magic. */
773DECL_TEMPLATE(x86_linux, sys_stat64);
774DECL_TEMPLATE(x86_linux, sys_fstatat64);
775DECL_TEMPLATE(x86_linux, sys_fstat64);
776DECL_TEMPLATE(x86_linux, sys_lstat64);
777DECL_TEMPLATE(x86_linux, sys_clone);
778DECL_TEMPLATE(x86_linux, old_mmap);
779DECL_TEMPLATE(x86_linux, sys_mmap2);
780DECL_TEMPLATE(x86_linux, sys_sigreturn);
781DECL_TEMPLATE(x86_linux, sys_rt_sigreturn);
782DECL_TEMPLATE(x86_linux, sys_modify_ldt);
783DECL_TEMPLATE(x86_linux, sys_set_thread_area);
784DECL_TEMPLATE(x86_linux, sys_get_thread_area);
785DECL_TEMPLATE(x86_linux, sys_ptrace);
786DECL_TEMPLATE(x86_linux, sys_sigsuspend);
787DECL_TEMPLATE(x86_linux, old_select);
788DECL_TEMPLATE(x86_linux, sys_vm86old);
789DECL_TEMPLATE(x86_linux, sys_vm86);
790DECL_TEMPLATE(x86_linux, sys_syscall223);
791
792PRE(old_select)
793{
794   /* struct sel_arg_struct {
795      unsigned long n;
796      fd_set *inp, *outp, *exp;
797      struct timeval *tvp;
798      };
799   */
800   PRE_REG_READ1(long, "old_select", struct sel_arg_struct *, args);
801   PRE_MEM_READ( "old_select(args)", ARG1, 5*sizeof(UWord) );
802   *flags |= SfMayBlock;
803   {
804      UInt* arg_struct = (UInt*)ARG1;
805      UInt a1, a2, a3, a4, a5;
806
807      a1 = arg_struct[0];
808      a2 = arg_struct[1];
809      a3 = arg_struct[2];
810      a4 = arg_struct[3];
811      a5 = arg_struct[4];
812
813      PRINT("old_select ( %d, %#x, %#x, %#x, %#x )", (Int)a1,a2,a3,a4,a5);
814      if (a2 != (Addr)NULL)
815         PRE_MEM_READ( "old_select(readfds)",   a2, a1/8 /* __FD_SETSIZE/8 */ );
816      if (a3 != (Addr)NULL)
817         PRE_MEM_READ( "old_select(writefds)",  a3, a1/8 /* __FD_SETSIZE/8 */ );
818      if (a4 != (Addr)NULL)
819         PRE_MEM_READ( "old_select(exceptfds)", a4, a1/8 /* __FD_SETSIZE/8 */ );
820      if (a5 != (Addr)NULL)
821         PRE_MEM_READ( "old_select(timeout)", a5, sizeof(struct vki_timeval) );
822   }
823}
824
825PRE(sys_clone)
826{
827   UInt cloneflags;
828   Bool badarg = False;
829
830   PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
831   PRE_REG_READ2(int, "clone",
832                 unsigned long, flags,
833                 void *, child_stack);
834
835   if (ARG1 & VKI_CLONE_PARENT_SETTID) {
836      if (VG_(tdict).track_pre_reg_read) {
837         PRA3("clone", int *, parent_tidptr);
838      }
839      PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
840      if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
841                                             VKI_PROT_WRITE)) {
842         badarg = True;
843      }
844   }
845   if (ARG1 & VKI_CLONE_SETTLS) {
846      if (VG_(tdict).track_pre_reg_read) {
847         PRA4("clone", vki_modify_ldt_t *, tlsinfo);
848      }
849      PRE_MEM_READ("clone(tlsinfo)", ARG4, sizeof(vki_modify_ldt_t));
850      if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t),
851                                             VKI_PROT_READ)) {
852         badarg = True;
853      }
854   }
855   if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
856      if (VG_(tdict).track_pre_reg_read) {
857         PRA5("clone", int *, child_tidptr);
858      }
859      PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
860      if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
861                                             VKI_PROT_WRITE)) {
862         badarg = True;
863      }
864   }
865
866   if (badarg) {
867      SET_STATUS_Failure( VKI_EFAULT );
868      return;
869   }
870
871   cloneflags = ARG1;
872
873   if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
874      SET_STATUS_Failure( VKI_EINVAL );
875      return;
876   }
877
878   /* Be ultra-paranoid and filter out any clone-variants we don't understand:
879      - ??? specifies clone flags of 0x100011
880      - ??? specifies clone flags of 0x1200011.
881      - NPTL specifies clone flags of 0x7D0F00.
882      - The Quadrics Elan3 driver specifies clone flags of 0xF00.
883      - Newer Quadrics Elan3 drivers with NTPL support specify 0x410F00.
884      Everything else is rejected.
885   */
886   if (
887        1 ||
888        /* 11 Nov 05: for the time being, disable this ultra-paranoia.
889           The switch below probably does a good enough job. */
890          (cloneflags == 0x100011 || cloneflags == 0x1200011
891                                  || cloneflags == 0x7D0F00
892                                  || cloneflags == 0x790F00
893                                  || cloneflags == 0x3D0F00
894                                  || cloneflags == 0x410F00
895                                  || cloneflags == 0xF00
896                                  || cloneflags == 0xF21)) {
897     /* OK */
898   }
899   else {
900      /* Nah.  We don't like it.  Go away. */
901      goto reject;
902   }
903
904   /* Only look at the flags we really care about */
905   switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
906                         | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
907   case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
908      /* thread creation */
909      SET_STATUS_from_SysRes(
910         do_clone(tid,
911                  ARG1,         /* flags */
912                  (Addr)ARG2,   /* child ESP */
913                  (Int *)ARG3,  /* parent_tidptr */
914                  (Int *)ARG5,  /* child_tidptr */
915                  (vki_modify_ldt_t *)ARG4)); /* set_tls */
916      break;
917
918   case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
919      /* FALLTHROUGH - assume vfork == fork */
920      cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);
921
922   case 0: /* plain fork */
923      SET_STATUS_from_SysRes(
924         ML_(do_fork_clone)(tid,
925                       cloneflags,      /* flags */
926                       (Int *)ARG3,     /* parent_tidptr */
927                       (Int *)ARG5));   /* child_tidptr */
928      break;
929
930   default:
931   reject:
932      /* should we just ENOSYS? */
933      VG_(message)(Vg_UserMsg, "\n");
934      VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
935      VG_(message)(Vg_UserMsg, "\n");
936      VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
937      VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
938      VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
939      VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
940      VG_(unimplemented)
941         ("Valgrind does not support general clone().");
942   }
943
944   if (SUCCESS) {
945      if (ARG1 & VKI_CLONE_PARENT_SETTID)
946         POST_MEM_WRITE(ARG3, sizeof(Int));
947      if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
948         POST_MEM_WRITE(ARG5, sizeof(Int));
949
950      /* Thread creation was successful; let the child have the chance
951         to run */
952      *flags |= SfYieldAfter;
953   }
954}
955
956PRE(sys_sigreturn)
957{
958   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
959      an explanation of what follows. */
960
961   ThreadState* tst;
962   PRINT("sys_sigreturn ( )");
963
964   vg_assert(VG_(is_valid_tid)(tid));
965   vg_assert(tid >= 1 && tid < VG_N_THREADS);
966   vg_assert(VG_(is_running_thread)(tid));
967
968   /* Adjust esp to point to start of frame; skip back up over
969      sigreturn sequence's "popl %eax" and handler ret addr */
970   tst = VG_(get_ThreadState)(tid);
971   tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
972   /* XXX why does ESP change differ from rt_sigreturn case below? */
973
974   /* This is only so that the EIP is (might be) useful to report if
975      something goes wrong in the sigreturn */
976   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
977
978   /* Restore register state from frame and remove it */
979   VG_(sigframe_destroy)(tid, False);
980
981   /* Tell the driver not to update the guest state with the "result",
982      and set a bogus result to keep it happy. */
983   *flags |= SfNoWriteResult;
984   SET_STATUS_Success(0);
985
986   /* Check to see if any signals arose as a result of this. */
987   *flags |= SfPollAfter;
988}
989
990PRE(sys_rt_sigreturn)
991{
992   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
993      an explanation of what follows. */
994
995   ThreadState* tst;
996   PRINT("sys_rt_sigreturn ( )");
997
998   vg_assert(VG_(is_valid_tid)(tid));
999   vg_assert(tid >= 1 && tid < VG_N_THREADS);
1000   vg_assert(VG_(is_running_thread)(tid));
1001
1002   /* Adjust esp to point to start of frame; skip back up over handler
1003      ret addr */
1004   tst = VG_(get_ThreadState)(tid);
1005   tst->arch.vex.guest_ESP -= sizeof(Addr);
1006   /* XXX why does ESP change differ from sigreturn case above? */
1007
1008   /* This is only so that the EIP is (might be) useful to report if
1009      something goes wrong in the sigreturn */
1010   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
1011
1012   /* Restore register state from frame and remove it */
1013   VG_(sigframe_destroy)(tid, True);
1014
1015   /* Tell the driver not to update the guest state with the "result",
1016      and set a bogus result to keep it happy. */
1017   *flags |= SfNoWriteResult;
1018   SET_STATUS_Success(0);
1019
1020   /* Check to see if any signals arose as a result of this. */
1021   *flags |= SfPollAfter;
1022}
1023
1024PRE(sys_modify_ldt)
1025{
1026   PRINT("sys_modify_ldt ( %ld, %#lx, %lu )", SARG1, ARG2, ARG3);
1027   PRE_REG_READ3(int, "modify_ldt", int, func, void *, ptr,
1028                 unsigned long, bytecount);
1029
1030   if (ARG1 == 0) {
1031      /* read the LDT into ptr */
1032      PRE_MEM_WRITE( "modify_ldt(ptr)", ARG2, ARG3 );
1033   }
1034   if (ARG1 == 1 || ARG1 == 0x11) {
1035      /* write the LDT with the entry pointed at by ptr */
1036      PRE_MEM_READ( "modify_ldt(ptr)", ARG2, sizeof(vki_modify_ldt_t) );
1037   }
1038   /* "do" the syscall ourselves; the kernel never sees it */
1039   SET_STATUS_from_SysRes( sys_modify_ldt( tid, ARG1, (void*)ARG2, ARG3 ) );
1040
1041   if (ARG1 == 0 && SUCCESS && RES > 0) {
1042      POST_MEM_WRITE( ARG2, RES );
1043   }
1044}
1045
1046PRE(sys_set_thread_area)
1047{
1048   PRINT("sys_set_thread_area ( %#lx )", ARG1);
1049   PRE_REG_READ1(int, "set_thread_area", struct user_desc *, u_info)
1050   PRE_MEM_READ( "set_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
1051
1052   /* "do" the syscall ourselves; the kernel never sees it */
1053   SET_STATUS_from_SysRes( sys_set_thread_area( tid, (void *)ARG1 ) );
1054}
1055
1056PRE(sys_get_thread_area)
1057{
1058   PRINT("sys_get_thread_area ( %#lx )", ARG1);
1059   PRE_REG_READ1(int, "get_thread_area", struct user_desc *, u_info)
1060   PRE_MEM_WRITE( "get_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
1061
1062   /* "do" the syscall ourselves; the kernel never sees it */
1063   SET_STATUS_from_SysRes( sys_get_thread_area( tid, (void *)ARG1 ) );
1064
1065   if (SUCCESS) {
1066      POST_MEM_WRITE( ARG1, sizeof(vki_modify_ldt_t) );
1067   }
1068}
1069
1070// Parts of this are x86-specific, but the *PEEK* cases are generic.
1071//
1072// ARG3 is only used for pointers into the traced process's address
1073// space and for offsets into the traced process's struct
1074// user_regs_struct. It is never a pointer into this process's memory
1075// space, and we should therefore not check anything it points to.
1076PRE(sys_ptrace)
1077{
1078   PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", SARG1, SARG2, ARG3, ARG4);
1079   PRE_REG_READ4(int, "ptrace",
1080                 long, request, long, pid, unsigned long, addr,
1081                 unsigned long, data);
1082   switch (ARG1) {
1083   case VKI_PTRACE_PEEKTEXT:
1084   case VKI_PTRACE_PEEKDATA:
1085   case VKI_PTRACE_PEEKUSR:
1086      PRE_MEM_WRITE( "ptrace(peek)", ARG4,
1087		     sizeof (long));
1088      break;
1089   case VKI_PTRACE_GETREGS:
1090      PRE_MEM_WRITE( "ptrace(getregs)", ARG4,
1091		     sizeof (struct vki_user_regs_struct));
1092      break;
1093   case VKI_PTRACE_GETFPREGS:
1094      PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4,
1095		     sizeof (struct vki_user_i387_struct));
1096      break;
1097   case VKI_PTRACE_GETFPXREGS:
1098      PRE_MEM_WRITE( "ptrace(getfpxregs)", ARG4,
1099                     sizeof(struct vki_user_fxsr_struct) );
1100      break;
1101   case VKI_PTRACE_GET_THREAD_AREA:
1102      PRE_MEM_WRITE( "ptrace(get_thread_area)", ARG4,
1103                     sizeof(struct vki_user_desc) );
1104      break;
1105   case VKI_PTRACE_SETREGS:
1106      PRE_MEM_READ( "ptrace(setregs)", ARG4,
1107		     sizeof (struct vki_user_regs_struct));
1108      break;
1109   case VKI_PTRACE_SETFPREGS:
1110      PRE_MEM_READ( "ptrace(setfpregs)", ARG4,
1111		     sizeof (struct vki_user_i387_struct));
1112      break;
1113   case VKI_PTRACE_SETFPXREGS:
1114      PRE_MEM_READ( "ptrace(setfpxregs)", ARG4,
1115                     sizeof(struct vki_user_fxsr_struct) );
1116      break;
1117   case VKI_PTRACE_SET_THREAD_AREA:
1118      PRE_MEM_READ( "ptrace(set_thread_area)", ARG4,
1119                     sizeof(struct vki_user_desc) );
1120      break;
1121   case VKI_PTRACE_GETEVENTMSG:
1122      PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
1123      break;
1124   case VKI_PTRACE_GETSIGINFO:
1125      PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
1126      break;
1127   case VKI_PTRACE_SETSIGINFO:
1128      PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
1129      break;
1130   case VKI_PTRACE_GETREGSET:
1131      ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
1132      break;
1133   case VKI_PTRACE_SETREGSET:
1134      ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
1135      break;
1136   default:
1137      break;
1138   }
1139}
1140
1141POST(sys_ptrace)
1142{
1143   switch (ARG1) {
1144   case VKI_PTRACE_PEEKTEXT:
1145   case VKI_PTRACE_PEEKDATA:
1146   case VKI_PTRACE_PEEKUSR:
1147      POST_MEM_WRITE( ARG4, sizeof (long));
1148      break;
1149   case VKI_PTRACE_GETREGS:
1150      POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
1151      break;
1152   case VKI_PTRACE_GETFPREGS:
1153      POST_MEM_WRITE( ARG4, sizeof (struct vki_user_i387_struct));
1154      break;
1155   case VKI_PTRACE_GETFPXREGS:
1156      POST_MEM_WRITE( ARG4, sizeof(struct vki_user_fxsr_struct) );
1157      break;
1158   case VKI_PTRACE_GET_THREAD_AREA:
1159      POST_MEM_WRITE( ARG4, sizeof(struct vki_user_desc) );
1160      break;
1161   case VKI_PTRACE_GETEVENTMSG:
1162      POST_MEM_WRITE( ARG4, sizeof(unsigned long));
1163      break;
1164   case VKI_PTRACE_GETSIGINFO:
1165      /* XXX: This is a simplification. Different parts of the
1166       * siginfo_t are valid depending on the type of signal.
1167       */
1168      POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
1169      break;
1170   case VKI_PTRACE_GETREGSET:
1171      ML_(linux_POST_getregset)(tid, ARG3, ARG4);
1172      break;
1173   default:
1174      break;
1175   }
1176}
1177
1178PRE(old_mmap)
1179{
1180   /* struct mmap_arg_struct {
1181         unsigned long addr;
1182         unsigned long len;
1183         unsigned long prot;
1184         unsigned long flags;
1185         unsigned long fd;
1186         unsigned long offset;
1187   }; */
1188   UWord a1, a2, a3, a4, a5, a6;
1189   SysRes r;
1190
1191   UWord* args = (UWord*)ARG1;
1192   PRE_REG_READ1(long, "old_mmap", struct mmap_arg_struct *, args);
1193   PRE_MEM_READ( "old_mmap(args)", (Addr)args, 6*sizeof(UWord) );
1194
1195   a1 = args[1-1];
1196   a2 = args[2-1];
1197   a3 = args[3-1];
1198   a4 = args[4-1];
1199   a5 = args[5-1];
1200   a6 = args[6-1];
1201
1202   PRINT("old_mmap ( %#lx, %lu, %ld, %ld, %ld, %ld )",
1203         a1, a2, (Word)a3, (Word)a4, (Word)a5, (Word)a6 );
1204
1205   r = ML_(generic_PRE_sys_mmap)( tid, a1, a2, a3, a4, a5, (Off64T)a6 );
1206   SET_STATUS_from_SysRes(r);
1207}
1208
1209PRE(sys_mmap2)
1210{
1211   SysRes r;
1212
1213   // Exactly like old_mmap() except:
1214   //  - all 6 args are passed in regs, rather than in a memory-block.
1215   //  - the file offset is specified in pagesize units rather than bytes,
1216   //    so that it can be used for files bigger than 2^32 bytes.
1217   // pagesize or 4K-size units in offset?  For ppc32/64-linux, this is
1218   // 4K-sized.  Assert that the page size is 4K here for safety.
1219   vg_assert(VKI_PAGE_SIZE == 4096);
1220   PRINT("sys_mmap2 ( %#lx, %lu, %lu, %lu, %lu, %lu )",
1221         ARG1, ARG2, ARG3, ARG4, ARG5, ARG6 );
1222   PRE_REG_READ6(long, "mmap2",
1223                 unsigned long, start, unsigned long, length,
1224                 unsigned long, prot,  unsigned long, flags,
1225                 unsigned long, fd,    unsigned long, offset);
1226
1227   r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
1228                                       4096 * (Off64T)ARG6 );
1229   SET_STATUS_from_SysRes(r);
1230}
1231
1232// XXX: lstat64/fstat64/stat64 are generic, but not necessarily
1233// applicable to every architecture -- I think only to 32-bit archs.
1234// We're going to need something like linux/core_os32.h for such
1235// things, eventually, I think.  --njn
1236PRE(sys_lstat64)
1237{
1238   PRINT("sys_lstat64 ( %#lx(%s), %#lx )", ARG1, (HChar*)ARG1, ARG2);
1239   PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
1240   PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
1241   PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
1242}
1243
1244POST(sys_lstat64)
1245{
1246   vg_assert(SUCCESS);
1247   if (RES == 0) {
1248      POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
1249   }
1250}
1251
1252PRE(sys_stat64)
1253{
1254   FUSE_COMPATIBLE_MAY_BLOCK();
1255   PRINT("sys_stat64 ( %#lx(%s), %#lx )", ARG1, (HChar*)ARG1, ARG2);
1256   PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
1257   PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
1258   PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
1259}
1260
1261POST(sys_stat64)
1262{
1263   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
1264}
1265
1266PRE(sys_fstatat64)
1267{
1268   FUSE_COMPATIBLE_MAY_BLOCK();
1269   // ARG4 =  int flags;  Flags are or'ed together, therefore writing them
1270   // as a hex constant is more meaningful.
1271   PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx, %#lx )",
1272         SARG1, ARG2, (HChar*)ARG2, ARG3, ARG4);
1273   PRE_REG_READ4(long, "fstatat64",
1274                 int, dfd, char *, file_name, struct stat64 *, buf, int, flags);
1275   PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
1276   PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
1277}
1278
1279POST(sys_fstatat64)
1280{
1281   POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
1282}
1283
1284PRE(sys_fstat64)
1285{
1286   PRINT("sys_fstat64 ( %lu, %#lx )", ARG1, ARG2);
1287   PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
1288   PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
1289}
1290
1291POST(sys_fstat64)
1292{
1293   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
1294}
1295
1296/* NB: arm-linux has a clone of this one, and ppc32-linux has an almost
1297   identical version. */
1298PRE(sys_sigsuspend)
1299{
1300   /* The C library interface to sigsuspend just takes a pointer to
1301      a signal mask but this system call has three arguments - the first
1302      two don't appear to be used by the kernel and are always passed as
1303      zero by glibc and the third is the first word of the signal mask
1304      so only 32 signals are supported.
1305
1306      In fact glibc normally uses rt_sigsuspend if it is available as
1307      that takes a pointer to the signal mask so supports more signals.
1308    */
1309   *flags |= SfMayBlock;
1310   PRINT("sys_sigsuspend ( %ld, %ld, %lu )", SARG1, SARG2, ARG3 );
1311   PRE_REG_READ3(int, "sigsuspend",
1312                 int, history0, int, history1,
1313                 vki_old_sigset_t, mask);
1314}
1315
1316PRE(sys_vm86old)
1317{
1318   PRINT("sys_vm86old ( %#lx )", ARG1);
1319   PRE_REG_READ1(int, "vm86old", struct vm86_struct *, info);
1320   PRE_MEM_WRITE( "vm86old(info)", ARG1, sizeof(struct vki_vm86_struct));
1321}
1322
1323POST(sys_vm86old)
1324{
1325   POST_MEM_WRITE( ARG1, sizeof(struct vki_vm86_struct));
1326}
1327
1328PRE(sys_vm86)
1329{
1330   PRINT("sys_vm86 ( %lu, %#lx )", ARG1, ARG2);
1331   PRE_REG_READ2(int, "vm86", unsigned long, fn, struct vm86plus_struct *, v86);
1332   if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
1333      PRE_MEM_WRITE( "vm86(v86)", ARG2, sizeof(struct vki_vm86plus_struct));
1334}
1335
1336POST(sys_vm86)
1337{
1338   if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
1339      POST_MEM_WRITE( ARG2, sizeof(struct vki_vm86plus_struct));
1340}
1341
1342
1343/* ---------------------------------------------------------------
1344   PRE/POST wrappers for x86/Linux-variant specific syscalls
1345   ------------------------------------------------------------ */
1346
1347PRE(sys_syscall223)
1348{
1349   Int err;
1350
1351   /* 223 is used by sys_bproc.  If we're not on a declared bproc
1352      variant, fail in the usual way. */
1353
1354   if (!KernelVariantiS(KernelVariant_bproc, VG_(clo_kernel_variant))) {
1355      PRINT("non-existent syscall! (syscall 223)");
1356      PRE_REG_READ0(long, "ni_syscall(223)");
1357      SET_STATUS_Failure( VKI_ENOSYS );
1358      return;
1359   }
1360
1361   err = ML_(linux_variant_PRE_sys_bproc)( ARG1, ARG2, ARG3,
1362                                           ARG4, ARG5, ARG6 );
1363   if (err) {
1364      SET_STATUS_Failure( err );
1365      return;
1366   }
1367   /* Let it go through. */
1368   *flags |= SfMayBlock; /* who knows?  play safe. */
1369}
1370
1371POST(sys_syscall223)
1372{
1373   ML_(linux_variant_POST_sys_bproc)( ARG1, ARG2, ARG3,
1374                                      ARG4, ARG5, ARG6 );
1375}
1376
1377#undef PRE
1378#undef POST
1379
1380
1381/* ---------------------------------------------------------------------
1382   The x86/Linux syscall table
1383   ------------------------------------------------------------------ */
1384
1385/* Add an x86-linux specific wrapper to a syscall table. */
1386#define PLAX_(sysno, name)    WRAPPER_ENTRY_X_(x86_linux, sysno, name)
1387#define PLAXY(sysno, name)    WRAPPER_ENTRY_XY(x86_linux, sysno, name)
1388
1389
1390// This table maps from __NR_xxx syscall numbers (from
1391// linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
1392// wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
1393//
1394// For those syscalls not handled by Valgrind, the annotation indicate its
1395// arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/?
1396// (unknown).
1397
1398static SyscallTableEntry syscall_table[] = {
1399//zz    //   (restart_syscall)                             // 0
1400   GENX_(__NR_exit,              sys_exit),           // 1
1401   GENX_(__NR_fork,              sys_fork),           // 2
1402   GENXY(__NR_read,              sys_read),           // 3
1403   GENX_(__NR_write,             sys_write),          // 4
1404
1405   GENXY(__NR_open,              sys_open),           // 5
1406   GENXY(__NR_close,             sys_close),          // 6
1407   GENXY(__NR_waitpid,           sys_waitpid),        // 7
1408   GENXY(__NR_creat,             sys_creat),          // 8
1409   GENX_(__NR_link,              sys_link),           // 9
1410
1411   GENX_(__NR_unlink,            sys_unlink),         // 10
1412   GENX_(__NR_execve,            sys_execve),         // 11
1413   GENX_(__NR_chdir,             sys_chdir),          // 12
1414   GENXY(__NR_time,              sys_time),           // 13
1415   GENX_(__NR_mknod,             sys_mknod),          // 14
1416
1417   GENX_(__NR_chmod,             sys_chmod),          // 15
1418//zz    LINX_(__NR_lchown,            sys_lchown16),       // 16
1419   GENX_(__NR_break,             sys_ni_syscall),     // 17
1420//zz    //   (__NR_oldstat,           sys_stat),           // 18 (obsolete)
1421   LINX_(__NR_lseek,             sys_lseek),          // 19
1422
1423   GENX_(__NR_getpid,            sys_getpid),         // 20
1424   LINX_(__NR_mount,             sys_mount),          // 21
1425   LINX_(__NR_umount,            sys_oldumount),      // 22
1426   LINX_(__NR_setuid,            sys_setuid16),       // 23 ## P
1427   LINX_(__NR_getuid,            sys_getuid16),       // 24 ## P
1428
1429   LINX_(__NR_stime,             sys_stime),          // 25 * (SVr4,SVID,X/OPEN)
1430   PLAXY(__NR_ptrace,            sys_ptrace),         // 26
1431   GENX_(__NR_alarm,             sys_alarm),          // 27
1432//zz    //   (__NR_oldfstat,          sys_fstat),          // 28 * L -- obsolete
1433   GENX_(__NR_pause,             sys_pause),          // 29
1434
1435   LINX_(__NR_utime,             sys_utime),          // 30
1436   GENX_(__NR_stty,              sys_ni_syscall),     // 31
1437   GENX_(__NR_gtty,              sys_ni_syscall),     // 32
1438   GENX_(__NR_access,            sys_access),         // 33
1439   GENX_(__NR_nice,              sys_nice),           // 34
1440
1441   GENX_(__NR_ftime,             sys_ni_syscall),     // 35
1442   GENX_(__NR_sync,              sys_sync),           // 36
1443   GENX_(__NR_kill,              sys_kill),           // 37
1444   GENX_(__NR_rename,            sys_rename),         // 38
1445   GENX_(__NR_mkdir,             sys_mkdir),          // 39
1446
1447   GENX_(__NR_rmdir,             sys_rmdir),          // 40
1448   GENXY(__NR_dup,               sys_dup),            // 41
1449   LINXY(__NR_pipe,              sys_pipe),           // 42
1450   GENXY(__NR_times,             sys_times),          // 43
1451   GENX_(__NR_prof,              sys_ni_syscall),     // 44
1452//zz
1453   GENX_(__NR_brk,               sys_brk),            // 45
1454   LINX_(__NR_setgid,            sys_setgid16),       // 46
1455   LINX_(__NR_getgid,            sys_getgid16),       // 47
1456//zz    //   (__NR_signal,            sys_signal),         // 48 */* (ANSI C)
1457   LINX_(__NR_geteuid,           sys_geteuid16),      // 49
1458
1459   LINX_(__NR_getegid,           sys_getegid16),      // 50
1460   GENX_(__NR_acct,              sys_acct),           // 51
1461   LINX_(__NR_umount2,           sys_umount),         // 52
1462   GENX_(__NR_lock,              sys_ni_syscall),     // 53
1463   LINXY(__NR_ioctl,             sys_ioctl),          // 54
1464
1465   LINXY(__NR_fcntl,             sys_fcntl),          // 55
1466   GENX_(__NR_mpx,               sys_ni_syscall),     // 56
1467   GENX_(__NR_setpgid,           sys_setpgid),        // 57
1468   GENX_(__NR_ulimit,            sys_ni_syscall),     // 58
1469//zz    //   (__NR_oldolduname,       sys_olduname),       // 59 Linux -- obsolete
1470//zz
1471   GENX_(__NR_umask,             sys_umask),          // 60
1472   GENX_(__NR_chroot,            sys_chroot),         // 61
1473//zz    //   (__NR_ustat,             sys_ustat)           // 62 SVr4 -- deprecated
1474   GENXY(__NR_dup2,              sys_dup2),           // 63
1475   GENX_(__NR_getppid,           sys_getppid),        // 64
1476
1477   GENX_(__NR_getpgrp,           sys_getpgrp),        // 65
1478   GENX_(__NR_setsid,            sys_setsid),         // 66
1479   LINXY(__NR_sigaction,         sys_sigaction),      // 67
1480//zz    //   (__NR_sgetmask,          sys_sgetmask),       // 68 */* (ANSI C)
1481//zz    //   (__NR_ssetmask,          sys_ssetmask),       // 69 */* (ANSI C)
1482//zz
1483   LINX_(__NR_setreuid,          sys_setreuid16),     // 70
1484   LINX_(__NR_setregid,          sys_setregid16),     // 71
1485   PLAX_(__NR_sigsuspend,        sys_sigsuspend),     // 72
1486   LINXY(__NR_sigpending,        sys_sigpending),     // 73
1487   GENX_(__NR_sethostname,       sys_sethostname),    // 74
1488//zz
1489   GENX_(__NR_setrlimit,         sys_setrlimit),      // 75
1490   GENXY(__NR_getrlimit,         sys_old_getrlimit),  // 76
1491   GENXY(__NR_getrusage,         sys_getrusage),      // 77
1492   GENXY(__NR_gettimeofday,      sys_gettimeofday),   // 78
1493   GENX_(__NR_settimeofday,      sys_settimeofday),   // 79
1494
1495   LINXY(__NR_getgroups,         sys_getgroups16),    // 80
1496   LINX_(__NR_setgroups,         sys_setgroups16),    // 81
1497   PLAX_(__NR_select,            old_select),         // 82
1498   GENX_(__NR_symlink,           sys_symlink),        // 83
1499//zz    //   (__NR_oldlstat,          sys_lstat),          // 84 -- obsolete
1500//zz
1501   GENX_(__NR_readlink,          sys_readlink),       // 85
1502//zz    //   (__NR_uselib,            sys_uselib),         // 86 */Linux
1503//zz    //   (__NR_swapon,            sys_swapon),         // 87 */Linux
1504//zz    //   (__NR_reboot,            sys_reboot),         // 88 */Linux
1505//zz    //   (__NR_readdir,           old_readdir),        // 89 -- superseded
1506//zz
1507   PLAX_(__NR_mmap,              old_mmap),           // 90
1508   GENXY(__NR_munmap,            sys_munmap),         // 91
1509   GENX_(__NR_truncate,          sys_truncate),       // 92
1510   GENX_(__NR_ftruncate,         sys_ftruncate),      // 93
1511   GENX_(__NR_fchmod,            sys_fchmod),         // 94
1512
1513   LINX_(__NR_fchown,            sys_fchown16),       // 95
1514   GENX_(__NR_getpriority,       sys_getpriority),    // 96
1515   GENX_(__NR_setpriority,       sys_setpriority),    // 97
1516   GENX_(__NR_profil,            sys_ni_syscall),     // 98
1517   GENXY(__NR_statfs,            sys_statfs),         // 99
1518
1519   GENXY(__NR_fstatfs,           sys_fstatfs),        // 100
1520   LINX_(__NR_ioperm,            sys_ioperm),         // 101
1521   LINXY(__NR_socketcall,        sys_socketcall),     // 102 x86/Linux-only
1522   LINXY(__NR_syslog,            sys_syslog),         // 103
1523   GENXY(__NR_setitimer,         sys_setitimer),      // 104
1524
1525   GENXY(__NR_getitimer,         sys_getitimer),      // 105
1526   GENXY(__NR_stat,              sys_newstat),        // 106
1527   GENXY(__NR_lstat,             sys_newlstat),       // 107
1528   GENXY(__NR_fstat,             sys_newfstat),       // 108
1529//zz    //   (__NR_olduname,          sys_uname),          // 109 -- obsolete
1530//zz
1531   GENX_(__NR_iopl,              sys_iopl),           // 110
1532   LINX_(__NR_vhangup,           sys_vhangup),        // 111
1533   GENX_(__NR_idle,              sys_ni_syscall),     // 112
1534   PLAXY(__NR_vm86old,           sys_vm86old),        // 113 x86/Linux-only
1535   GENXY(__NR_wait4,             sys_wait4),          // 114
1536//zz
1537//zz    //   (__NR_swapoff,           sys_swapoff),        // 115 */Linux
1538   LINXY(__NR_sysinfo,           sys_sysinfo),        // 116
1539   LINXY(__NR_ipc,               sys_ipc),            // 117
1540   GENX_(__NR_fsync,             sys_fsync),          // 118
1541   PLAX_(__NR_sigreturn,         sys_sigreturn),      // 119 ?/Linux
1542
1543   PLAX_(__NR_clone,             sys_clone),          // 120
1544//zz    //   (__NR_setdomainname,     sys_setdomainname),  // 121 */*(?)
1545   GENXY(__NR_uname,             sys_newuname),       // 122
1546   PLAX_(__NR_modify_ldt,        sys_modify_ldt),     // 123
1547   LINXY(__NR_adjtimex,          sys_adjtimex),       // 124
1548
1549   GENXY(__NR_mprotect,          sys_mprotect),       // 125
1550   LINXY(__NR_sigprocmask,       sys_sigprocmask),    // 126
1551//zz    // Nb: create_module() was removed 2.4-->2.6
1552   GENX_(__NR_create_module,     sys_ni_syscall),     // 127
1553   LINX_(__NR_init_module,       sys_init_module),    // 128
1554   LINX_(__NR_delete_module,     sys_delete_module),  // 129
1555//zz
1556//zz    // Nb: get_kernel_syms() was removed 2.4-->2.6
1557   GENX_(__NR_get_kernel_syms,   sys_ni_syscall),     // 130
1558   LINX_(__NR_quotactl,          sys_quotactl),       // 131
1559   GENX_(__NR_getpgid,           sys_getpgid),        // 132
1560   GENX_(__NR_fchdir,            sys_fchdir),         // 133
1561//zz    //   (__NR_bdflush,           sys_bdflush),        // 134 */Linux
1562//zz
1563//zz    //   (__NR_sysfs,             sys_sysfs),          // 135 SVr4
1564   LINX_(__NR_personality,       sys_personality),    // 136
1565   GENX_(__NR_afs_syscall,       sys_ni_syscall),     // 137
1566   LINX_(__NR_setfsuid,          sys_setfsuid16),     // 138
1567   LINX_(__NR_setfsgid,          sys_setfsgid16),     // 139
1568
1569   LINXY(__NR__llseek,           sys_llseek),         // 140
1570   GENXY(__NR_getdents,          sys_getdents),       // 141
1571   GENX_(__NR__newselect,        sys_select),         // 142
1572   GENX_(__NR_flock,             sys_flock),          // 143
1573   GENX_(__NR_msync,             sys_msync),          // 144
1574
1575   GENXY(__NR_readv,             sys_readv),          // 145
1576   GENX_(__NR_writev,            sys_writev),         // 146
1577   GENX_(__NR_getsid,            sys_getsid),         // 147
1578   GENX_(__NR_fdatasync,         sys_fdatasync),      // 148
1579   LINXY(__NR__sysctl,           sys_sysctl),         // 149
1580
1581   GENX_(__NR_mlock,             sys_mlock),          // 150
1582   GENX_(__NR_munlock,           sys_munlock),        // 151
1583   GENX_(__NR_mlockall,          sys_mlockall),       // 152
1584   LINX_(__NR_munlockall,        sys_munlockall),     // 153
1585   LINXY(__NR_sched_setparam,    sys_sched_setparam), // 154
1586
1587   LINXY(__NR_sched_getparam,         sys_sched_getparam),        // 155
1588   LINX_(__NR_sched_setscheduler,     sys_sched_setscheduler),    // 156
1589   LINX_(__NR_sched_getscheduler,     sys_sched_getscheduler),    // 157
1590   LINX_(__NR_sched_yield,            sys_sched_yield),           // 158
1591   LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159
1592
1593   LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
1594   LINXY(__NR_sched_rr_get_interval,  sys_sched_rr_get_interval), // 161
1595   GENXY(__NR_nanosleep,         sys_nanosleep),      // 162
1596   GENX_(__NR_mremap,            sys_mremap),         // 163
1597   LINX_(__NR_setresuid,         sys_setresuid16),    // 164
1598
1599   LINXY(__NR_getresuid,         sys_getresuid16),    // 165
1600   PLAXY(__NR_vm86,              sys_vm86),           // 166 x86/Linux-only
1601   GENX_(__NR_query_module,      sys_ni_syscall),     // 167
1602   GENXY(__NR_poll,              sys_poll),           // 168
1603//zz    //   (__NR_nfsservctl,        sys_nfsservctl),     // 169 */Linux
1604//zz
1605   LINX_(__NR_setresgid,         sys_setresgid16),    // 170
1606   LINXY(__NR_getresgid,         sys_getresgid16),    // 171
1607   LINXY(__NR_prctl,             sys_prctl),          // 172
1608   PLAX_(__NR_rt_sigreturn,      sys_rt_sigreturn),   // 173 x86/Linux only?
1609   LINXY(__NR_rt_sigaction,      sys_rt_sigaction),   // 174
1610
1611   LINXY(__NR_rt_sigprocmask,    sys_rt_sigprocmask), // 175
1612   LINXY(__NR_rt_sigpending,     sys_rt_sigpending),  // 176
1613   LINXY(__NR_rt_sigtimedwait,   sys_rt_sigtimedwait),// 177
1614   LINXY(__NR_rt_sigqueueinfo,   sys_rt_sigqueueinfo),// 178
1615   LINX_(__NR_rt_sigsuspend,     sys_rt_sigsuspend),  // 179
1616
1617   GENXY(__NR_pread64,           sys_pread64),        // 180
1618   GENX_(__NR_pwrite64,          sys_pwrite64),       // 181
1619   LINX_(__NR_chown,             sys_chown16),        // 182
1620   GENXY(__NR_getcwd,            sys_getcwd),         // 183
1621   LINXY(__NR_capget,            sys_capget),         // 184
1622
1623   LINX_(__NR_capset,            sys_capset),         // 185
1624   GENXY(__NR_sigaltstack,       sys_sigaltstack),    // 186
1625   LINXY(__NR_sendfile,          sys_sendfile),       // 187
1626   GENXY(__NR_getpmsg,           sys_getpmsg),        // 188
1627   GENX_(__NR_putpmsg,           sys_putpmsg),        // 189
1628
1629   // Nb: we treat vfork as fork
1630   GENX_(__NR_vfork,             sys_fork),           // 190
1631   GENXY(__NR_ugetrlimit,        sys_getrlimit),      // 191
1632   PLAX_(__NR_mmap2,             sys_mmap2),          // 192
1633   GENX_(__NR_truncate64,        sys_truncate64),     // 193
1634   GENX_(__NR_ftruncate64,       sys_ftruncate64),    // 194
1635
1636   PLAXY(__NR_stat64,            sys_stat64),         // 195
1637   PLAXY(__NR_lstat64,           sys_lstat64),        // 196
1638   PLAXY(__NR_fstat64,           sys_fstat64),        // 197
1639   GENX_(__NR_lchown32,          sys_lchown),         // 198
1640   GENX_(__NR_getuid32,          sys_getuid),         // 199
1641
1642   GENX_(__NR_getgid32,          sys_getgid),         // 200
1643   GENX_(__NR_geteuid32,         sys_geteuid),        // 201
1644   GENX_(__NR_getegid32,         sys_getegid),        // 202
1645   GENX_(__NR_setreuid32,        sys_setreuid),       // 203
1646   GENX_(__NR_setregid32,        sys_setregid),       // 204
1647
1648   GENXY(__NR_getgroups32,       sys_getgroups),      // 205
1649   GENX_(__NR_setgroups32,       sys_setgroups),      // 206
1650   GENX_(__NR_fchown32,          sys_fchown),         // 207
1651   LINX_(__NR_setresuid32,       sys_setresuid),      // 208
1652   LINXY(__NR_getresuid32,       sys_getresuid),      // 209
1653
1654   LINX_(__NR_setresgid32,       sys_setresgid),      // 210
1655   LINXY(__NR_getresgid32,       sys_getresgid),      // 211
1656   GENX_(__NR_chown32,           sys_chown),          // 212
1657   GENX_(__NR_setuid32,          sys_setuid),         // 213
1658   GENX_(__NR_setgid32,          sys_setgid),         // 214
1659
1660   LINX_(__NR_setfsuid32,        sys_setfsuid),       // 215
1661   LINX_(__NR_setfsgid32,        sys_setfsgid),       // 216
1662   LINX_(__NR_pivot_root,        sys_pivot_root),     // 217
1663   GENXY(__NR_mincore,           sys_mincore),        // 218
1664   GENX_(__NR_madvise,           sys_madvise),        // 219
1665
1666   GENXY(__NR_getdents64,        sys_getdents64),     // 220
1667   LINXY(__NR_fcntl64,           sys_fcntl64),        // 221
1668   GENX_(222,                    sys_ni_syscall),     // 222
1669   PLAXY(223,                    sys_syscall223),     // 223 // sys_bproc?
1670   LINX_(__NR_gettid,            sys_gettid),         // 224
1671
1672   LINX_(__NR_readahead,         sys_readahead),      // 225 */Linux
1673   LINX_(__NR_setxattr,          sys_setxattr),       // 226
1674   LINX_(__NR_lsetxattr,         sys_lsetxattr),      // 227
1675   LINX_(__NR_fsetxattr,         sys_fsetxattr),      // 228
1676   LINXY(__NR_getxattr,          sys_getxattr),       // 229
1677
1678   LINXY(__NR_lgetxattr,         sys_lgetxattr),      // 230
1679   LINXY(__NR_fgetxattr,         sys_fgetxattr),      // 231
1680   LINXY(__NR_listxattr,         sys_listxattr),      // 232
1681   LINXY(__NR_llistxattr,        sys_llistxattr),     // 233
1682   LINXY(__NR_flistxattr,        sys_flistxattr),     // 234
1683
1684   LINX_(__NR_removexattr,       sys_removexattr),    // 235
1685   LINX_(__NR_lremovexattr,      sys_lremovexattr),   // 236
1686   LINX_(__NR_fremovexattr,      sys_fremovexattr),   // 237
1687   LINXY(__NR_tkill,             sys_tkill),          // 238 */Linux
1688   LINXY(__NR_sendfile64,        sys_sendfile64),     // 239
1689
1690   LINXY(__NR_futex,             sys_futex),             // 240
1691   LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
1692   LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
1693   PLAX_(__NR_set_thread_area,   sys_set_thread_area),   // 243
1694   PLAX_(__NR_get_thread_area,   sys_get_thread_area),   // 244
1695
1696   LINXY(__NR_io_setup,          sys_io_setup),       // 245
1697   LINX_(__NR_io_destroy,        sys_io_destroy),     // 246
1698   LINXY(__NR_io_getevents,      sys_io_getevents),   // 247
1699   LINX_(__NR_io_submit,         sys_io_submit),      // 248
1700   LINXY(__NR_io_cancel,         sys_io_cancel),      // 249
1701
1702   LINX_(__NR_fadvise64,         sys_fadvise64),      // 250 */(Linux?)
1703   GENX_(251,                    sys_ni_syscall),     // 251
1704   LINX_(__NR_exit_group,        sys_exit_group),     // 252
1705   LINXY(__NR_lookup_dcookie,    sys_lookup_dcookie), // 253
1706   LINXY(__NR_epoll_create,      sys_epoll_create),   // 254
1707
1708   LINX_(__NR_epoll_ctl,         sys_epoll_ctl),         // 255
1709   LINXY(__NR_epoll_wait,        sys_epoll_wait),        // 256
1710//zz    //   (__NR_remap_file_pages,  sys_remap_file_pages),  // 257 */Linux
1711   LINX_(__NR_set_tid_address,   sys_set_tid_address),   // 258
1712   LINXY(__NR_timer_create,      sys_timer_create),      // 259
1713
1714   LINXY(__NR_timer_settime,     sys_timer_settime),  // (timer_create+1)
1715   LINXY(__NR_timer_gettime,     sys_timer_gettime),  // (timer_create+2)
1716   LINX_(__NR_timer_getoverrun,  sys_timer_getoverrun),//(timer_create+3)
1717   LINX_(__NR_timer_delete,      sys_timer_delete),   // (timer_create+4)
1718   LINX_(__NR_clock_settime,     sys_clock_settime),  // (timer_create+5)
1719
1720   LINXY(__NR_clock_gettime,     sys_clock_gettime),  // (timer_create+6)
1721   LINXY(__NR_clock_getres,      sys_clock_getres),   // (timer_create+7)
1722   LINXY(__NR_clock_nanosleep,   sys_clock_nanosleep),// (timer_create+8) */*
1723   GENXY(__NR_statfs64,          sys_statfs64),       // 268
1724   GENXY(__NR_fstatfs64,         sys_fstatfs64),      // 269
1725
1726   LINX_(__NR_tgkill,            sys_tgkill),         // 270 */Linux
1727   GENX_(__NR_utimes,            sys_utimes),         // 271
1728   LINX_(__NR_fadvise64_64,      sys_fadvise64_64),   // 272 */(Linux?)
1729   GENX_(__NR_vserver,           sys_ni_syscall),     // 273
1730   LINX_(__NR_mbind,             sys_mbind),          // 274 ?/?
1731
1732   LINXY(__NR_get_mempolicy,     sys_get_mempolicy),  // 275 ?/?
1733   LINX_(__NR_set_mempolicy,     sys_set_mempolicy),  // 276 ?/?
1734   LINXY(__NR_mq_open,           sys_mq_open),        // 277
1735   LINX_(__NR_mq_unlink,         sys_mq_unlink),      // (mq_open+1)
1736   LINX_(__NR_mq_timedsend,      sys_mq_timedsend),   // (mq_open+2)
1737
1738   LINXY(__NR_mq_timedreceive,   sys_mq_timedreceive),// (mq_open+3)
1739   LINX_(__NR_mq_notify,         sys_mq_notify),      // (mq_open+4)
1740   LINXY(__NR_mq_getsetattr,     sys_mq_getsetattr),  // (mq_open+5)
1741   GENX_(__NR_sys_kexec_load,    sys_ni_syscall),     // 283
1742   LINXY(__NR_waitid,            sys_waitid),         // 284
1743
1744   GENX_(285,                    sys_ni_syscall),     // 285
1745   LINX_(__NR_add_key,           sys_add_key),        // 286
1746   LINX_(__NR_request_key,       sys_request_key),    // 287
1747   LINXY(__NR_keyctl,            sys_keyctl),         // 288
1748   LINX_(__NR_ioprio_set,        sys_ioprio_set),     // 289
1749
1750   LINX_(__NR_ioprio_get,        sys_ioprio_get),     // 290
1751   LINX_(__NR_inotify_init,	 sys_inotify_init),   // 291
1752   LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
1753   LINX_(__NR_inotify_rm_watch,	 sys_inotify_rm_watch), // 293
1754//   LINX_(__NR_migrate_pages,	 sys_migrate_pages),    // 294
1755
1756   LINXY(__NR_openat,		 sys_openat),           // 295
1757   LINX_(__NR_mkdirat,		 sys_mkdirat),          // 296
1758   LINX_(__NR_mknodat,		 sys_mknodat),          // 297
1759   LINX_(__NR_fchownat,		 sys_fchownat),         // 298
1760   LINX_(__NR_futimesat,	 sys_futimesat),        // 299
1761
1762   PLAXY(__NR_fstatat64,	 sys_fstatat64),        // 300
1763   LINX_(__NR_unlinkat,		 sys_unlinkat),         // 301
1764   LINX_(__NR_renameat,		 sys_renameat),         // 302
1765   LINX_(__NR_linkat,		 sys_linkat),           // 303
1766   LINX_(__NR_symlinkat,	 sys_symlinkat),        // 304
1767
1768   LINX_(__NR_readlinkat,	 sys_readlinkat),       // 305
1769   LINX_(__NR_fchmodat,		 sys_fchmodat),         // 306
1770   LINX_(__NR_faccessat,	 sys_faccessat),        // 307
1771   LINX_(__NR_pselect6,		 sys_pselect6),         // 308
1772   LINXY(__NR_ppoll,		 sys_ppoll),            // 309
1773
1774   LINX_(__NR_unshare,		 sys_unshare),          // 310
1775   LINX_(__NR_set_robust_list,	 sys_set_robust_list),  // 311
1776   LINXY(__NR_get_robust_list,	 sys_get_robust_list),  // 312
1777   LINX_(__NR_splice,            sys_splice),           // 313
1778   LINX_(__NR_sync_file_range,   sys_sync_file_range),  // 314
1779
1780   LINX_(__NR_tee,               sys_tee),              // 315
1781   LINXY(__NR_vmsplice,          sys_vmsplice),         // 316
1782   LINXY(__NR_move_pages,        sys_move_pages),       // 317
1783   LINXY(__NR_getcpu,            sys_getcpu),           // 318
1784   LINXY(__NR_epoll_pwait,       sys_epoll_pwait),      // 319
1785
1786   LINX_(__NR_utimensat,         sys_utimensat),        // 320
1787   LINXY(__NR_signalfd,          sys_signalfd),         // 321
1788   LINXY(__NR_timerfd_create,    sys_timerfd_create),   // 322
1789   LINXY(__NR_eventfd,           sys_eventfd),          // 323
1790   LINX_(__NR_fallocate,         sys_fallocate),        // 324
1791
1792   LINXY(__NR_timerfd_settime,   sys_timerfd_settime),  // 325
1793   LINXY(__NR_timerfd_gettime,   sys_timerfd_gettime),  // 326
1794   LINXY(__NR_signalfd4,         sys_signalfd4),        // 327
1795   LINXY(__NR_eventfd2,          sys_eventfd2),         // 328
1796   LINXY(__NR_epoll_create1,     sys_epoll_create1),     // 329
1797
1798   LINXY(__NR_dup3,              sys_dup3),             // 330
1799   LINXY(__NR_pipe2,             sys_pipe2),            // 331
1800   LINXY(__NR_inotify_init1,     sys_inotify_init1),    // 332
1801   LINXY(__NR_preadv,            sys_preadv),           // 333
1802   LINX_(__NR_pwritev,           sys_pwritev),          // 334
1803
1804   LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 335
1805   LINXY(__NR_perf_event_open,   sys_perf_event_open),  // 336
1806   LINXY(__NR_recvmmsg,          sys_recvmmsg),         // 337
1807   LINXY(__NR_fanotify_init,     sys_fanotify_init),    // 338
1808   LINX_(__NR_fanotify_mark,     sys_fanotify_mark),    // 339
1809
1810   LINXY(__NR_prlimit64,         sys_prlimit64),        // 340
1811   LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),// 341
1812   LINXY(__NR_open_by_handle_at, sys_open_by_handle_at),// 342
1813   LINXY(__NR_clock_adjtime,     sys_clock_adjtime),    // 343
1814   LINX_(__NR_syncfs,            sys_syncfs),           // 344
1815
1816   LINXY(__NR_sendmmsg,          sys_sendmmsg),         // 345
1817//   LINX_(__NR_setns,             sys_ni_syscall),       // 346
1818   LINXY(__NR_process_vm_readv,  sys_process_vm_readv), // 347
1819   LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 348
1820   LINX_(__NR_kcmp,              sys_kcmp),             // 349
1821
1822//   LIN__(__NR_finit_module,      sys_ni_syscall),       // 350
1823//   LIN__(__NR_sched_setattr,     sys_ni_syscall),       // 351
1824//   LIN__(__NR_sched_getattr,     sys_ni_syscall),       // 352
1825//   LIN__(__NR_renameat2,         sys_ni_syscall),       // 353
1826//   LIN__(__NR_seccomp,           sys_ni_syscall),       // 354
1827
1828   LINXY(__NR_getrandom,         sys_getrandom),        // 355
1829   LINXY(__NR_memfd_create,      sys_memfd_create)      // 356
1830//   LIN__(__NR_bpf,               sys_ni_syscall)        // 357
1831};
1832
1833SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
1834{
1835   const UInt syscall_table_size
1836      = sizeof(syscall_table) / sizeof(syscall_table[0]);
1837
1838   /* Is it in the contiguous initial section of the table? */
1839   if (sysno < syscall_table_size) {
1840      SyscallTableEntry* sys = &syscall_table[sysno];
1841      if (sys->before == NULL)
1842         return NULL; /* no entry */
1843      else
1844         return sys;
1845   }
1846
1847   /* Can't find a wrapper */
1848   return NULL;
1849}
1850
1851#endif // defined(VGP_x86_linux)
1852
1853/*--------------------------------------------------------------------*/
1854/*--- end                                                          ---*/
1855/*--------------------------------------------------------------------*/
1856