1
2/*--------------------------------------------------------------------*/
3/*--- Wrappers for generic Unix system calls                       ---*/
4/*---                                            syswrap-generic.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2011 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#if defined(VGO_linux) || defined(VGO_darwin)
33
34#include "pub_core_basics.h"
35#include "pub_core_vki.h"
36#include "pub_core_vkiscnums.h"
37#include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
38#include "pub_core_threadstate.h"
39#include "pub_core_debuginfo.h"     // VG_(di_notify_*)
40#include "pub_core_aspacemgr.h"
41#include "pub_core_transtab.h"      // VG_(discard_translations)
42#include "pub_core_xarray.h"
43#include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
44#include "pub_core_debuglog.h"
45#include "pub_core_errormgr.h"
46#include "pub_tool_gdbserver.h"     // VG_(gdbserver)
47#include "pub_core_libcbase.h"
48#include "pub_core_libcassert.h"
49#include "pub_core_libcfile.h"
50#include "pub_core_libcprint.h"
51#include "pub_core_libcproc.h"
52#include "pub_core_libcsignal.h"
53#include "pub_core_machine.h"       // VG_(get_SP)
54#include "pub_core_mallocfree.h"
55#include "pub_core_options.h"
56#include "pub_core_scheduler.h"
57#include "pub_core_signals.h"
58#include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
59#include "pub_core_syscall.h"
60#include "pub_core_syswrap.h"
61#include "pub_core_tooliface.h"
62#include "pub_core_ume.h"
63
64#include "priv_types_n_macros.h"
65#include "priv_syswrap-generic.h"
66
67#include "config.h"
68
69
70/* Returns True iff address range is something the client can
71   plausibly mess with: all of it is either already belongs to the
72   client or is free or a reservation. */
73
74Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
75                                   const Char *syscallname)
76{
77   Bool ret;
78
79   if (size == 0)
80      return True;
81
82   ret = VG_(am_is_valid_for_client_or_free_or_resvn)
83            (start,size,VKI_PROT_NONE);
84
85   if (0)
86      VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
87		  syscallname, start, start+size-1, (Int)ret);
88
89   if (!ret && syscallname != NULL) {
90      VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
91                               "to modify addresses %#lx-%#lx\n",
92                               syscallname, start, start+size-1);
93      if (VG_(clo_verbosity) > 1) {
94         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
95      }
96   }
97
98   return ret;
99}
100
101
102Bool ML_(client_signal_OK)(Int sigNo)
103{
104   /* signal 0 is OK for kill */
105   Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
106
107   //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
108
109   return ret;
110}
111
112
113/* Handy small function to help stop wrappers from segfaulting when
114   presented with bogus client addresses.  Is not used for generating
115   user-visible errors. */
116
117Bool ML_(safe_to_deref) ( void* start, SizeT size )
118{
119   return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
120}
121
122
123/* ---------------------------------------------------------------------
124   Doing mmap, mremap
125   ------------------------------------------------------------------ */
126
127/* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
128   munmap, mprotect (and mremap??) work at the page level.  So addresses
129   and lengths must be adjusted for this. */
130
131/* Mash around start and length so that the area exactly covers
132   an integral number of pages.  If we don't do that, memcheck's
133   idea of addressible memory diverges from that of the
134   kernel's, which causes the leak detector to crash. */
135static
136void page_align_addr_and_len( Addr* a, SizeT* len)
137{
138   Addr ra;
139
140   ra = VG_PGROUNDDN(*a);
141   *len = VG_PGROUNDUP(*a + *len) - ra;
142   *a = ra;
143}
144
145static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
146                                UInt flags, Int fd, Off64T offset)
147{
148   Bool d;
149
150   /* 'a' is the return value from a real kernel mmap, hence: */
151   vg_assert(VG_IS_PAGE_ALIGNED(a));
152   /* whereas len is whatever the syscall supplied.  So: */
153   len = VG_PGROUNDUP(len);
154
155   d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
156
157   if (d)
158      VG_(discard_translations)( (Addr64)a, (ULong)len,
159                                 "notify_core_of_mmap" );
160}
161
162static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
163{
164   SizeT fourgig = (1ULL << 32);
165   SizeT guardpage = 10 * fourgig;
166   Bool rr, ww, xx;
167
168   /* 'a' is the return value from a real kernel mmap, hence: */
169   vg_assert(VG_IS_PAGE_ALIGNED(a));
170   /* whereas len is whatever the syscall supplied.  So: */
171   len = VG_PGROUNDUP(len);
172
173   rr = toBool(prot & VKI_PROT_READ);
174   ww = toBool(prot & VKI_PROT_WRITE);
175   xx = toBool(prot & VKI_PROT_EXEC);
176
177#ifdef VGA_amd64
178   if (len >= fourgig + 2 * guardpage) {
179     VG_(printf)("Valgrind: ignoring NaCl's mmap(84G)\n");
180     return;
181   }
182#endif  // VGA_amd64
183   VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
184}
185
186
187/* When a client mmap has been successfully done, this function must
188   be called.  It notifies both aspacem and the tool of the new
189   mapping.
190
191   JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
192   it is called from is POST(sys_io_setup).  In particular,
193   ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
194   client mmap.  But it doesn't call this function; instead it does the
195   relevant notifications itself.  Here, we just pass di_handle=0 to
196   notify_tool_of_mmap as we have no better information.  But really this
197   function should be done away with; problem is I don't understand what
198   POST(sys_io_setup) does or how it works.
199
200   [However, this function is used lots for Darwin, because
201    ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
202 */
/* Notify aspacem and the tool (but not the debuginfo reader) of a
   completed client mmap.  See the long comment above for caveats
   about when this is actually used. */
void
ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
                                    UInt flags, Int fd, Off64T offset )
{
   // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
   // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
   // Should it?  --njn
   notify_core_of_mmap(a, len, prot, flags, fd, offset);
   notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
}
213
/* Notify aspacem, the tool and the debuginfo reader that the client
   unmapped [a, a+len) (page-aligned first), and discard translations
   over the range if aspacem says that is needed. */
void
ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
{
   Bool d;    /* True iff translations must be discarded */

   page_align_addr_and_len(&a, &len);
   d = VG_(am_notify_munmap)(a, len);
   VG_TRACK( die_mem_munmap, a, len );
   VG_(di_notify_munmap)( a, len );
   if (d)
      VG_(discard_translations)( (Addr64)a, (ULong)len,
                                 "ML_(notify_core_and_tool_of_munmap)" );
}
227
228void
229ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
230{
231   Bool rr = toBool(prot & VKI_PROT_READ);
232   Bool ww = toBool(prot & VKI_PROT_WRITE);
233   Bool xx = toBool(prot & VKI_PROT_EXEC);
234   Bool d;
235
236   page_align_addr_and_len(&a, &len);
237   d = VG_(am_notify_mprotect)(a, len, prot);
238   VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
239   VG_(di_notify_mprotect)( a, len, prot );
240   if (d)
241      VG_(discard_translations)( (Addr64)a, (ULong)len,
242                                 "ML_(notify_core_and_tool_of_mprotect)" );
243}
244
245
246
247#if HAVE_MREMAP
248/* Expand (or shrink) an existing mapping, potentially moving it at
249   the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
250*/
/* Implements mremap for the client: expand, shrink or move an
   existing client mapping, keeping aspacem, the tool and the
   translation cache in sync.  Returns the (possibly new) address on
   success, or an EINVAL/ENOMEM SysRes error. */
static
SysRes do_mremap( Addr old_addr, SizeT old_len,
                  Addr new_addr, SizeT new_len,
                  UWord flags, ThreadId tid )
{
#  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)

   /* 'd' is set True by aspacem primitives when cached translations
      over the affected range must be discarded. */
   Bool      ok, d;
   NSegment const* old_seg;
   Addr      advised;
   Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
   Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);

   if (0)
      VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
                  old_addr,old_len,new_addr,new_len,
                  flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
                  flags & VKI_MREMAP_FIXED ? "FIXED" : "");
   if (0)
      VG_(am_show_nsegments)(0, "do_remap: before");

   /* ---- argument validation ---- */

   if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
      goto eINVAL;

   if (!VG_IS_PAGE_ALIGNED(old_addr))
      goto eINVAL;

   old_len = VG_PGROUNDUP(old_len);
   new_len = VG_PGROUNDUP(new_len);

   if (new_len == 0)
      goto eINVAL;

   /* kernel doesn't reject this, but we do. */
   if (old_len == 0)
      goto eINVAL;

   /* reject wraparounds */
   if (old_addr + old_len < old_addr)
      goto eINVAL;
   /* NOTE(review): the second operand compares against new_len, not
      new_addr; looks like it was intended to be
      'new_addr + new_len < new_addr' -- confirm before changing. */
   if (f_fixed == True && new_addr + new_len < new_len)
      goto eINVAL;

   /* kernel rejects all fixed, no-move requests (which are
      meaningless). */
   if (f_fixed == True && f_maymove == False)
      goto eINVAL;

   /* Stay away from non-client areas. */
   if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
      goto eINVAL;

   /* In all remaining cases, if the old range does not fall within a
      single segment, fail. */
   /* NOTE(review): old_seg is dereferenced without a NULL check;
      presumably the valid_client_addr test above guarantees a segment
      exists here -- confirm. */
   old_seg = VG_(am_find_nsegment)( old_addr );
   if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
      goto eINVAL;
   if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
      goto eINVAL;

   vg_assert(old_len > 0);
   vg_assert(new_len > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(old_len));
   vg_assert(VG_IS_PAGE_ALIGNED(new_len));
   vg_assert(VG_IS_PAGE_ALIGNED(old_addr));

   /* There are 3 remaining cases:

      * maymove == False

        new space has to be at old address, so:
            - shrink    -> unmap end
            - same size -> do nothing
            - grow      -> if can grow in-place, do so, else fail

      * maymove == True, fixed == False

        new space can be anywhere, so:
            - shrink    -> unmap end
            - same size -> do nothing
            - grow      -> if can grow in-place, do so, else
                           move to anywhere large enough, else fail

      * maymove == True, fixed == True

        new space must be at new address, so:

            - if new address is not page aligned, fail
            - if new address range overlaps old one, fail
            - if new address range cannot be allocated, fail
            - else move to new address range with new size
            - else fail
   */

   /* ---- dispatch on the 3 cases ---- */

   if (f_maymove == False) {
      /* new space has to be at old address */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_fail;
      goto same_in_place;
   }

   if (f_maymove == True && f_fixed == False) {
      /* new space can be anywhere */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_move_anywhere_or_fail;
      goto same_in_place;
   }

   if (f_maymove == True && f_fixed == True) {
      /* new space can only be at the new address */
      if (!VG_IS_PAGE_ALIGNED(new_addr))
         goto eINVAL;
      if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
         /* no overlap */
      } else {
         goto eINVAL;
      }
      if (new_addr == 0)
         goto eINVAL;
         /* VG_(am_get_advisory_client_simple) interprets zero to mean
            non-fixed, which is not what we want */
      advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
      if (!ok || advised != new_addr)
         goto eNOMEM;
      ok = VG_(am_relocate_nooverlap_client)
              ( &d, old_addr, old_len, new_addr, new_len );
      if (ok) {
         /* Tell the tool: contents copied to the new place, any grown
            tail is fresh memory with the old segment's permissions,
            and the old range is gone. */
         VG_TRACK( copy_mem_remap, old_addr, new_addr,
                                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
                      old_seg->hasR, old_seg->hasW, old_seg->hasX,
                      0/*di_handle*/ );
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         if (d) {
            VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
            VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
         }
         return VG_(mk_SysRes_Success)( new_addr );
      }
      goto eNOMEM;
   }

   /* end of the 3 cases */
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_move_anywhere_or_fail:
   {
   /* try growing it in-place */
   Addr   needA = old_addr + old_len;
   SSizeT needL = new_len - old_len;

   vg_assert(needL > 0);
   if (needA == 0)
      goto eINVAL;
      /* VG_(am_get_advisory_client_simple) interprets zero to mean
         non-fixed, which is not what we want */
   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   if (ok) {
      /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
         this-or-nothing) is too lenient, and may allow us to trash
         the next segment along.  So make very sure that the proposed
         new area really is free.  This is perhaps overly
         conservative, but it fixes #129866. */
      NSegment const* segLo = VG_(am_find_nsegment)( needA );
      NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
      if (segLo == NULL || segHi == NULL
          || segLo != segHi || segLo->kind != SkFree)
         ok = False;
   }
   if (ok && advised == needA) {
      ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
      if (ok) {
         VG_TRACK( new_mem_mmap, needA, needL,
                                 old_seg->hasR,
                                 old_seg->hasW, old_seg->hasX,
                                 0/*di_handle*/ );
         if (d)
            VG_(discard_translations)( needA, needL, "do_remap(3)" );
         return VG_(mk_SysRes_Success)( old_addr );
      }
   }

   /* that failed.  Look elsewhere. */
   advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
   if (ok) {
      /* Snapshot the permissions before the relocate invalidates
         old_seg. */
      Bool oldR = old_seg->hasR;
      Bool oldW = old_seg->hasW;
      Bool oldX = old_seg->hasX;
      /* assert new area does not overlap old */
      vg_assert(advised+new_len-1 < old_addr
                || advised > old_addr+old_len-1);
      ok = VG_(am_relocate_nooverlap_client)
              ( &d, old_addr, old_len, advised, new_len );
      if (ok) {
         VG_TRACK( copy_mem_remap, old_addr, advised,
                                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
                      oldR, oldW, oldX, 0/*di_handle*/ );
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         if (d) {
            VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
            VG_(discard_translations)( advised, new_len, "do_remap(5)" );
         }
         return VG_(mk_SysRes_Success)( advised );
      }
   }
   goto eNOMEM;
   }
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_fail:
   {
   Addr  needA = old_addr + old_len;
   SizeT needL = new_len - old_len;
   if (needA == 0)
      goto eINVAL;
      /* VG_(am_get_advisory_client_simple) interprets zero to mean
         non-fixed, which is not what we want */
   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   if (ok) {
      /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
         this-or-nothing) is too lenient, and may allow us to trash
         the next segment along.  So make very sure that the proposed
         new area really is free. */
      NSegment const* segLo = VG_(am_find_nsegment)( needA );
      NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
      if (segLo == NULL || segHi == NULL
          || segLo != segHi || segLo->kind != SkFree)
         ok = False;
   }
   if (!ok || advised != needA)
      goto eNOMEM;
   ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
   if (!ok)
      goto eNOMEM;
   VG_TRACK( new_mem_mmap, needA, needL,
                           old_seg->hasR, old_seg->hasW, old_seg->hasX,
                           0/*di_handle*/ );
   if (d)
      VG_(discard_translations)( needA, needL, "do_remap(6)" );
   return VG_(mk_SysRes_Success)( old_addr );
   }
   /*NOTREACHED*/ vg_assert(0);

  shrink_in_place:
   {
   /* Unmap the tail [old_addr+new_len, old_addr+old_len). */
   SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
   if (sr_isError(sres))
      return sres;
   VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
   if (d)
      VG_(discard_translations)( old_addr+new_len, old_len-new_len,
                                 "do_remap(7)" );
   return VG_(mk_SysRes_Success)( old_addr );
   }
   /*NOTREACHED*/ vg_assert(0);

  same_in_place:
   return VG_(mk_SysRes_Success)( old_addr );
   /*NOTREACHED*/ vg_assert(0);

  eINVAL:
   return VG_(mk_SysRes_Error)( VKI_EINVAL );
  eNOMEM:
   return VG_(mk_SysRes_Error)( VKI_ENOMEM );

#  undef MIN_SIZET
}
525#endif /* HAVE_MREMAP */
526
527
528/* ---------------------------------------------------------------------
529   File-descriptor tracking
530   ------------------------------------------------------------------ */
531
/* One of these is allocated for each open file descriptor.  */
typedef struct OpenFd
{
   Int fd;                        /* The file descriptor */
   Char *pathname;                /* NULL if not a regular file or unknown */
   ExeContext *where;             /* NULL if inherited from parent */
   struct OpenFd *next, *prev;    /* doubly-linked list of records */
} OpenFd;

/* List of allocated file descriptors. */
static OpenFd *allocated_fds = NULL;

/* Count of open file descriptors. */
static Int fd_count = 0;
546
547
548/* Note the fact that a file descriptor was just closed. */
549static
550void record_fd_close(Int fd)
551{
552   OpenFd *i = allocated_fds;
553
554   if (fd >= VG_(fd_hard_limit))
555      return;			/* Valgrind internal */
556
557   while(i) {
558      if(i->fd == fd) {
559         if(i->prev)
560            i->prev->next = i->next;
561         else
562            allocated_fds = i->next;
563         if(i->next)
564            i->next->prev = i->prev;
565         if(i->pathname)
566            VG_(arena_free) (VG_AR_CORE, i->pathname);
567         VG_(arena_free) (VG_AR_CORE, i);
568         fd_count--;
569         break;
570      }
571      i = i->next;
572   }
573}
574
575/* Note the fact that a file descriptor was just opened.  If the
576   tid is -1, this indicates an inherited fd.  If the pathname is NULL,
577   this either indicates a non-standard file (i.e. a pipe or socket or
578   some such thing) or that we don't know the filename.  If the fd is
579   already open, then we're probably doing a dup2() to an existing fd,
580   so just overwrite the existing one. */
581void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
582{
583   OpenFd *i;
584
585   if (fd >= VG_(fd_hard_limit))
586      return;			/* Valgrind internal */
587
588   /* Check to see if this fd is already open. */
589   i = allocated_fds;
590   while (i) {
591      if (i->fd == fd) {
592         if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);
593         break;
594      }
595      i = i->next;
596   }
597
598   /* Not already one: allocate an OpenFd */
599   if (i == NULL) {
600      i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
601
602      i->prev = NULL;
603      i->next = allocated_fds;
604      if(allocated_fds) allocated_fds->prev = i;
605      allocated_fds = i;
606      fd_count++;
607   }
608
609   i->fd = fd;
610   i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
611   i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
612}
613
614// Record opening of an fd, and find its name.
615void ML_(record_fd_open_named)(ThreadId tid, Int fd)
616{
617   static HChar buf[VKI_PATH_MAX];
618   Char* name;
619   if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
620      name = buf;
621   else
622      name = NULL;
623
624   ML_(record_fd_open_with_given_name)(tid, fd, name);
625}
626
// Record opening of a nameless fd (pipe, socket, or similar).
void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
{
   ML_(record_fd_open_with_given_name)(tid, fd, NULL);
}
632
633static
634Char *unix2name(struct vki_sockaddr_un *sa, UInt len, Char *name)
635{
636   if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
637      VG_(sprintf)(name, "<unknown>");
638   } else {
639      VG_(sprintf)(name, "%s", sa->sun_path);
640   }
641
642   return name;
643}
644
645static
646Char *inet2name(struct vki_sockaddr_in *sa, UInt len, Char *name)
647{
648   if (sa == NULL || len == 0) {
649      VG_(sprintf)(name, "<unknown>");
650   } else {
651      UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
652      if (addr == 0) {
653         VG_(sprintf)(name, "<unbound>");
654      } else {
655         VG_(sprintf)(name, "%u.%u.%u.%u:%u",
656                      (addr>>24) & 0xFF, (addr>>16) & 0xFF,
657                      (addr>>8) & 0xFF, addr & 0xFF,
658                      VG_(ntohs)(sa->sin_port));
659      }
660   }
661
662   return name;
663}
664
/*
 * Try to get some details about a socket: look up its local address
 * and, for AF_INET, its peer, then print a one-line description via
 * VG_(message).  Used by VG_(show_open_fds) for fds with no recorded
 * pathname.
 */
static void
getsockdetails(Int fd)
{
   union u {
      struct vki_sockaddr a;
      struct vki_sockaddr_in in;
      struct vki_sockaddr_un un;
   } laddr;
   UInt llen;

   llen = sizeof(laddr);
   VG_(memset)(&laddr, 0, llen);

   if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
      switch(laddr.a.sa_family) {
      case VKI_AF_INET: {
         /* NB: lname/pname are static, so the formatted strings are
            only valid until the next call. */
         static char lname[32];
         static char pname[32];
         struct vki_sockaddr_in paddr;
         UInt plen = sizeof(struct vki_sockaddr_in);

         if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
                         inet2name(&(laddr.in), llen, lname),
                         inet2name(&paddr, plen, pname));
         } else {
            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
                         fd, inet2name(&(laddr.in), llen, lname));
         }
         return;
         }
      case VKI_AF_UNIX: {
         static char lname[256];
         VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
                      unix2name(&(laddr.un), llen, lname));
         return;
         }
      default:
         /* Unknown family: just report the protocol-family number. */
         VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
                      laddr.a.sa_family, fd);
         return;
      }
   }

   /* getsockname failed: fall back to a bare report. */
   VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
}
714
715
716/* Dump out a summary, and a more detailed list, of open file descriptors. */
717void VG_(show_open_fds) (void)
718{
719   OpenFd *i = allocated_fds;
720
721   VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open at exit.\n", fd_count);
722
723   while (i) {
724      if (i->pathname) {
725         VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
726                      i->pathname);
727      } else {
728         Int val;
729         UInt len = sizeof(val);
730
731         if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
732             == -1) {
733            VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
734         } else {
735            getsockdetails(i->fd);
736         }
737      }
738
739      if(i->where) {
740         VG_(pp_ExeContext)(i->where);
741         VG_(message)(Vg_UserMsg, "\n");
742      } else {
743         VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
744         VG_(message)(Vg_UserMsg, "\n");
745      }
746
747      i = i->next;
748   }
749
750   VG_(message)(Vg_UserMsg, "\n");
751}
752
753/* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
754   have /proc support compiled in, or a non-Linux kernel), then we need to
755   find out what file descriptors we inherited from our parent process the
756   hard way - by checking each fd in turn. */
757static
758void init_preopened_fds_without_proc_self_fd(void)
759{
760   struct vki_rlimit lim;
761   UInt count;
762   Int i;
763
764   if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
765      /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
766         an arbitrarily high number.  1024 happens to be the limit in
767         the 2.4 Linux kernels. */
768      count = 1024;
769   } else {
770      count = lim.rlim_cur;
771   }
772
773   for (i = 0; i < count; i++)
774      if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
775         ML_(record_fd_open_named)(-1, i);
776}
777
/* Initialize the list of open file descriptors with the file descriptors
   we inherited from out parent process. */

void VG_(init_preopened_fds)(void)
{
// DDD: should probably use HAVE_PROC here or similar, instead.
#if defined(VGO_linux)
   Int ret;
   struct vki_dirent d;
   SysRes f;

   f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
   if (sr_isError(f)) {
      /* No /proc available: probe each possible fd individually. */
      init_preopened_fds_without_proc_self_fd();
      return;
   }

   /* Read directory entries one at a time; after handling each one,
      seek to its d_off so the next getdents call yields the next
      entry (the buffer only holds a single vki_dirent). */
   while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
      if (ret == -1)
         goto out;

      if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
         Char* s;
         Int fno = VG_(strtoll10)(d.d_name, &s);
         if (*s == '\0') {
            /* Skip the fd we ourselves hold on /proc/self/fd. */
            if (fno != sr_Res(f))
               if (VG_(clo_track_fds))
                  ML_(record_fd_open_named)(-1, fno);
         } else {
            VG_(message)(Vg_DebugMsg,
               "Warning: invalid file name in /proc/self/fd: %s\n",
               d.d_name);
         }
      }

      VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
   }

  out:
   VG_(close)(sr_Res(f));

#elif defined(VGO_darwin)
   init_preopened_fds_without_proc_self_fd();

#else
#  error Unknown OS
#endif
}
826
827static
828Char *strdupcat ( HChar* cc, const Char *s1, const Char *s2, ArenaId aid )
829{
830   UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
831   Char *result = VG_(arena_malloc) ( aid, cc, len );
832   VG_(strcpy) ( result, s1 );
833   VG_(strcat) ( result, s2 );
834   return result;
835}
836
837static
838void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
839                            Char *msg, Addr base, SizeT size )
840{
841   Char *outmsg = strdupcat ( "di.syswrap.pmrs.1",
842                              "socketcall.sendmsg", msg, VG_AR_CORE );
843   PRE_MEM_READ( outmsg, base, size );
844   VG_(arena_free) ( VG_AR_CORE, outmsg );
845}
846
847static
848void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
849                             Char *msg, Addr base, SizeT size )
850{
851   Char *outmsg = strdupcat ( "di.syswrap.pmwr.1",
852                              "socketcall.recvmsg", msg, VG_AR_CORE );
853   if ( read )
854      PRE_MEM_READ( outmsg, base, size );
855   else
856      PRE_MEM_WRITE( outmsg, base, size );
857   VG_(arena_free) ( VG_AR_CORE, outmsg );
858}
859
860static
861void post_mem_write_recvmsg ( ThreadId tid, Bool read,
862                              Char *fieldName, Addr base, SizeT size )
863{
864   if ( !read )
865      POST_MEM_WRITE( base, size );
866}
867
/* Apply 'foreach_func' to every field of a msghdr: first to the
   header fields themselves (with read==True, except msg_flags which
   gets read==False), then to the buffers they point at (name, iov
   array and its elements, control data), all with read==False. */
static
void msghdr_foreachfield (
        ThreadId tid,
        struct vki_msghdr *msg,
        void (*foreach_func)( ThreadId, Bool, Char *, Addr, SizeT )
     )
{
   if ( !msg )
      return;

   /* The header fields themselves. */
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
   foreach_func ( tid, False, "(msg)", (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );

   /* The buffers the header points at. */
   if ( msg->msg_name )
      foreach_func ( tid, False,
                     "(msg.msg_name)",
                     (Addr)msg->msg_name, msg->msg_namelen );

   if ( msg->msg_iov ) {
      struct vki_iovec *iov = msg->msg_iov;
      UInt i;

      foreach_func ( tid, True,
                     "(msg.msg_iov)",
                     (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );

      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov )
         foreach_func ( tid, False,
                        "(msg.msg_iov[i])",
                        (Addr)iov->iov_base, iov->iov_len );
   }

   if ( msg->msg_control )
      foreach_func ( tid, False,
                     "(msg.msg_control)",
                     (Addr)msg->msg_control, msg->msg_controllen );
}
910
/* Scan the control-message (ancillary data) area of MSG for
   SCM_RIGHTS messages -- file descriptors passed across a socket --
   and, when --track-fds is in effect, register each arrived fd with
   the fd tracker. */
static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
{
   struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);

   while (cm) {
      if (cm->cmsg_level == VKI_SOL_SOCKET &&
          cm->cmsg_type == VKI_SCM_RIGHTS ) {
         /* Payload is an array of ints; compute how many fds from the
            total cmsg_len minus the aligned header size. */
         Int *fds = (Int *) VKI_CMSG_DATA(cm);
         Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
                         / sizeof(int);
         Int i;

         for (i = 0; i < fdc; i++)
            if(VG_(clo_track_fds))
               // XXX: must we check the range on these fds with
               //      ML_(fd_allowed)()?
               ML_(record_fd_open_named)(tid, fds[i]);
      }

      cm = VKI_CMSG_NXTHDR(msg, cm);
   }
}
933
/* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
/* Check readability of a sockaddr passed into the kernel, field by
   field, so error messages can name the exact member.  DESCRIPTION is
   a format string with one %s which is replaced by the field name.
   Families other than UNIX/INET/INET6 fall back to checking the whole
   SALEN bytes. */
static
void pre_mem_read_sockaddr ( ThreadId tid,
                             Char *description,
                             struct vki_sockaddr *sa, UInt salen )
{
   Char *outmsg;
   struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
   struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
   struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;

   /* NULL/zero-length sockaddrs are legal */
   if ( sa == NULL || salen == 0 ) return;

   /* +30 leaves room for the longest field name substituted below
      (e.g. "sin6_scope_id"). */
   outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
                                VG_(strlen)( description ) + 30 );

   /* The family field itself must always be readable. */
   VG_(sprintf) ( outmsg, description, "sa_family" );
   PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));

   switch (sa->sa_family) {

      case VKI_AF_UNIX:
         VG_(sprintf) ( outmsg, description, "sun_path" );
         PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
         // GrP fixme max of sun_len-2? what about nul char?
         break;

      case VKI_AF_INET:
         VG_(sprintf) ( outmsg, description, "sin_port" );
         PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
         VG_(sprintf) ( outmsg, description, "sin_addr" );
         PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
         break;

      case VKI_AF_INET6:
         VG_(sprintf) ( outmsg, description, "sin6_port" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
         VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
         VG_(sprintf) ( outmsg, description, "sin6_addr" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
         VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
         break;

      default:
         /* Unknown family: just check the whole thing. */
         VG_(sprintf) ( outmsg, description, "" );
         PRE_MEM_READ( outmsg, (Addr) sa, salen );
         break;
   }

   VG_(arena_free) ( VG_AR_CORE, outmsg );
}
992
993/* Dereference a pointer to a UInt. */
994static UInt deref_UInt ( ThreadId tid, Addr a, Char* s )
995{
996   UInt* a_p = (UInt*)a;
997   PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
998   if (a_p == NULL)
999      return 0;
1000   else
1001      return *a_p;
1002}
1003
1004void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
1005                                  Char* buf_s, Char* buflen_s )
1006{
1007   if (VG_(tdict).track_pre_mem_write) {
1008      UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1009      if (buflen_in > 0) {
1010         VG_(tdict).track_pre_mem_write(
1011            Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1012      }
1013   }
1014}
1015
1016void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1017                                   Addr buf_p, Addr buflen_p, Char* s )
1018{
1019   if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1020      UInt buflen_out = deref_UInt( tid, buflen_p, s);
1021      if (buflen_out > 0 && buf_p != (Addr)NULL) {
1022         VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1023      }
1024   }
1025}
1026
1027/* ---------------------------------------------------------------------
1028   Data seg end, for brk()
1029   ------------------------------------------------------------------ */
1030
1031/*   +--------+------------+
1032     | anon   |    resvn   |
1033     +--------+------------+
1034
1035     ^     ^  ^
1036     |     |  boundary is page aligned
1037     |     VG_(brk_limit) -- no alignment constraint
1038     VG_(brk_base) -- page aligned -- does not move
1039
1040     Both the anon part and the reservation part are always at least
1041     one page.
1042*/
1043
1044/* Set the new data segment end to NEWBRK.  If this succeeds, return
1045   NEWBRK, else return the current data segment end. */
1046
static Addr do_brk ( Addr newbrk )
{
   NSegment const* aseg;
   NSegment const* rseg;
   Addr newbrkP;
   SizeT delta;
   Bool ok;
   Bool debug = False;

   if (debug)
      VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
		  VG_(brk_base), VG_(brk_limit), newbrk);

#  if 0
   if (0) show_segments("in_brk");
#  endif

   /* A request below the (fixed) brk base can never be satisfied. */
   if (newbrk < VG_(brk_base))
      /* Clearly impossible. */
      goto bad;

   if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
      /* shrinking the data segment.  Be lazy and don't munmap the
         excess area. */
      NSegment const * seg = VG_(am_find_nsegment)(newbrk);
      /* The retained-but-unused tail may contain translations; drop
         them before it can be reused. */
      if (seg && seg->hasT)
         VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
                                    "do_brk(shrink)" );
      /* Since we're being lazy and not unmapping pages, we have to
         zero out the area, so that if the area later comes back into
         circulation, it will be filled with zeroes, as if it really
         had been unmapped and later remapped.  Be a bit paranoid and
         try hard to ensure we're not going to segfault by doing the
         write - check both ends of the range are in the same segment
         and that segment is writable. */
      if (seg) {
         /* pre: newbrk < VG_(brk_limit)
              => newbrk <= VG_(brk_limit)-1 */
         NSegment const * seg2;
         vg_assert(newbrk < VG_(brk_limit));
         seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
         if (seg2 && seg == seg2 && seg->hasW)
            VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
      }

      VG_(brk_limit) = newbrk;
      return newbrk;
   }

   /* otherwise we're expanding the brk segment. */
   /* Locate the anon segment holding the current limit; if the data
      segment is currently empty, look at brk_limit itself. */
   if (VG_(brk_limit) > VG_(brk_base))
      aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   else
      aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
   rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );

   /* These should be assured by setup_client_dataseg in m_main. */
   vg_assert(aseg);
   vg_assert(rseg);
   vg_assert(aseg->kind == SkAnonC);
   vg_assert(rseg->kind == SkResvn);
   vg_assert(aseg->end+1 == rseg->start);

   vg_assert(newbrk >= VG_(brk_base));
   if (newbrk <= rseg->start) {
      /* still fits within the anon segment. */
      VG_(brk_limit) = newbrk;
      return newbrk;
   }

   if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
      /* request is too large -- the resvn would fall below 1 page,
         which isn't allowed. */
      goto bad;
   }

   /* Grow the anon segment into the reservation by a whole number of
      pages. */
   newbrkP = VG_PGROUNDUP(newbrk);
   vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
   delta = newbrkP - rseg->start;
   vg_assert(delta > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(delta));

   ok = VG_(am_extend_into_adjacent_reservation_client)( (NSegment*)aseg, delta );
   if (!ok) goto bad;

   VG_(brk_limit) = newbrk;
   return newbrk;

  bad:
   return VG_(brk_limit);
}
1138
1139
1140/* ---------------------------------------------------------------------
1141   Vet file descriptors for sanity
1142   ------------------------------------------------------------------ */
1143/*
1144> - what does the "Bool soft" parameter mean?
1145
1146(Tom Hughes, 3 Oct 05):
1147
1148Whether or not to consider a file descriptor invalid if it is above
1149the current soft limit.
1150
1151Basically if we are testing whether a newly created file descriptor is
1152valid (in a post handler) then we set soft to true, and if we are
1153testing whether a file descriptor that is about to be used (in a pre
1154handler) is valid [viz, an already-existing fd] then we set it to false.
1155
1156The point is that if the (virtual) soft limit is lowered then any
1157existing descriptors can still be read/written/closed etc (so long as
1158they are below the valgrind reserved descriptors) but no new
1159descriptors can be created above the new soft limit.
1160
1161(jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
1162*/
1163
1164/* Return true if we're allowed to use or create this fd */
1165Bool ML_(fd_allowed)(Int fd, const Char *syscallname, ThreadId tid, Bool isNewFd)
1166{
1167   Bool allowed = True;
1168
1169   /* hard limits always apply */
1170   if (fd < 0 || fd >= VG_(fd_hard_limit))
1171      allowed = False;
1172
1173   /* hijacking the output fds is never allowed */
1174   if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1175      allowed = False;
1176
1177   /* if creating a new fd (rather than using an existing one), the
1178      soft limit must also be observed */
1179   if (isNewFd && fd >= VG_(fd_soft_limit))
1180      allowed = False;
1181
1182   /* this looks like it ought to be included, but causes problems: */
1183   /*
1184   if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1185      allowed = False;
1186   */
1187   /* The difficulty is as follows: consider a program P which expects
1188      to be able to mess with (redirect) its own stderr (fd 2).
1189      Usually to deal with P we would issue command line flags to send
1190      logging somewhere other than stderr, so as not to disrupt P.
1191      The problem is that -d unilaterally hijacks stderr with no
1192      consultation with P.  And so, if this check is enabled, P will
1193      work OK normally but fail if -d is issued.
1194
1195      Basically -d is a hack and you take your chances when using it.
1196      It's very useful for low level debugging -- particularly at
1197      startup -- and having its presence change the behaviour of the
1198      client is exactly what we don't want.  */
1199
1200   /* croak? */
1201   if ((!allowed) && VG_(showing_core_errors)() ) {
1202      VG_(message)(Vg_UserMsg,
1203         "Warning: invalid file descriptor %d in syscall %s()\n",
1204         fd, syscallname);
1205      if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1206	 VG_(message)(Vg_UserMsg,
1207            "   Use --log-fd=<number> to select an alternative log fd.\n");
1208      if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1209	 VG_(message)(Vg_UserMsg,
1210            "   Use --xml-fd=<number> to select an alternative XML "
1211            "output fd.\n");
1212      // DDD: consider always printing this stack trace, it's useful.
1213      // Also consider also making this a proper core error, ie.
1214      // suppressible and all that.
1215      if (VG_(clo_verbosity) > 1) {
1216         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1217      }
1218   }
1219
1220   return allowed;
1221}
1222
1223
1224/* ---------------------------------------------------------------------
1225   Deal with a bunch of socket-related syscalls
1226   ------------------------------------------------------------------ */
1227
1228/* ------ */
1229
1230void
1231ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1232                                  UWord arg0, UWord arg1,
1233                                  UWord arg2, UWord arg3 )
1234{
1235   /* int socketpair(int d, int type, int protocol, int sv[2]); */
1236   PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1237                  arg3, 2*sizeof(int) );
1238}
1239
1240SysRes
1241ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1242                                   SysRes res,
1243                                   UWord arg0, UWord arg1,
1244                                   UWord arg2, UWord arg3 )
1245{
1246   SysRes r = res;
1247   Int fd1 = ((Int*)arg3)[0];
1248   Int fd2 = ((Int*)arg3)[1];
1249   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1250   POST_MEM_WRITE( arg3, 2*sizeof(int) );
1251   if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1252       !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1253      VG_(close)(fd1);
1254      VG_(close)(fd2);
1255      r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1256   } else {
1257      POST_MEM_WRITE( arg3, 2*sizeof(int) );
1258      if (VG_(clo_track_fds)) {
1259         ML_(record_fd_open_nameless)(tid, fd1);
1260         ML_(record_fd_open_nameless)(tid, fd2);
1261      }
1262   }
1263   return r;
1264}
1265
1266/* ------ */
1267
1268SysRes
1269ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1270{
1271   SysRes r = res;
1272   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1273   if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1274      VG_(close)(sr_Res(res));
1275      r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1276   } else {
1277      if (VG_(clo_track_fds))
1278         ML_(record_fd_open_nameless)(tid, sr_Res(res));
1279   }
1280   return r;
1281}
1282
1283/* ------ */
1284
1285void
1286ML_(generic_PRE_sys_bind) ( ThreadId tid,
1287                            UWord arg0, UWord arg1, UWord arg2 )
1288{
1289   /* int bind(int sockfd, struct sockaddr *my_addr,
1290               int addrlen); */
1291   pre_mem_read_sockaddr(
1292      tid, "socketcall.bind(my_addr.%s)",
1293      (struct vki_sockaddr *) arg1, arg2
1294   );
1295}
1296
1297/* ------ */
1298
1299void
1300ML_(generic_PRE_sys_accept) ( ThreadId tid,
1301                              UWord arg0, UWord arg1, UWord arg2 )
1302{
1303   /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1304   Addr addr_p     = arg1;
1305   Addr addrlen_p  = arg2;
1306   if (addr_p != (Addr)NULL)
1307      ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1308                                   "socketcall.accept(addr)",
1309                                   "socketcall.accept(addrlen_in)" );
1310}
1311
1312SysRes
1313ML_(generic_POST_sys_accept) ( ThreadId tid,
1314                               SysRes res,
1315                               UWord arg0, UWord arg1, UWord arg2 )
1316{
1317   SysRes r = res;
1318   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1319   if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1320      VG_(close)(sr_Res(res));
1321      r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1322   } else {
1323      Addr addr_p     = arg1;
1324      Addr addrlen_p  = arg2;
1325      if (addr_p != (Addr)NULL)
1326         ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1327                                       "socketcall.accept(addrlen_out)" );
1328      if (VG_(clo_track_fds))
1329          ML_(record_fd_open_nameless)(tid, sr_Res(res));
1330   }
1331   return r;
1332}
1333
1334/* ------ */
1335
1336void
1337ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1338                              UWord arg0, UWord arg1, UWord arg2,
1339                              UWord arg3, UWord arg4, UWord arg5 )
1340{
1341   /* int sendto(int s, const void *msg, int len,
1342                 unsigned int flags,
1343                 const struct sockaddr *to, int tolen); */
1344   PRE_MEM_READ( "socketcall.sendto(msg)",
1345                 arg1, /* msg */
1346                 arg2  /* len */ );
1347   pre_mem_read_sockaddr(
1348      tid, "socketcall.sendto(to.%s)",
1349      (struct vki_sockaddr *) arg4, arg5
1350   );
1351}
1352
1353/* ------ */
1354
1355void
1356ML_(generic_PRE_sys_send) ( ThreadId tid,
1357                            UWord arg0, UWord arg1, UWord arg2 )
1358{
1359   /* int send(int s, const void *msg, size_t len, int flags); */
1360   PRE_MEM_READ( "socketcall.send(msg)",
1361                  arg1, /* msg */
1362                  arg2  /* len */ );
1363
1364}
1365
1366/* ------ */
1367
1368void
1369ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1370                                UWord arg0, UWord arg1, UWord arg2,
1371                                UWord arg3, UWord arg4, UWord arg5 )
1372{
1373   /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1374                   struct sockaddr *from, int *fromlen); */
1375   Addr buf_p      = arg1;
1376   Int  len        = arg2;
1377   Addr from_p     = arg4;
1378   Addr fromlen_p  = arg5;
1379   PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1380   if (from_p != (Addr)NULL)
1381      ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1382                                   "socketcall.recvfrom(from)",
1383                                   "socketcall.recvfrom(fromlen_in)" );
1384}
1385
1386void
1387ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1388                                 SysRes res,
1389                                 UWord arg0, UWord arg1, UWord arg2,
1390                                 UWord arg3, UWord arg4, UWord arg5 )
1391{
1392   Addr buf_p      = arg1;
1393   Int  len        = arg2;
1394   Addr from_p     = arg4;
1395   Addr fromlen_p  = arg5;
1396
1397   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1398   if (from_p != (Addr)NULL)
1399      ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1400                                    "socketcall.recvfrom(fromlen_out)" );
1401   POST_MEM_WRITE( buf_p, len );
1402}
1403
1404/* ------ */
1405
1406void
1407ML_(generic_PRE_sys_recv) ( ThreadId tid,
1408                            UWord arg0, UWord arg1, UWord arg2 )
1409{
1410   /* int recv(int s, void *buf, int len, unsigned int flags); */
1411   /* man 2 recv says:
1412      The  recv call is normally used only on a connected socket
1413      (see connect(2)) and is identical to recvfrom with a  NULL
1414      from parameter.
1415   */
1416   PRE_MEM_WRITE( "socketcall.recv(buf)",
1417                  arg1, /* buf */
1418                  arg2  /* len */ );
1419}
1420
1421void
1422ML_(generic_POST_sys_recv) ( ThreadId tid,
1423                             UWord res,
1424                             UWord arg0, UWord arg1, UWord arg2 )
1425{
1426   if (res >= 0 && arg1 != 0) {
1427      POST_MEM_WRITE( arg1, /* buf */
1428                      arg2  /* len */ );
1429   }
1430}
1431
1432/* ------ */
1433
1434void
1435ML_(generic_PRE_sys_connect) ( ThreadId tid,
1436                               UWord arg0, UWord arg1, UWord arg2 )
1437{
1438   /* int connect(int sockfd,
1439                  struct sockaddr *serv_addr, int addrlen ); */
1440   pre_mem_read_sockaddr( tid,
1441                          "socketcall.connect(serv_addr.%s)",
1442                          (struct vki_sockaddr *) arg1, arg2);
1443}
1444
1445/* ------ */
1446
1447void
1448ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1449                                  UWord arg0, UWord arg1, UWord arg2,
1450                                  UWord arg3, UWord arg4 )
1451{
1452   /* int setsockopt(int s, int level, int optname,
1453                     const void *optval, int optlen); */
1454   PRE_MEM_READ( "socketcall.setsockopt(optval)",
1455                 arg3, /* optval */
1456                 arg4  /* optlen */ );
1457}
1458
1459/* ------ */
1460
1461void
1462ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1463                                   UWord arg0, UWord arg1, UWord arg2 )
1464{
1465   /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1466   Addr name_p     = arg1;
1467   Addr namelen_p  = arg2;
1468   /* Nb: name_p cannot be NULL */
1469   ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1470                                "socketcall.getsockname(name)",
1471                                "socketcall.getsockname(namelen_in)" );
1472}
1473
1474void
1475ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1476                                    SysRes res,
1477                                    UWord arg0, UWord arg1, UWord arg2 )
1478{
1479   Addr name_p     = arg1;
1480   Addr namelen_p  = arg2;
1481   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1482   ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1483                                 "socketcall.getsockname(namelen_out)" );
1484}
1485
1486/* ------ */
1487
1488void
1489ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1490                                   UWord arg0, UWord arg1, UWord arg2 )
1491{
1492   /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1493   Addr name_p     = arg1;
1494   Addr namelen_p  = arg2;
1495   /* Nb: name_p cannot be NULL */
1496   ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1497                                "socketcall.getpeername(name)",
1498                                "socketcall.getpeername(namelen_in)" );
1499}
1500
1501void
1502ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1503                                    SysRes res,
1504                                    UWord arg0, UWord arg1, UWord arg2 )
1505{
1506   Addr name_p     = arg1;
1507   Addr namelen_p  = arg2;
1508   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1509   ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1510                                 "socketcall.getpeername(namelen_out)" );
1511}
1512
1513/* ------ */
1514
1515void
1516ML_(generic_PRE_sys_sendmsg) ( ThreadId tid,
1517                               UWord arg0, UWord arg1 )
1518{
1519   /* int sendmsg(int s, const struct msghdr *msg, int flags); */
1520   struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1521   msghdr_foreachfield ( tid, msg, pre_mem_read_sendmsg );
1522}
1523
1524/* ------ */
1525
1526void
1527ML_(generic_PRE_sys_recvmsg) ( ThreadId tid,
1528                               UWord arg0, UWord arg1 )
1529{
1530   /* int recvmsg(int s, struct msghdr *msg, int flags); */
1531   struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1532   msghdr_foreachfield ( tid, msg, pre_mem_write_recvmsg );
1533}
1534
1535void
1536ML_(generic_POST_sys_recvmsg) ( ThreadId tid,
1537                                UWord arg0, UWord arg1 )
1538{
1539   struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1540   msghdr_foreachfield( tid, msg, post_mem_write_recvmsg );
1541   check_cmsg_for_fds( tid, msg );
1542}
1543
1544
1545/* ---------------------------------------------------------------------
1546   Deal with a bunch of IPC related syscalls
1547   ------------------------------------------------------------------ */
1548
1549/* ------ */
1550
1551void
1552ML_(generic_PRE_sys_semop) ( ThreadId tid,
1553                             UWord arg0, UWord arg1, UWord arg2 )
1554{
1555   /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1556   PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1557}
1558
1559/* ------ */
1560
1561void
1562ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1563                                  UWord arg0, UWord arg1,
1564                                  UWord arg2, UWord arg3 )
1565{
1566   /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1567                     struct timespec *timeout); */
1568   PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1569   if (arg3 != 0)
1570      PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
1571}
1572
1573/* ------ */
1574
/* Ask the kernel how many semaphores are in the set SEMID, by doing
   an IPC_STAT semctl on it.  Returns 0 if the semctl fails for any
   reason.  Used to size the arrays read/written by GETALL/SETALL. */
static
UInt get_sem_count( Int semid )
{
   struct vki_semid_ds buf;
   union vki_semun arg;
   SysRes res;

   /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
      (experimental) otherwise complains that the use in the return
      statement below is uninitialised. */
   buf.sem_nsems = 0;

   arg.buf = &buf;

#  ifdef __NR_semctl
   res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
#  else
   /* No direct semctl syscall on this platform: go via the
      multiplexed ipc() syscall. */
   res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
                          VKI_IPC_STAT, (UWord)&arg);
#  endif
   if (sr_isError(res))
      return 0;

   return buf.sem_nsems;
}
1600
/* Pre-handler for semctl(semid, semnum, cmd, arg): check
   addressability/readability of whichever buffer CMD implies.  The
   |VKI_IPC_64 variants use the 64-bit struct layouts.  Mirrored by
   the POST handler below. */
void
ML_(generic_PRE_sys_semctl) ( ThreadId tid,
                              UWord arg0, UWord arg1,
                              UWord arg2, UWord arg3 )
{
   /* int semctl(int semid, int semnum, int cmd, ...); */
   /* Reinterpret the raw fourth syscall argument as the semun
      union. */
   union vki_semun arg = *(union vki_semun *)&arg3;
   UInt nsems;
   switch (arg2 /* cmd */) {
#if defined(VKI_IPC_INFO)
   case VKI_IPC_INFO:
   case VKI_SEM_INFO:
   case VKI_IPC_INFO|VKI_IPC_64:
   case VKI_SEM_INFO|VKI_IPC_64:
      PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
                     (Addr)arg.buf, sizeof(struct vki_seminfo) );
      break;
#endif

   case VKI_IPC_STAT:
#if defined(VKI_SEM_STAT)
   case VKI_SEM_STAT:
#endif
      PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_STAT|VKI_IPC_64:
#if defined(VKI_SEM_STAT)
   case VKI_SEM_STAT|VKI_IPC_64:
#endif
      PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
      break;
#endif

   case VKI_IPC_SET:
      PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
                    (Addr)arg.buf, sizeof(struct vki_semid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_SET|VKI_IPC_64:
      PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
                    (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
      break;
#endif

   case VKI_GETALL:
#if defined(VKI_IPC_64)
   case VKI_GETALL|VKI_IPC_64:
#endif
      /* The array size depends on how many semaphores are in the
         set; ask the kernel. */
      nsems = get_sem_count( arg0 );
      PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
                     (Addr)arg.array, sizeof(unsigned short) * nsems );
      break;

   case VKI_SETALL:
#if defined(VKI_IPC_64)
   case VKI_SETALL|VKI_IPC_64:
#endif
      nsems = get_sem_count( arg0 );
      PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
                    (Addr)arg.array, sizeof(unsigned short) * nsems );
      break;
   }
}
1669
1670void
1671ML_(generic_POST_sys_semctl) ( ThreadId tid,
1672                               UWord res,
1673                               UWord arg0, UWord arg1,
1674                               UWord arg2, UWord arg3 )
1675{
1676   union vki_semun arg = *(union vki_semun *)&arg3;
1677   UInt nsems;
1678   switch (arg2 /* cmd */) {
1679#if defined(VKI_IPC_INFO)
1680   case VKI_IPC_INFO:
1681   case VKI_SEM_INFO:
1682   case VKI_IPC_INFO|VKI_IPC_64:
1683   case VKI_SEM_INFO|VKI_IPC_64:
1684      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1685      break;
1686#endif
1687
1688   case VKI_IPC_STAT:
1689#if defined(VKI_SEM_STAT)
1690   case VKI_SEM_STAT:
1691#endif
1692      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1693      break;
1694
1695#if defined(VKI_IPC_64)
1696   case VKI_IPC_STAT|VKI_IPC_64:
1697   case VKI_SEM_STAT|VKI_IPC_64:
1698      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1699      break;
1700#endif
1701
1702   case VKI_GETALL:
1703#if defined(VKI_IPC_64)
1704   case VKI_GETALL|VKI_IPC_64:
1705#endif
1706      nsems = get_sem_count( arg0 );
1707      POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
1708      break;
1709   }
1710}
1711
1712/* ------ */
1713
1714/* ------ */
1715
/* Ask the kernel for the size in bytes of the shared memory segment
   SHMID, via shmctl(IPC_STAT).  Returns 0 on failure.  The
   preprocessor branches pick the struct layout and syscall (direct
   shmctl vs multiplexed ipc) appropriate for the platform. */
static
UInt get_shm_size ( Int shmid )
{
#ifdef __NR_shmctl
#  ifdef VKI_IPC_64
   struct vki_shmid64_ds buf;
#    ifdef VGP_amd64_linux
     /* See bug 222545 comment 7 */
     SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
                                     VKI_IPC_STAT, (UWord)&buf);
#    else
     SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
                                     VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
#    endif
#  else /* !def VKI_IPC_64 */
   struct vki_shmid_ds buf;
   SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
#  endif /* def VKI_IPC_64 */
#else
   /* No direct shmctl syscall: go via the multiplexed ipc() call. */
   struct vki_shmid_ds buf;
   SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
                                 VKI_IPC_STAT, 0, (UWord)&buf);
#endif
   if (sr_isError(__res))
      return 0;

   return buf.shm_segsz;
}
1744
/* Pre-handler for shmat(shmid, shmaddr, shmflg): choose or validate
   the attach address.  Returns the (possibly updated) shmaddr to
   pass to the kernel, or 0 if none could be found / the requested
   address is unusable. */
UWord
ML_(generic_PRE_sys_shmat) ( ThreadId tid,
                             UWord arg0, UWord arg1, UWord arg2 )
{
   /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
   UInt  segmentSize = get_shm_size ( arg0 );
   UWord tmp;
   Bool  ok;
   if (arg1 == 0) {
      /* Caller lets the kernel choose: ask aspacem for advice. */
      /* arm-linux only: work around the fact that
         VG_(am_get_advisory_client_simple) produces something that is
         VKI_PAGE_SIZE aligned, whereas what we want is something
         VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
         increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
         then round the result up to the next VKI_SHMLBA boundary.
         See bug 222545 comment 15.  So far, arm-linux is the only
         platform where this is known to be necessary. */
      vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
      if (VKI_SHMLBA > VKI_PAGE_SIZE) {
         segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
      }
      tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
      if (ok) {
         if (VKI_SHMLBA > VKI_PAGE_SIZE) {
            arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
         } else {
            arg1 = tmp;
         }
      }
   }
   else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
      /* Requested address is not usable by the client: fail. */
      arg1 = 0;
   return arg1;
}
1779
/* Post-handler for a successful shmat(): RES is the attach address.
   Notify aspacem and the tool of the new client mapping, and discard
   any stale translations from the address range. */
void
ML_(generic_POST_sys_shmat) ( ThreadId tid,
                              UWord res,
                              UWord arg0, UWord arg1, UWord arg2 )
{
   UInt segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
   if ( segmentSize > 0 ) {
      UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
      Bool d;

      if (arg2 & VKI_SHM_RDONLY)
         prot &= ~VKI_PROT_WRITE;
      /* It isn't exactly correct to pass 0 for the fd and offset
         here.  The kernel seems to think the corresponding section
         does have dev/ino numbers:

         04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)

         However there is no obvious way to find them.  In order to
         cope with the discrepancy, aspacem's sync checker omits the
         dev/ino correspondence check in cases where V does not know
         the dev/ino. */
      d = VG_(am_notify_client_shmat)( res, segmentSize, prot );

      /* we don't distinguish whether it's read-only or
       * read-write -- it doesn't matter really. */
      VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
                              0/*di_handle*/ );
      if (d)
         VG_(discard_translations)( (Addr64)res,
                                    (ULong)VG_PGROUNDUP(segmentSize),
                                    "ML_(generic_POST_sys_shmat)" );
   }
}
1814
1815/* ------ */
1816
1817Bool
1818ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
1819{
1820   /* int shmdt(const void *shmaddr); */
1821   return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
1822}
1823
/* Post-handler for a successful shmdt(ARG0): the segment previously
   attached at ARG0 is gone.  Update aspacem, tell the tool the
   memory has died, and discard translations from the range. */
void
ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
{
   NSegment const* s = VG_(am_find_nsegment)(arg0);

   if (s != NULL) {
      /* Snapshot the range before the segment record is invalidated
         by the unmap notification below. */
      Addr  s_start = s->start;
      SizeT s_len   = s->end+1 - s->start;
      Bool  d;

      vg_assert(s->kind == SkShmC);
      vg_assert(s->start == arg0);

      d = VG_(am_notify_munmap)(s_start, s_len);
      s = NULL; /* s is now invalid */
      VG_TRACK( die_mem_munmap, s_start, s_len );
      if (d)
         VG_(discard_translations)( (Addr64)s_start,
                                    (ULong)s_len,
                                    "ML_(generic_POST_sys_shmdt)" );
   }
}
1846/* ------ */
1847
/* PRE handler for shmctl().  arg0 = shmid, arg1 = cmd, arg2 = buf.
   Marks the user buffer as about-to-be-written or about-to-be-read
   depending on the command; the buffer's type (and hence size) varies
   with the command, and the |VKI_IPC_64 variants use the 64-bit
   structure layouts. */
void
ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
                              UWord arg0, UWord arg1, UWord arg2 )
{
   /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
   switch (arg1 /* cmd */) {
#if defined(VKI_IPC_INFO)
   case VKI_IPC_INFO:
      PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
                     arg2, sizeof(struct vki_shminfo) );
      break;
#if defined(VKI_IPC_64)
   case VKI_IPC_INFO|VKI_IPC_64:
      PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
                     arg2, sizeof(struct vki_shminfo64) );
      break;
#endif
#endif

#if defined(VKI_SHM_INFO)
   case VKI_SHM_INFO:
#if defined(VKI_IPC_64)
   case VKI_SHM_INFO|VKI_IPC_64:
#endif
      PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
                     arg2, sizeof(struct vki_shm_info) );
      break;
#endif

   case VKI_IPC_STAT:
#if defined(VKI_SHM_STAT)
   case VKI_SHM_STAT:
#endif
      PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
                     arg2, sizeof(struct vki_shmid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_STAT|VKI_IPC_64:
   case VKI_SHM_STAT|VKI_IPC_64:
      PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
                     arg2, sizeof(struct vki_shmid64_ds) );
      break;
#endif

   /* IPC_SET reads the caller-supplied buffer instead of writing it. */
   case VKI_IPC_SET:
      PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
                    arg2, sizeof(struct vki_shmid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_SET|VKI_IPC_64:
      PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
                    arg2, sizeof(struct vki_shmid64_ds) );
      break;
#endif
   }
}
1906
1907void
1908ML_(generic_POST_sys_shmctl) ( ThreadId tid,
1909                               UWord res,
1910                               UWord arg0, UWord arg1, UWord arg2 )
1911{
1912   switch (arg1 /* cmd */) {
1913#if defined(VKI_IPC_INFO)
1914   case VKI_IPC_INFO:
1915      POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
1916      break;
1917   case VKI_IPC_INFO|VKI_IPC_64:
1918      POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
1919      break;
1920#endif
1921
1922#if defined(VKI_SHM_INFO)
1923   case VKI_SHM_INFO:
1924   case VKI_SHM_INFO|VKI_IPC_64:
1925      POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
1926      break;
1927#endif
1928
1929   case VKI_IPC_STAT:
1930#if defined(VKI_SHM_STAT)
1931   case VKI_SHM_STAT:
1932#endif
1933      POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
1934      break;
1935
1936#if defined(VKI_IPC_64)
1937   case VKI_IPC_STAT|VKI_IPC_64:
1938   case VKI_SHM_STAT|VKI_IPC_64:
1939      POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
1940      break;
1941#endif
1942
1943
1944   }
1945}
1946
1947
1948/* ---------------------------------------------------------------------
1949   Generic handler for mmap
1950   ------------------------------------------------------------------ */
1951
1952/*
 * Although mmap is specified by POSIX and the arguments are generally
1954 * consistent across platforms the precise details of the low level
1955 * argument passing conventions differ. For example:
1956 *
1957 * - On x86-linux there is mmap (aka old_mmap) which takes the
1958 *   arguments in a memory block and the offset in bytes; and
1959 *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
1960 *   way and the offset in pages.
1961 *
1962 * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
1963 *   arguments in the normal way and the offset in bytes; and
1964 *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
1965 *   way and the offset in pages.
1966 *
1967 * - On amd64-linux everything is simple and there is just the one
1968 *   call, mmap (aka sys_mmap)  which takes the arguments in the
1969 *   normal way and the offset in bytes.
1970 *
1971 * - On s390x-linux there is mmap (aka old_mmap) which takes the
1972 *   arguments in a memory block and the offset in bytes. mmap2
1973 *   is also available (but not exported via unistd.h) with
1974 *   arguments in a memory block and the offset in pages.
1975 *
1976 * To cope with all this we provide a generic handler function here
1977 * and then each platform implements one or more system call handlers
1978 * which call this generic routine after extracting and normalising
1979 * the arguments.
1980 */
1981
/* Generic mmap handler, called by the per-platform wrappers after
   they have normalised the arguments (see the big comment above):
   arg1 = requested address, arg2 = length, arg3 = prot, arg4 = flags,
   arg5 = fd, arg6 = file offset in BYTES.  Performs the mapping
   itself (so aspacem can control address placement) and returns the
   SysRes the wrapper should install as the syscall's result. */
SysRes
ML_(generic_PRE_sys_mmap) ( ThreadId tid,
                            UWord arg1, UWord arg2, UWord arg3,
                            UWord arg4, UWord arg5, Off64T arg6 )
{
   Addr       advised;
   SysRes     sres;
   MapRequest mreq;
   Bool       mreq_ok;

#if defined(VGO_darwin)
   // Nb: we can't use this on Darwin, it has races:
   // * needs to RETRY if advisory succeeds but map fails
   //   (could have been some other thread in a nonblocking call)
   // * needs to not use fixed-position mmap() on Darwin
   //   (mmap will cheerfully smash whatever's already there, which might
   //   be a new mapping from some other thread in a nonblocking call)
   VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
#endif

   if (arg2 == 0) {
      /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
         shall be established. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg1)) {
      /* zap any misaligned addresses. */
      /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
         to fail.   Here, we catch them all. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg6)) {
      /* zap any misaligned offsets. */
      /* SuSV3 says: The off argument is constrained to be aligned and
         sized according to the value returned by sysconf() when
         passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   /* Figure out what kind of allocation constraints there are
      (fixed/hint/any), and ask aspacem what we should do. */
   mreq.start = arg1;
   mreq.len   = arg2;
   if (arg4 & VKI_MAP_FIXED) {
      mreq.rkind = MFixed;
   } else
   if (arg1 != 0) {
      mreq.rkind = MHint;
   } else {
      mreq.rkind = MAny;
   }

   /* Enquire ... */
   advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   if (!mreq_ok) {
      /* Our request was bounced, so we'd better fail. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   /* Otherwise we're OK (so far).  Install aspacem's choice of
      address, and let the mmap go through.  Note the mapping is forced
      to aspacem's advised address by adding MAP_FIXED. */
   sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                    arg4 | VKI_MAP_FIXED,
                                    arg5, arg6);

   /* A refinement: it may be that the kernel refused aspacem's choice
      of address.  If we were originally asked for a hinted mapping,
      there is still a last chance: try again at any address.
      Hence: */
   if (mreq.rkind == MHint && sr_isError(sres)) {
      mreq.start = 0;
      mreq.len   = arg2;
      mreq.rkind = MAny;
      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
      if (!mreq_ok) {
         /* Our request was bounced, so we'd better fail. */
         return VG_(mk_SysRes_Error)( VKI_EINVAL );
      }
      /* and try again with the kernel */
      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                       arg4 | VKI_MAP_FIXED,
                                       arg5, arg6);
   }

   if (!sr_isError(sres)) {
      ULong di_handle;
      /* Notify aspacem. */
      notify_core_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         arg4, /* the original flags value */
         arg5, /* fd */
         arg6  /* offset */
      );
      /* Load symbols? */
      di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
                                       False/*allow_SkFileV*/, (Int)arg5 );
      /* Notify the tool. */
      notify_tool_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         di_handle /* so the tool can refer to the read debuginfo later,
                      if it wants. */
      );
   }

   /* Stay sane: a successful MAP_FIXED mapping must have landed at the
      requested address. */
   if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
      vg_assert(sr_Res(sres) == arg1);

   return sres;
}
2098
2099
2100/* ---------------------------------------------------------------------
2101   The Main Entertainment ... syscall wrappers
2102   ------------------------------------------------------------------ */
2103
2104/* Note: the PRE() and POST() wrappers are for the actual functions
2105   implementing the system calls in the OS kernel.  These mostly have
2106   names like sys_write();  a few have names like old_mmap().  See the
2107   comment for ML_(syscall_table)[] for important info about the __NR_foo
2108   constants and their relationship to the sys_foo() functions.
2109
2110   Some notes about names used for syscalls and args:
2111   - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2112     ambiguity.
2113
2114   - For error messages, we generally use a somewhat generic name
2115     for the syscall (eg. "write" rather than "sys_write").  This should be
2116     good enough for the average user to understand what is happening,
2117     without confusing them with names like "sys_write".
2118
2119   - Also, for error messages the arg names are mostly taken from the man
2120     pages (even though many of those man pages are really for glibc
2121     functions of the same name), rather than from the OS kernel source,
2122     for the same reason -- a user presented with a "bogus foo(bar)" arg
2123     will most likely look at the "foo" man page to see which is the "bar"
2124     arg.
2125
2126   Note that we use our own vki_* types.  The one exception is in
2127   PRE_REG_READn calls, where pointer types haven't been changed, because
2128   they don't need to be -- eg. for "foo*" to be used, the type foo need not
2129   be visible.
2130
2131   XXX: some of these are arch-specific, and should be factored out.
2132*/
2133
/* Shorthand for declaring generic-family PRE/POST wrapper functions. */
#define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
#define POST(name)     DEFN_POST_TEMPLATE(generic, name)

// Macros to support 64-bit syscall args split into two 32 bit values.
// MERGE64 reassembles the halves into a ULong (low word | high word << 32);
// MERGE64_FIRST / MERGE64_SECOND produce the names ("<name>_low" /
// "<name>_high") for the half that arrives in the first / second syscall
// argument slot, which depends on endianness.
#if defined(VG_LITTLEENDIAN)
#define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
#define MERGE64_FIRST(name) name##_low
#define MERGE64_SECOND(name) name##_high
#elif defined(VG_BIGENDIAN)
#define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
#define MERGE64_FIRST(name) name##_high
#define MERGE64_SECOND(name) name##_low
#else
#error Unknown endianness
#endif
2149
/* exit(status): never reaches the kernel from here.  Mark this thread
   as exiting (the scheduler acts on exitreason) and report success. */
PRE(sys_exit)
{
   ThreadState* tst;
   /* simple; just make this thread exit */
   PRINT("exit( %ld )", ARG1);
   PRE_REG_READ1(void, "exit", int, status);
   tst = VG_(get_ThreadState)(tid);
   /* Set the thread's status to be exiting, then claim that the
      syscall succeeded. */
   tst->exitreason = VgSrc_ExitThread;
   tst->os_state.exitcode = ARG1;
   SET_STATUS_Success(0);
}
2163
/* Catch-all for syscalls the kernel does not implement: report ENOSYS
   without entering the kernel. */
PRE(sys_ni_syscall)
{
   PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
      VG_SYSNUM_STRING(SYSNO));
   PRE_REG_READ0(long, "ni_syscall");
   SET_STATUS_Failure( VKI_ENOSYS );
}
2171
/* iopl(level): no memory arguments, just declare the register read. */
PRE(sys_iopl)
{
   PRINT("sys_iopl ( %ld )", ARG1);
   PRE_REG_READ1(long, "iopl", unsigned long, level);
}
2177
/* fsync(fd): may block on I/O; no memory arguments. */
PRE(sys_fsync)
{
   *flags |= SfMayBlock;
   PRINT("sys_fsync ( %ld )", ARG1);
   PRE_REG_READ1(long, "fsync", unsigned int, fd);
}
2184
/* fdatasync(fd): may block on I/O; no memory arguments. */
PRE(sys_fdatasync)
{
   *flags |= SfMayBlock;
   PRINT("sys_fdatasync ( %ld )", ARG1);
   PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
}
2191
/* msync(start, length, flags): the flushed range must be readable. */
PRE(sys_msync)
{
   *flags |= SfMayBlock;
   PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "msync",
                 unsigned long, start, vki_size_t, length, int, flags);
   PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
}
2200
// Nb: getpmsg() and putpmsg() are special additional syscalls used in early
// versions of LiS (Linux Streams).  They are not part of the kernel.
// Therefore, we have to provide this type ourself, rather than getting it
// from the kernel sources.
// (Presumably this mirrors the STREAMS 'struct strbuf' layout — the field
// meanings below come from that convention.)
struct vki_pmsg_strbuf {
   int     maxlen;         /* no. of bytes in buffer */
   int     len;            /* no. of bytes returned */
   vki_caddr_t buf;        /* pointer to data */
};
/* getpmsg(fd, ctrl, data, bandp, flagsp): the ctrl/data strbufs
   describe caller buffers the kernel fills; bandp/flagsp are output
   ints.  All output areas are marked writable only when present. */
PRE(sys_getpmsg)
{
   /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   *flags |= SfMayBlock;
   PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ5(int, "getpmsg",
                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
                 int *, bandp, int *, flagsp);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   /* maxlen gives the capacity of the caller's buffer. */
   if (ctrl && ctrl->maxlen > 0)
      PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
   if (data && data->maxlen > 0)
      PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
   if (ARG4)
      PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
   if (ARG5)
      PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
}
2231POST(sys_getpmsg)
2232{
2233   struct vki_pmsg_strbuf *ctrl;
2234   struct vki_pmsg_strbuf *data;
2235   vg_assert(SUCCESS);
2236   ctrl = (struct vki_pmsg_strbuf *)ARG2;
2237   data = (struct vki_pmsg_strbuf *)ARG3;
2238   if (RES == 0 && ctrl && ctrl->len > 0) {
2239      POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
2240   }
2241   if (RES == 0 && data && data->len > 0) {
2242      POST_MEM_WRITE( (Addr)data->buf, data->len);
2243   }
2244}
2245
/* putpmsg(fd, ctrl, data, band, flags): the ctrl/data strbufs describe
   caller-supplied messages; their 'len' bytes must be readable. */
PRE(sys_putpmsg)
{
   /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   *flags |= SfMayBlock;
   PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ5(int, "putpmsg",
                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
                 int, band, int, flags);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   if (ctrl && ctrl->len > 0)
      PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
   if (data && data->len > 0)
      PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
}
2263
/* getitimer(which, value): both timevals inside *value are outputs.
   NOTE(review): unlike the POST handler, there is no NULL check on
   ARG2 before forming &value->it_interval; presumably a NULL value
   just makes the tool report an invalid write at a near-zero address
   — confirm that is the intent. */
PRE(sys_getitimer)
{
   struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);

   PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
   PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
}
2273
2274POST(sys_getitimer)
2275{
2276   if (ARG2 != (Addr)NULL) {
2277      struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2278      POST_timeval_WRITE( &(value->it_interval) );
2279      POST_timeval_WRITE( &(value->it_value) );
2280   }
2281}
2282
/* setitimer(which, value, ovalue): *value (if non-NULL) is read;
   *ovalue (if non-NULL) receives the previous setting. */
PRE(sys_setitimer)
{
   PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "setitimer",
                 int, which,
                 struct itimerval *, value, struct itimerval *, ovalue);
   if (ARG2 != (Addr)NULL) {
      struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
      PRE_timeval_READ( "setitimer(&value->it_interval)",
                         &(value->it_interval));
      PRE_timeval_READ( "setitimer(&value->it_value)",
                         &(value->it_value));
   }
   if (ARG3 != (Addr)NULL) {
      struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
      PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
                         &(ovalue->it_interval));
      PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
                         &(ovalue->it_value));
   }
}
2304
2305POST(sys_setitimer)
2306{
2307   if (ARG3 != (Addr)NULL) {
2308      struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2309      POST_timeval_WRITE( &(ovalue->it_interval) );
2310      POST_timeval_WRITE( &(ovalue->it_value) );
2311   }
2312}
2313
/* chroot(path): the path must be a readable NUL-terminated string. */
PRE(sys_chroot)
{
   PRINT("sys_chroot ( %#lx )", ARG1);
   PRE_REG_READ1(long, "chroot", const char *, path);
   PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
}
2320
/* madvise(start, length, advice): advisory only, no memory checks. */
PRE(sys_madvise)
{
   *flags |= SfMayBlock;
   PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "madvise",
                 unsigned long, start, vki_size_t, length, int, advice);
}
2328
#if HAVE_MREMAP
/* mremap(old_addr, old_size, new_size, flags[, new_addr]): the fifth
   register is only declared as read when MREMAP_FIXED is set, though
   its value is handed to do_mremap() unconditionally.  do_mremap()
   performs the move/resize and its result is installed here as the
   syscall status. */
PRE(sys_mremap)
{
   // Nb: this is different to the glibc version described in the man pages,
   // which lacks the fifth 'new_address' argument.
   if (ARG4 & VKI_MREMAP_FIXED) {
      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
            ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
      PRE_REG_READ5(unsigned long, "mremap",
                    unsigned long, old_addr, unsigned long, old_size,
                    unsigned long, new_size, unsigned long, flags,
                    unsigned long, new_addr);
   } else {
      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
            ARG1, (ULong)ARG2, ARG3, ARG4);
      PRE_REG_READ4(unsigned long, "mremap",
                    unsigned long, old_addr, unsigned long, old_size,
                    unsigned long, new_size, unsigned long, flags);
   }
   SET_STATUS_from_SysRes(
      do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   );
}
#endif /* HAVE_MREMAP */
2353
/* nice(inc): no memory arguments. */
PRE(sys_nice)
{
   PRINT("sys_nice ( %ld )", ARG1);
   PRE_REG_READ1(long, "nice", int, inc);
}
2359
/* mlock(addr, len): may block; no memory contents are read. */
PRE(sys_mlock)
{
   *flags |= SfMayBlock;
   PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
}
2366
/* munlock(addr, len): may block; no memory contents are read. */
PRE(sys_munlock)
{
   *flags |= SfMayBlock;
   PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
}
2373
/* mlockall(flags): may block; no memory arguments. */
PRE(sys_mlockall)
{
   *flags |= SfMayBlock;
   PRINT("sys_mlockall ( %lx )", ARG1);
   PRE_REG_READ1(long, "mlockall", int, flags);
}
2380
/* setpriority(which, who, prio): no memory arguments. */
PRE(sys_setpriority)
{
   PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
   PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
}
2386
/* getpriority(which, who): no memory arguments. */
PRE(sys_getpriority)
{
   PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
   PRE_REG_READ2(long, "getpriority", int, which, int, who);
}
2392
/* pwrite64(fd, buf, count, offset): 'count' bytes of buf must be
   readable.  On 32-bit targets the 64-bit offset arrives split across
   two registers and is reassembled with MERGE64; on 64-bit targets it
   is a single argument. */
PRE(sys_pwrite64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   PRE_REG_READ5(ssize_t, "pwrite64",
                 unsigned int, fd, const char *, buf, vki_size_t, count,
                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
#elif VG_WORDSIZE == 8
   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   PRE_REG_READ4(ssize_t, "pwrite64",
                 unsigned int, fd, const char *, buf, vki_size_t, count,
                 Word, offset);
#else
#  error Unexpected word size
#endif
   PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
}
2413
/* sync(): no arguments; may block on I/O. */
PRE(sys_sync)
{
   *flags |= SfMayBlock;
   PRINT("sys_sync ( )");
   PRE_REG_READ0(long, "sync");
}
2420
/* fstatfs(fd, buf): buf receives a struct statfs. */
PRE(sys_fstatfs)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
   PRE_REG_READ2(long, "fstatfs",
                 unsigned int, fd, struct statfs *, buf);
   PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
}
2429
/* After fstatfs: the whole statfs buffer is now defined. */
POST(sys_fstatfs)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
}
2434
/* fstatfs64(fd, size, buf): buf's size is given explicitly by the
   'size' argument (ARG2), not by a fixed struct size. */
PRE(sys_fstatfs64)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "fstatfs64",
                 unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
   PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
}
/* After fstatfs64: ARG2 bytes at ARG3 are now defined. */
POST(sys_fstatfs64)
{
   POST_MEM_WRITE( ARG3, ARG2 );
}
2447
/* getsid(pid): no memory arguments. */
PRE(sys_getsid)
{
   PRINT("sys_getsid ( %ld )", ARG1);
   PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
}
2453
/* pread64(fd, buf, count, offset): buf must be writable for 'count'
   bytes.  As with pwrite64, on 32-bit targets the 64-bit offset is
   split across two registers and reassembled with MERGE64. */
PRE(sys_pread64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   PRE_REG_READ5(ssize_t, "pread64",
                 unsigned int, fd, char *, buf, vki_size_t, count,
                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
#elif VG_WORDSIZE == 8
   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   PRE_REG_READ4(ssize_t, "pread64",
                 unsigned int, fd, char *, buf, vki_size_t, count,
                 Word, offset);
#else
#  error Unexpected word size
#endif
   PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
}
2474POST(sys_pread64)
2475{
2476   vg_assert(SUCCESS);
2477   if (RES > 0) {
2478      POST_MEM_WRITE( ARG2, RES );
2479   }
2480}
2481
/* mknod(pathname, mode, dev): pathname must be a readable
   NUL-terminated string. */
PRE(sys_mknod)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
   PRE_REG_READ3(long, "mknod",
                 const char *, pathname, int, mode, unsigned, dev);
   PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
}
2490
/* flock(fd, operation): may block waiting for the lock. */
PRE(sys_flock)
{
   *flags |= SfMayBlock;
   PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
   PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
}
2497
2498// Pre_read a char** argument.
2499static void pre_argv_envp(Addr a, ThreadId tid, Char* s1, Char* s2)
2500{
2501   while (True) {
2502      Addr a_deref;
2503      Addr* a_p = (Addr*)a;
2504      PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2505      a_deref = *a_p;
2506      if (0 == a_deref)
2507         break;
2508      PRE_MEM_RASCIIZ( s2, a_deref );
2509      a += sizeof(char*);
2510   }
2511}
2512
2513static Bool i_am_the_only_thread ( void )
2514{
2515   Int c = VG_(count_living_threads)();
2516   vg_assert(c >= 1); /* stay sane */
2517   return c == 1;
2518}
2519
/* Wait until all other threads disappear.  Spins, yielding the CPU
   and polling this thread's signals, until i_am_the_only_thread()
   holds. */
void VG_(reap_threads)(ThreadId self)
{
   while (!i_am_the_only_thread()) {
      /* Let other thread(s) run */
      VG_(vg_yield)();
      VG_(poll_signals)(self);
   }
   vg_assert(i_am_the_only_thread());
}
2530
2531// XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2532// but it seems to work nonetheless...
2533PRE(sys_execve)
2534{
2535   Char*        path = NULL;       /* path to executable */
2536   Char**       envp = NULL;
2537   Char**       argv = NULL;
2538   Char**       arg2copy;
2539   Char*        launcher_basename = NULL;
2540   ThreadState* tst;
2541   Int          i, j, tot_args;
2542   SysRes       res;
2543   Bool         setuid_allowed, trace_this_child;
2544
2545   PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
2546   PRE_REG_READ3(vki_off_t, "execve",
2547                 char *, filename, char **, argv, char **, envp);
2548   PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
2549   if (ARG2 != 0)
2550      pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
2551   if (ARG3 != 0)
2552      pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
2553
2554   vg_assert(VG_(is_valid_tid)(tid));
2555   tst = VG_(get_ThreadState)(tid);
2556
2557   /* Erk.  If the exec fails, then the following will have made a
2558      mess of things which makes it hard for us to continue.  The
2559      right thing to do is piece everything together again in
2560      POST(execve), but that's close to impossible.  Instead, we make
2561      an effort to check that the execve will work before actually
2562      doing it. */
2563
2564   /* Check that the name at least begins in client-accessible storage. */
2565   if (ARG1 == 0 /* obviously bogus */
2566       || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
2567      SET_STATUS_Failure( VKI_EFAULT );
2568      return;
2569   }
2570   // debug-only printing
2571   if (0) {
2572      VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
2573      if (ARG2) {
2574         VG_(printf)("ARG2 = ");
2575         Int q;
2576         HChar** vec = (HChar**)ARG2;
2577         for (q = 0; vec[q]; q++)
2578            VG_(printf)("%p(%s) ", vec[q], vec[q]);
2579         VG_(printf)("\n");
2580      } else {
2581         VG_(printf)("ARG2 = null\n");
2582      }
2583   }
2584
2585   // Decide whether or not we want to follow along
2586   { // Make 'child_argv' be a pointer to the child's arg vector
2587     // (skipping the exe name)
2588     HChar** child_argv = (HChar**)ARG2;
2589     if (child_argv && child_argv[0] == NULL)
2590        child_argv = NULL;
2591     trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
2592   }
2593
2594   // Do the important checks:  it is a file, is executable, permissions are
2595   // ok, etc.  We allow setuid executables to run only in the case when
   // we are not simulating them, that is, they are to be run natively.
2597   setuid_allowed = trace_this_child  ? False  : True;
2598   res = VG_(pre_exec_check)((const Char*)ARG1, NULL, setuid_allowed);
2599   if (sr_isError(res)) {
2600      SET_STATUS_Failure( sr_Err(res) );
2601      return;
2602   }
2603
2604   /* If we're tracing the child, and the launcher name looks bogus
2605      (possibly because launcher.c couldn't figure it out, see
2606      comments therein) then we have no option but to fail. */
2607   if (trace_this_child
2608       && (VG_(name_of_launcher) == NULL
2609           || VG_(name_of_launcher)[0] != '/')) {
2610      SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
2611      return;
2612   }
2613
2614   /* After this point, we can't recover if the execve fails. */
2615   VG_(debugLog)(1, "syswrap", "Exec of %s\n", (Char*)ARG1);
2616
2617
2618   // Terminate gdbserver if it is active.
2619   if (VG_(clo_vgdb)  != Vg_VgdbNo) {
2620      // If the child will not be traced, we need to terminate gdbserver
2621      // to cleanup the gdbserver resources (e.g. the FIFO files).
2622      // If child will be traced, we also terminate gdbserver: the new
2623      // Valgrind will start a fresh gdbserver after exec.
2624      VG_(gdbserver) (0);
2625   }
2626
2627   /* Resistance is futile.  Nuke all other threads.  POSIX mandates
2628      this. (Really, nuke them all, since the new process will make
2629      its own new thread.) */
2630   VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
2631   VG_(reap_threads)(tid);
2632
2633   // Set up the child's exe path.
2634   //
2635   if (trace_this_child) {
2636
2637      // We want to exec the launcher.  Get its pre-remembered path.
2638      path = VG_(name_of_launcher);
2639      // VG_(name_of_launcher) should have been acquired by m_main at
2640      // startup.
2641      vg_assert(path);
2642
2643      launcher_basename = VG_(strrchr)(path, '/');
2644      if (launcher_basename == NULL || launcher_basename[1] == 0) {
2645         launcher_basename = path;  // hmm, tres dubious
2646      } else {
2647         launcher_basename++;
2648      }
2649
2650   } else {
2651      path = (Char*)ARG1;
2652      if (VG_(clo_xml)) {
2653        VG_(printf_xml)("\n<execv/>\n\n</valgrindoutput>\n\n");
2654      } else {
2655        VG_(umsg)("execv called - the tool will now quit\n");
2656      }
2657   }
2658
2659   // Set up the child's environment.
2660   //
2661   // Remove the valgrind-specific stuff from the environment so the
2662   // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
2663   // This is done unconditionally, since if we are tracing the child,
2664   // the child valgrind will set up the appropriate client environment.
2665   // Nb: we make a copy of the environment before trying to mangle it
2666   // as it might be in read-only memory (this was bug #101881).
2667   //
2668   // Then, if tracing the child, set VALGRIND_LIB for it.
2669   //
2670   if (ARG3 == 0) {
2671      envp = NULL;
2672   } else {
2673      envp = VG_(env_clone)( (Char**)ARG3 );
2674      if (envp == NULL) goto hosed;
2675      VG_(env_remove_valgrind_env_stuff)( envp );
2676   }
2677
2678   if (trace_this_child) {
2679      // Set VALGRIND_LIB in ARG3 (the environment)
2680      VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
2681   }
2682
2683   // Set up the child's args.  If not tracing it, they are
2684   // simply ARG2.  Otherwise, they are
2685   //
2686   // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
2687   //
2688   // except that the first VG_(args_for_valgrind_noexecpass) args
2689   // are omitted.
2690   //
2691   if (!trace_this_child) {
2692      argv = (Char**)ARG2;
2693   } else {
2694      vg_assert( VG_(args_for_valgrind) );
2695      vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
2696      vg_assert( VG_(args_for_valgrind_noexecpass)
2697                   <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
2698      /* how many args in total will there be? */
2699      // launcher basename
2700      tot_args = 1;
2701      // V's args
2702      tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
2703      tot_args -= VG_(args_for_valgrind_noexecpass);
2704      // name of client exe
2705      tot_args++;
2706      // args for client exe, skipping [0]
2707      arg2copy = (Char**)ARG2;
2708      if (arg2copy && arg2copy[0]) {
2709         for (i = 1; arg2copy[i]; i++)
2710            tot_args++;
2711      }
2712      // allocate
2713      argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
2714                          (tot_args+1) * sizeof(HChar*) );
2715      if (argv == 0) goto hosed;
2716      // copy
2717      j = 0;
2718      argv[j++] = launcher_basename;
2719      for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
2720         if (i < VG_(args_for_valgrind_noexecpass))
2721            continue;
2722         argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
2723      }
2724      argv[j++] = (Char*)ARG1;
2725      if (arg2copy && arg2copy[0])
2726         for (i = 1; arg2copy[i]; i++)
2727            argv[j++] = arg2copy[i];
2728      argv[j++] = NULL;
2729      // check
2730      vg_assert(j == tot_args+1);
2731   }
2732
2733   /* restore the DATA rlimit for the child */
2734   VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
2735
2736   /*
2737      Set the signal state up for exec.
2738
2739      We need to set the real signal state to make sure the exec'd
2740      process gets SIG_IGN properly.
2741
2742      Also set our real sigmask to match the client's sigmask so that
2743      the exec'd child will get the right mask.  First we need to
2744      clear out any pending signals so they they don't get delivered,
2745      which would confuse things.
2746
2747      XXX This is a bug - the signals should remain pending, and be
2748      delivered to the new process after exec.  There's also a
2749      race-condition, since if someone delivers us a signal between
2750      the sigprocmask and the execve, we'll still get the signal. Oh
2751      well.
2752   */
2753   {
2754      vki_sigset_t allsigs;
2755      vki_siginfo_t info;
2756
2757      /* What this loop does: it queries SCSS (the signal state that
2758         the client _thinks_ the kernel is in) by calling
2759         VG_(do_sys_sigaction), and modifies the real kernel signal
2760         state accordingly. */
2761      for (i = 1; i < VG_(max_signal); i++) {
2762         vki_sigaction_fromK_t sa_f;
2763         vki_sigaction_toK_t   sa_t;
2764         VG_(do_sys_sigaction)(i, NULL, &sa_f);
2765         VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
2766         if (sa_t.ksa_handler == VKI_SIG_IGN)
2767            VG_(sigaction)(i, &sa_t, NULL);
2768         else {
2769            sa_t.ksa_handler = VKI_SIG_DFL;
2770            VG_(sigaction)(i, &sa_t, NULL);
2771         }
2772      }
2773
2774      VG_(sigfillset)(&allsigs);
2775      while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
2776         ;
2777
2778      VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
2779   }
2780
2781   if (0) {
2782      Char **cpp;
2783      VG_(printf)("exec: %s\n", path);
2784      for (cpp = argv; cpp && *cpp; cpp++)
2785         VG_(printf)("argv: %s\n", *cpp);
2786      if (0)
2787         for (cpp = envp; cpp && *cpp; cpp++)
2788            VG_(printf)("env: %s\n", *cpp);
2789   }
2790
2791   SET_STATUS_from_SysRes(
2792      VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
2793   );
2794
2795   /* If we got here, then the execve failed.  We've already made way
2796      too much of a mess to continue, so we have to abort. */
2797  hosed:
2798   vg_assert(FAILURE);
2799   VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
2800                ARG1, (char*)ARG1, ARG2, ARG3, ERR);
2801   VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
2802                            "execve() failing, so I'm dying.\n");
2803   VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
2804                            "or work out how to recover.\n");
2805   VG_(exit)(101);
2806}
2807
// access(2): only the pathname string needs checking; the kernel writes
// nothing back, so there is no POST wrapper.
PRE(sys_access)
{
   PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
}

// alarm(2): register-only syscall; no memory arguments to annotate.
PRE(sys_alarm)
{
   PRINT("sys_alarm ( %ld )", ARG1);
   PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
}
2820
// brk(2): handled entirely inside Valgrind via do_brk(); the result is
// set here and the tool is told about any dataseg growth/shrinkage.
PRE(sys_brk)
{
   Addr brk_limit = VG_(brk_limit);   // dataseg end before this call
   Addr brk_new;

   /* libc   says: int   brk(void *end_data_segment);
      kernel says: void* brk(void* end_data_segment);  (more or less)

      libc returns 0 on success, and -1 (and sets errno) on failure.
      Nb: if you ask to shrink the dataseg end below what it
      currently is, that always succeeds, even if the dataseg end
      doesn't actually change (eg. brk(0)).  Unless it seg faults.

      Kernel returns the new dataseg end.  If the brk() failed, this
      will be unchanged from the old one.  That's why calling (kernel)
      brk(0) gives the current dataseg end (libc brk() just returns
      zero in that case).

      Both will seg fault if you shrink it back into a text segment.
   */
   PRINT("sys_brk ( %#lx )", ARG1);
   PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);

   // do_brk returns the new dataseg end; equal to ARG1 iff it succeeded.
   brk_new = do_brk(ARG1);
   SET_STATUS_Success( brk_new );

   if (brk_new == ARG1) {
      /* brk() succeeded */
      if (brk_new < brk_limit) {
         /* successfully shrunk the data segment. */
         VG_TRACK( die_mem_brk, (Addr)ARG1,
		   brk_limit-ARG1 );
      } else
      if (brk_new > brk_limit) {
         /* successfully grew the data segment */
         VG_TRACK( new_mem_brk, brk_limit,
                   ARG1-brk_limit, tid );
      }
   } else {
      /* brk() failed */
      vg_assert(brk_limit == brk_new);
   }
}
2864
// chdir(2): check the path string only.
PRE(sys_chdir)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "chdir", const char *, path);
   PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
}

// chmod(2): check the path string only.
PRE(sys_chmod)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
   PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
}

// chown(2): check the path string only.
PRE(sys_chown)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "chown",
                 const char *, path, vki_uid_t, owner, vki_gid_t, group);
   PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
}

// lchown(2): like chown but does not follow symlinks; same checks.
PRE(sys_lchown)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "lchown",
                 const char *, path, vki_uid_t, owner, vki_gid_t, group);
   PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
}
2898
// close(2): refuse (EBADF) attempts to close fds Valgrind itself relies
// on (log fd, and stderr while -d debug logging is active).
PRE(sys_close)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_close ( %ld )", ARG1);
   PRE_REG_READ1(long, "close", unsigned int, fd);

   /* Detect and negate attempts by the client to close Valgrind's log fd */
   if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
        /* If doing -d style logging (which is to fd=2), don't
           allow that to be closed either. */
        || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
      SET_STATUS_Failure( VKI_EBADF );
}

// On success, update the fd-tracking records if --track-fds is on.
POST(sys_close)
{
   if (VG_(clo_track_fds)) record_fd_close(ARG1);
}
2917
// dup(2): nothing to check up front; policy is applied in the POST.
PRE(sys_dup)
{
   PRINT("sys_dup ( %ld )", ARG1);
   PRE_REG_READ1(long, "dup", unsigned int, oldfd);
}

// If the kernel handed out an fd in Valgrind's reserved range, close it
// again and report EMFILE to the client; otherwise record it.
POST(sys_dup)
{
   vg_assert(SUCCESS);
   if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
      VG_(close)(RES);
      SET_STATUS_Failure( VKI_EMFILE );
   } else {
      if (VG_(clo_track_fds))
         ML_(record_fd_open_named)(tid, RES);
   }
}

// dup2(2): the target fd is chosen by the client, so it can be vetted
// before the syscall happens (unlike dup).
PRE(sys_dup2)
{
   PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
   if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
      SET_STATUS_Failure( VKI_EBADF );
}

POST(sys_dup2)
{
   vg_assert(SUCCESS);
   if (VG_(clo_track_fds))
      ML_(record_fd_open_named)(tid, RES);
}
2950
// fchdir(2): fd-only arguments; no memory to annotate.
PRE(sys_fchdir)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fchdir ( %ld )", ARG1);
   PRE_REG_READ1(long, "fchdir", unsigned int, fd);
}

// fchown(2): fd-only arguments; no memory to annotate.
PRE(sys_fchown)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "fchown",
                 unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
}

// fchmod(2): fd-only arguments; no memory to annotate.
PRE(sys_fchmod)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
}
2972
// fstat(2) ("new" struct stat layout): the kernel fills in *buf.
PRE(sys_newfstat)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
   PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
}

POST(sys_newfstat)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
}
2985
// Parent's signal mask, saved across the fork so both sides can restore it.
static vki_sigset_t fork_saved_mask;

// In Linux, the sys_fork() function varies across architectures, but we
// ignore the various args it gets, and so it looks arch-neutral.  Hmm.
PRE(sys_fork)
{
   Bool is_child;
   Int child_pid;
   vki_sigset_t mask;

   PRINT("sys_fork ( )");
   PRE_REG_READ0(long, "fork");

   /* Block all signals during fork, so that we can fix things up in
      the child without being interrupted. */
   VG_(sigfillset)(&mask);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);

   SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );

   // NOTE(review): on fork failure we return here WITHOUT restoring
   // fork_saved_mask, leaving all signals blocked -- confirm intended.
   if (!SUCCESS) return;

#if defined(VGO_linux)
   // RES is 0 for child, non-0 (the child's PID) for parent.
   is_child = ( RES == 0 ? True : False );
   child_pid = ( is_child ? -1 : RES );
#elif defined(VGO_darwin)
   // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
   is_child = RESHI;
   child_pid = RES;
#else
#  error Unknown OS
#endif

   // Run registered atfork pre-handlers in both parent and child paths.
   VG_(do_atfork_pre)(tid);

   if (is_child) {
      VG_(do_atfork_child)(tid);

      /* restore signal mask */
      VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);

      /* If --child-silent-after-fork=yes was specified, set the
         output file descriptors to 'impossible' values.  This is
         noticed by send_bytes_to_logging_sink in m_libcprint.c, which
         duly stops writing any further output. */
      if (VG_(clo_child_silent_after_fork)) {
         if (!VG_(log_output_sink).is_socket)
            VG_(log_output_sink).fd = -1;
         if (!VG_(xml_output_sink).is_socket)
            VG_(xml_output_sink).fd = -1;
      }

   } else {
      VG_(do_atfork_parent)(tid);

      PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);

      /* restore signal mask */
      VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   }
}
3048
// ftruncate(2): fd + length, no memory arguments.
PRE(sys_ftruncate)
{
   *flags |= SfMayBlock;
   PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
}

// truncate(2): check the path string.
PRE(sys_truncate)
{
   *flags |= SfMayBlock;
   PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "truncate",
                 const char *, path, unsigned long, length);
   PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
}

// ftruncate64(2): on 32-bit targets the 64-bit length arrives split
// across two registers (MERGE64); on 64-bit it is a single word.
PRE(sys_ftruncate64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
   PRE_REG_READ3(long, "ftruncate64",
                 unsigned int, fd,
                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
#else
   PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
   PRE_REG_READ2(long, "ftruncate64",
                 unsigned int,fd, UWord,length);
#endif
}

// truncate64(2): same split-length handling as ftruncate64, plus the
// path string check.
PRE(sys_truncate64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
   PRE_REG_READ3(long, "truncate64",
                 const char *, path,
                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
#else
   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
   PRE_REG_READ2(long, "truncate64",
                 const char *,path, UWord,length);
#endif
   PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
}
3095
// getdents(2): kernel fills dirp with up to 'count' bytes of dirents;
// the POST marks only the RES bytes actually written as defined.
PRE(sys_getdents)
{
   *flags |= SfMayBlock;
   PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "getdents",
                 unsigned int, fd, struct linux_dirent *, dirp,
                 unsigned int, count);
   PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
}

POST(sys_getdents)
{
   vg_assert(SUCCESS);
   if (RES > 0)
      POST_MEM_WRITE( ARG2, RES );
}

// getdents64(2): identical handling with the 64-bit dirent layout.
PRE(sys_getdents64)
{
   *flags |= SfMayBlock;
   PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "getdents64",
                 unsigned int, fd, struct linux_dirent64 *, dirp,
                 unsigned int, count);
   PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
}

POST(sys_getdents64)
{
   vg_assert(SUCCESS);
   if (RES > 0)
      POST_MEM_WRITE( ARG2, RES );
}
3129
// getgroups(2): kernel writes at most 'size' gid_t entries into list;
// RES is the number actually written.
PRE(sys_getgroups)
{
   PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
   // NOTE(review): ARG1 is an unsigned word, so a negative 'size' from
   // the client shows up here as a huge positive value and the
   // PRE_MEM_WRITE below may over-report; the kernel rejects such calls
   // with EINVAL anyway -- confirm whether a signed check is wanted.
   if (ARG1 > 0)
      PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
}

POST(sys_getgroups)
{
   vg_assert(SUCCESS);
   if (ARG1 > 0 && RES > 0)
      POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
}
3144
// getcwd(2): the raw syscall returns the number of bytes written
// (including the NUL), unlike the libc wrapper which returns a pointer.
PRE(sys_getcwd)
{
   // Comment from linux/fs/dcache.c:
   //   NOTE! The user-level library version returns a character pointer.
   //   The kernel system call just returns the length of the buffer filled
   //   (which includes the ending '\0' character), or a negative error
   //   value.
   // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
   PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
   PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
   PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
}

POST(sys_getcwd)
{
   vg_assert(SUCCESS);
   if (RES != (Addr)NULL)
      POST_MEM_WRITE( ARG1, RES );   // RES = length written, incl. NUL
}
3164
// The following are trivial credential/pid getters: no arguments to
// check, nothing written back, so each needs only a PRE wrapper.

PRE(sys_geteuid)
{
   PRINT("sys_geteuid ( )");
   PRE_REG_READ0(long, "geteuid");
}

PRE(sys_getegid)
{
   PRINT("sys_getegid ( )");
   PRE_REG_READ0(long, "getegid");
}

PRE(sys_getgid)
{
   PRINT("sys_getgid ( )");
   PRE_REG_READ0(long, "getgid");
}

PRE(sys_getpid)
{
   PRINT("sys_getpid ()");
   PRE_REG_READ0(long, "getpid");
}

PRE(sys_getpgid)
{
   PRINT("sys_getpgid ( %ld )", ARG1);
   PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
}

PRE(sys_getpgrp)
{
   PRINT("sys_getpgrp ()");
   PRE_REG_READ0(long, "getpgrp");
}

PRE(sys_getppid)
{
   PRINT("sys_getppid ()");
   PRE_REG_READ0(long, "getppid");
}
3206
3207static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
3208{
3209   POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
3210
3211#ifdef _RLIMIT_POSIX_FLAG
3212   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
3213   // Unset it here to make the switch case below work correctly.
3214   a1 &= ~_RLIMIT_POSIX_FLAG;
3215#endif
3216
3217   switch (a1) {
3218   case VKI_RLIMIT_NOFILE:
3219      ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
3220      ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
3221      break;
3222
3223   case VKI_RLIMIT_DATA:
3224      *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
3225      break;
3226
3227   case VKI_RLIMIT_STACK:
3228      *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
3229      break;
3230   }
3231}
3232
// old_getrlimit(2): legacy variant; same buffer handling as getrlimit.
PRE(sys_old_getrlimit)
{
   PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "old_getrlimit",
                 unsigned int, resource, struct rlimit *, rlim);
   PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
}

POST(sys_old_getrlimit)
{
   common_post_getrlimit(tid, ARG1, ARG2);
}

// getrlimit(2): kernel fills *rlim; POST substitutes Valgrind's own
// limits for virtualised resources via common_post_getrlimit.
PRE(sys_getrlimit)
{
   PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "getrlimit",
                 unsigned int, resource, struct rlimit *, rlim);
   PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
}

POST(sys_getrlimit)
{
   common_post_getrlimit(tid, ARG1, ARG2);
}
3258
// getrusage(2): kernel fills *usage on success (RES == 0).
PRE(sys_getrusage)
{
   PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
   PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
}

POST(sys_getrusage)
{
   vg_assert(SUCCESS);
   if (RES == 0)
      POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
}
3272
// gettimeofday(2): both pointer args are optional (may be NULL).
PRE(sys_gettimeofday)
{
   PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "gettimeofday",
                 struct timeval *, tv, struct timezone *, tz);
   // GrP fixme does darwin write to *tz anymore?
   if (ARG1 != 0)
      PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
   if (ARG2 != 0)
      PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
}

POST(sys_gettimeofday)
{
   vg_assert(SUCCESS);
   if (RES == 0) {
      if (ARG1 != 0)
         POST_timeval_WRITE( ARG1 );
      if (ARG2 != 0)
	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
   }
}

// settimeofday(2): both pointer args are optional and read-only.
PRE(sys_settimeofday)
{
   PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "settimeofday",
                 struct timeval *, tv, struct timezone *, tz);
   if (ARG1 != 0)
      PRE_timeval_READ( "settimeofday(tv)", ARG1 );
   if (ARG2 != 0) {
      PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
      /* maybe should warn if tz->tz_dsttime is non-zero? */
   }
}

// getuid(2): trivial getter; nothing to check.
PRE(sys_getuid)
{
   PRINT("sys_getuid ( )");
   PRE_REG_READ0(long, "getuid");
}
3314
// Fallback PRE handling for ioctls with no specific wrapper: derive
// read/write annotations from the direction and size bits encoded in
// the request number itself.
void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
{
   /* We don't have any specific information on it, so
      try to do something reasonable based on direction and
      size bits.  The encoding scheme is described in
      /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .

      According to Simon Hausmann, _IOC_READ means the kernel
      writes a value to the ioctl value passed from the user
      space and the other way around with _IOC_WRITE. */

   UInt dir  = _VKI_IOC_DIR(request);
   UInt size = _VKI_IOC_SIZE(request);
   if (VG_(strstr)(VG_(clo_sim_hints), "lax-ioctls") != NULL) {
      /*
       * Be very lax about ioctl handling; the only
       * assumption is that the size is correct. Doesn't
       * require the full buffer to be initialized when
       * writing.  Without this, using some device
       * drivers with a large number of strange ioctl
       * commands becomes very tiresome.
       */
   } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
      //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
      //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
      // No direction hints at all: warn (at most 3 times) rather than
      // guess at annotations.
      static Int moans = 3;
      if (moans > 0 && !VG_(clo_xml)) {
         moans--;
         VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
                   " with no size/direction hints\n", request);
         VG_(umsg)("   This could cause spurious value errors to appear.\n");
         VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
                   "guidance on writing a proper wrapper.\n" );
      }
   } else {
      //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
      //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
      // _IOC_WRITE: kernel reads the buffer; _IOC_READ: kernel writes it.
      if ((dir & _VKI_IOC_WRITE) && size > 0)
         PRE_MEM_READ( "ioctl(generic)", arg, size);
      if ((dir & _VKI_IOC_READ) && size > 0)
         PRE_MEM_WRITE( "ioctl(generic)", arg, size);
   }
}
3358
3359void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3360{
3361   /* We don't have any specific information on it, so
3362      try to do something reasonable based on direction and
3363      size bits.  The encoding scheme is described in
3364      /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3365
3366      According to Simon Hausmann, _IOC_READ means the kernel
3367      writes a value to the ioctl value passed from the user
3368      space and the other way around with _IOC_WRITE. */
3369
3370   UInt dir  = _VKI_IOC_DIR(request);
3371   UInt size = _VKI_IOC_SIZE(request);
3372   if (size > 0 && (dir & _VKI_IOC_READ)
3373       && res == 0
3374       && arg != (Addr)NULL)
3375   {
3376      POST_MEM_WRITE(arg, size);
3377   }
3378}
3379
3380/*
3381   If we're sending a SIGKILL to one of our own threads, then simulate
3382   it rather than really sending the signal, so that the target thread
3383   gets a chance to clean up.  Returns True if we did the killing (or
3384   no killing is necessary), and False if the caller should use the
3385   normal kill syscall.
3386
3387   "pid" is any pid argument which can be passed to kill; group kills
3388   (< -1, 0), and owner kills (-1) are ignored, on the grounds that
3389   they'll most likely hit all the threads and we won't need to worry
3390   about cleanup.  In truth, we can't fully emulate these multicast
3391   kills.
3392
3393   "tgid" is a thread group id.  If it is not -1, then the target
3394   thread must be in that thread group.
3395 */
3396Bool ML_(do_sigkill)(Int pid, Int tgid)
3397{
3398   ThreadState *tst;
3399   ThreadId tid;
3400
3401   if (pid <= 0)
3402      return False;
3403
3404   tid = VG_(lwpid_to_vgtid)(pid);
3405   if (tid == VG_INVALID_THREADID)
3406      return False;		/* none of our threads */
3407
3408   tst = VG_(get_ThreadState)(tid);
3409   if (tst == NULL || tst->status == VgTs_Empty)
3410      return False;		/* hm, shouldn't happen */
3411
3412   if (tgid != -1 && tst->os_state.threadgroup != tgid)
3413      return False;		/* not the right thread group */
3414
3415   /* Check to see that the target isn't already exiting. */
3416   if (!VG_(is_exiting)(tid)) {
3417      if (VG_(clo_trace_signals))
3418	 VG_(message)(Vg_DebugMsg,
3419                      "Thread %d being killed with SIGKILL\n",
3420                      tst->tid);
3421
3422      tst->exitreason = VgSrc_FatalSig;
3423      tst->os_state.fatalsig = VKI_SIGKILL;
3424
3425      if (!VG_(is_running_thread)(tid))
3426	 VG_(get_thread_out_of_syscall)(tid);
3427   }
3428
3429   return True;
3430}
3431
// kill(2): validate the signal number, simulate SIGKILL aimed at one of
// our own threads (via ML_(do_sigkill)), otherwise do the real syscall.
PRE(sys_kill)
{
   PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "kill", int, pid, int, sig);
   if (!ML_(client_signal_OK)(ARG2)) {
      SET_STATUS_Failure( VKI_EINVAL );
      return;
   }

   /* If we're sending SIGKILL, check to see if the target is one of
      our threads and handle it specially. */
   if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
      SET_STATUS_Success(0);
   else
      /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
         affecting how posix-compliant the call is.  I guess it is
         harmless to pass the 3rd arg on other platforms; hence pass
         it on all. */
      SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );

   if (VG_(clo_trace_signals))
      VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
		   ARG2, ARG1);

   /* This kill might have given us a pending signal.  Ask for a check once
      the syscall is done. */
   *flags |= SfPollAfter;
}
3460
// link(2): check both path strings.
PRE(sys_link)
{
   *flags |= SfMayBlock;
   PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
   PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
   PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
}

// lstat(2) ("new" struct stat layout): path in, *buf out.
PRE(sys_newlstat)
{
   PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
   PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
   PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
}

POST(sys_newlstat)
{
   vg_assert(SUCCESS);
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
}

// mkdir(2): check the pathname string.
PRE(sys_mkdir)
{
   *flags |= SfMayBlock;
   PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
}
3491
// mprotect(2): reject ranges outside the client's address space, and
// rewrite ARG1..ARG3 in place for PROT_GROWSDOWN/GROWSUP requests so
// the kernel sees a plain mprotect over the whole stack vma.
PRE(sys_mprotect)
{
   PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "mprotect",
                 unsigned long, addr, vki_size_t, len, unsigned long, prot);

   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
      SET_STATUS_Failure( VKI_ENOMEM );
   }
#if defined(VKI_PROT_GROWSDOWN)
   else
   if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
      /* Deal with mprotects on growable stack areas.

         The critical files to understand all this are mm/mprotect.c
         in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
         glibc.

         The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
         round the start/end address of mprotect to the start/end of
         the underlying vma and glibc uses that as an easy way to
         change the protection of the stack by calling mprotect on the
         last page of the stack with PROT_GROWSDOWN set.

         The sanity check provided by the kernel is that the vma must
         have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
      UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
      NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
      NSegment const *rseg;

      vg_assert(aseg);

      if (grows == VKI_PROT_GROWSDOWN) {
         // Valid only if the segment below is the matching reservation;
         // then extend the range down to the start of this segment.
         rseg = VG_(am_next_nsegment)( (NSegment*)aseg, False/*backwards*/ );
         if (rseg &&
             rseg->kind == SkResvn &&
             rseg->smode == SmUpper &&
             rseg->end+1 == aseg->start) {
            Addr end = ARG1 + ARG2;
            ARG1 = aseg->start;
            ARG2 = end - aseg->start;
            ARG3 &= ~VKI_PROT_GROWSDOWN;
         } else {
            SET_STATUS_Failure( VKI_EINVAL );
         }
      } else if (grows == VKI_PROT_GROWSUP) {
         // Mirror image: reservation must sit immediately above.
         rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
         if (rseg &&
             rseg->kind == SkResvn &&
             rseg->smode == SmLower &&
             aseg->end+1 == rseg->start) {
            ARG2 = aseg->end - ARG1 + 1;
            ARG3 &= ~VKI_PROT_GROWSUP;
         } else {
            SET_STATUS_Failure( VKI_EINVAL );
         }
      } else {
         /* both GROWSUP and GROWSDOWN */
         SET_STATUS_Failure( VKI_EINVAL );
      }
   }
#endif   // defined(VKI_PROT_GROWSDOWN)
}
3555
// Tell the core and tool about the (possibly PRE-adjusted) range whose
// protection just changed.
POST(sys_mprotect)
{
   Addr a    = ARG1;
   SizeT len = ARG2;
   Int  prot = ARG3;

   ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
}

// munmap(2): reject ranges outside the client's address space.
PRE(sys_munmap)
{
   if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
   PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
   PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);

   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
      SET_STATUS_Failure( VKI_EINVAL );
}

POST(sys_munmap)
{
   Addr  a   = ARG1;
   SizeT len = ARG2;

   ML_(notify_core_and_tool_of_munmap)( (Addr64)a, (ULong)len );
}
3582
// mincore(2): kernel writes one byte per page of the queried range into
// vec, hence the page-rounded length / page-size calculation.
PRE(sys_mincore)
{
   PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "mincore",
                 unsigned long, start, vki_size_t, length,
                 unsigned char *, vec);
   PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
}
POST(sys_mincore)
{
   POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
}
3595
// nanosleep(2): 'rem' is written only when the sleep is interrupted,
// hence SfPostOnFail and the EINTR check in the POST.
PRE(sys_nanosleep)
{
   *flags |= SfMayBlock|SfPostOnFail;
   PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "nanosleep",
                 struct timespec *, req, struct timespec *, rem);
   PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
   if (ARG2 != 0)
      PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
}

POST(sys_nanosleep)
{
   vg_assert(SUCCESS || FAILURE);
   if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
      POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
}
3613
// open(2): 2- or 3-arg form depending on O_CREAT.  On Linux, opens of
// /proc/<self-pid>/cmdline are redirected to Valgrind's pre-cooked copy
// so the client sees its own command line, not Valgrind's.
PRE(sys_open)
{
   if (ARG2 & VKI_O_CREAT) {
      // 3-arg version
      PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
      PRE_REG_READ3(long, "open",
                    const char *, filename, int, flags, int, mode);
   } else {
      // 2-arg version
      PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
      PRE_REG_READ2(long, "open",
                    const char *, filename, int, flags);
   }
   PRE_MEM_RASCIIZ( "open(filename)", ARG1 );

#if defined(VGO_linux)
   /* Handle the case where the open is of /proc/self/cmdline or
      /proc/<pid>/cmdline, and just give it a copy of the fd for the
      fake file we cooked up at startup (in m_main).  Also, seek the
      cloned fd back to the start. */
   {
      HChar  name[30];
      Char*  arg1s = (Char*) ARG1;
      SysRes sres;

      VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
      if (ML_(safe_to_deref)( arg1s, 1 ) &&
          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
         )
      {
         sres = VG_(dup)( VG_(cl_cmdline_fd) );
         SET_STATUS_from_SysRes( sres );
         if (!sr_isError(sres)) {
            // Rewind the duplicate so the client reads from the start.
            OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
            if (off < 0)
               SET_STATUS_Failure( VKI_EMFILE );
         }
         return;
      }
   }
#endif // defined(VGO_linux)

   /* Otherwise handle normally */
   *flags |= SfMayBlock;
}
3659
// Reject fds landing in Valgrind's reserved range (close + EMFILE);
// otherwise record the open with its pathname for --track-fds.
POST(sys_open)
{
   vg_assert(SUCCESS);
   if (!ML_(fd_allowed)(RES, "open", tid, True)) {
      VG_(close)(RES);
      SET_STATUS_Failure( VKI_EMFILE );
   } else {
      if (VG_(clo_track_fds))
         ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
   }
}
3671
3672PRE(sys_read)
3673{
3674   *flags |= SfMayBlock;
3675   PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3676   PRE_REG_READ3(ssize_t, "read",
3677                 unsigned int, fd, char *, buf, vki_size_t, count);
3678
3679   if (!ML_(fd_allowed)(ARG1, "read", tid, False))
3680      SET_STATUS_Failure( VKI_EBADF );
3681   else
3682      PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
3683}
3684
POST(sys_read)
{
   vg_assert(SUCCESS);
   /* RES is the byte count actually delivered into the buffer. */
   POST_MEM_WRITE( ARG2, RES );
}
3690
3691PRE(sys_write)
3692{
3693   Bool ok;
3694   *flags |= SfMayBlock;
3695   PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3696   PRE_REG_READ3(ssize_t, "write",
3697                 unsigned int, fd, const char *, buf, vki_size_t, count);
3698   /* check to see if it is allowed.  If not, try for an exemption from
3699      --sim-hints=enable-outer (used for self hosting). */
3700   ok = ML_(fd_allowed)(ARG1, "write", tid, False);
3701   if (!ok && ARG1 == 2/*stderr*/
3702           && VG_(strstr)(VG_(clo_sim_hints),"enable-outer"))
3703      ok = True;
3704   if (!ok)
3705      SET_STATUS_Failure( VKI_EBADF );
3706   else
3707      PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
3708}
3709
/* int creat(const char *pathname, mode_t mode);
   Equivalent to open(pathname, O_CREAT|O_WRONLY|O_TRUNC, mode). */
PRE(sys_creat)
{
   *flags |= SfMayBlock;
   PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
}
3717
3718POST(sys_creat)
3719{
3720   vg_assert(SUCCESS);
3721   if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
3722      VG_(close)(RES);
3723      SET_STATUS_Failure( VKI_EMFILE );
3724   } else {
3725      if (VG_(clo_track_fds))
3726         ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
3727   }
3728}
3729
3730PRE(sys_poll)
3731{
3732   /* struct pollfd {
3733        int fd;           -- file descriptor
3734        short events;     -- requested events
3735        short revents;    -- returned events
3736      };
3737      int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
3738   */
3739   UInt i;
3740   struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3741   *flags |= SfMayBlock;
3742   PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
3743   PRE_REG_READ3(long, "poll",
3744                 struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
3745
3746   for (i = 0; i < ARG2; i++) {
3747      PRE_MEM_READ( "poll(ufds.fd)",
3748                    (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
3749      PRE_MEM_READ( "poll(ufds.events)",
3750                    (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
3751      PRE_MEM_WRITE( "poll(ufds.reventss)",
3752                     (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3753   }
3754}
3755
3756POST(sys_poll)
3757{
3758   if (RES >= 0) {
3759      UInt i;
3760      struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3761      for (i = 0; i < ARG2; i++)
3762	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3763   }
3764}
3765
/* ssize_t readlink(const char *path, char *buf, size_t bufsiz);
   NOTE: this wrapper performs the syscall itself (so that the
   /proc/<pid>/exe path can be redirected to the client executable's
   fd) and sets both status and POST markup here; there is no
   separate POST handler. */
PRE(sys_readlink)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   Word saved = SYSNO;

   PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
   PRE_REG_READ3(long, "readlink",
                 const char *, path, char *, buf, int, bufsiz);
   PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
   PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );

   {
#if defined(VGO_linux)
      /*
       * Handle the case where readlink is looking at /proc/self/exe or
       * /proc/<pid>/exe.
       */
      HChar name[25];
      Char* arg1s = (Char*) ARG1;
      VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
      // Only compare when the client pointer is dereferenceable.
      if (ML_(safe_to_deref)(arg1s, 1) &&
          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
         )
      {
         // Redirect to the fd of the client executable that Valgrind
         // keeps open, so the client sees its own path, not Valgrind's.
         VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
                                                         ARG2, ARG3));
      } else
#endif // defined(VGO_linux)
      {
         /* Normal case */
         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
      }
   }

   /* readlink does not NUL-terminate; RES bytes of buf are defined. */
   if (SUCCESS && RES > 0)
      POST_MEM_WRITE( ARG2, RES );
}
3804
3805PRE(sys_readv)
3806{
3807   Int i;
3808   struct vki_iovec * vec;
3809   *flags |= SfMayBlock;
3810   PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
3811   PRE_REG_READ3(ssize_t, "readv",
3812                 unsigned long, fd, const struct iovec *, vector,
3813                 unsigned long, count);
3814   if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
3815      SET_STATUS_Failure( VKI_EBADF );
3816   } else {
3817      PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
3818
3819      if (ARG2 != 0) {
3820         /* ToDo: don't do any of the following if the vector is invalid */
3821         vec = (struct vki_iovec *)ARG2;
3822         for (i = 0; i < (Int)ARG3; i++)
3823            PRE_MEM_WRITE( "readv(vector[...])",
3824                           (Addr)vec[i].iov_base, vec[i].iov_len );
3825      }
3826   }
3827}
3828
3829POST(sys_readv)
3830{
3831   vg_assert(SUCCESS);
3832   if (RES > 0) {
3833      Int i;
3834      struct vki_iovec * vec = (struct vki_iovec *)ARG2;
3835      Int remains = RES;
3836
3837      /* RES holds the number of bytes read. */
3838      for (i = 0; i < (Int)ARG3; i++) {
3839	 Int nReadThisBuf = vec[i].iov_len;
3840	 if (nReadThisBuf > remains) nReadThisBuf = remains;
3841	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
3842	 remains -= nReadThisBuf;
3843	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
3844      }
3845   }
3846}
3847
/* int rename(const char *oldpath, const char *newpath); */
PRE(sys_rename)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
   PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
   PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
}
3856
/* int rmdir(const char *pathname); */
PRE(sys_rmdir)
{
   *flags |= SfMayBlock;
   PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "rmdir", const char *, pathname);
   PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
}
3864
3865PRE(sys_select)
3866{
3867   *flags |= SfMayBlock;
3868   PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
3869   PRE_REG_READ5(long, "select",
3870                 int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
3871                 vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
3872   // XXX: this possibly understates how much memory is read.
3873   if (ARG2 != 0)
3874      PRE_MEM_READ( "select(readfds)",
3875		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
3876   if (ARG3 != 0)
3877      PRE_MEM_READ( "select(writefds)",
3878		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
3879   if (ARG4 != 0)
3880      PRE_MEM_READ( "select(exceptfds)",
3881		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
3882   if (ARG5 != 0)
3883      PRE_timeval_READ( "select(timeout)", ARG5 );
3884}
3885
/* int setgid(gid_t gid); -- no memory operands, register read only. */
PRE(sys_setgid)
{
   PRINT("sys_setgid ( %ld )", ARG1);
   PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
}
3891
/* pid_t setsid(void); -- takes no arguments. */
PRE(sys_setsid)
{
   PRINT("sys_setsid ( )");
   PRE_REG_READ0(long, "setsid");
}
3897
/* int setgroups(int size, const gid_t *list); */
PRE(sys_setgroups)
{
   PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
   PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
   // list is only dereferenced by the kernel when size > 0.
   if (ARG1 > 0)
      PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
}
3905
/* int setpgid(pid_t pid, pid_t pgid); -- register reads only. */
PRE(sys_setpgid)
{
   PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
   PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
}
3911
/* int setregid(gid_t rgid, gid_t egid); -- register reads only. */
PRE(sys_setregid)
{
   PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
   PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
}
3917
/* int setreuid(uid_t ruid, uid_t euid); -- register reads only. */
PRE(sys_setreuid)
{
   PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
}
3923
3924PRE(sys_setrlimit)
3925{
3926   UWord arg1 = ARG1;
3927   PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
3928   PRE_REG_READ2(long, "setrlimit",
3929                 unsigned int, resource, struct rlimit *, rlim);
3930   PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3931
3932#ifdef _RLIMIT_POSIX_FLAG
3933   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
3934   // Unset it here to make the if statements below work correctly.
3935   arg1 &= ~_RLIMIT_POSIX_FLAG;
3936#endif
3937
3938   if (arg1 == VKI_RLIMIT_NOFILE) {
3939      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
3940          ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
3941         SET_STATUS_Failure( VKI_EPERM );
3942      }
3943      else {
3944         VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
3945         SET_STATUS_Success( 0 );
3946      }
3947   }
3948   else if (arg1 == VKI_RLIMIT_DATA) {
3949      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
3950          ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
3951         SET_STATUS_Failure( VKI_EPERM );
3952      }
3953      else {
3954         VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
3955         SET_STATUS_Success( 0 );
3956      }
3957   }
3958   else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
3959      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
3960          ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
3961         SET_STATUS_Failure( VKI_EPERM );
3962      }
3963      else {
3964         VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
3965         VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
3966         SET_STATUS_Success( 0 );
3967      }
3968   }
3969}
3970
/* int setuid(uid_t uid); -- register read only. */
PRE(sys_setuid)
{
   PRINT("sys_setuid ( %ld )", ARG1);
   PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
}
3976
/* int stat(const char *file_name, struct stat *buf); */
PRE(sys_newstat)
{
   PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
   PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
   PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
}
3984
POST(sys_newstat)
{
   /* Kernel filled in the whole stat buffer. */
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
}
3989
/* int statfs(const char *path, struct statfs *buf); */
PRE(sys_statfs)
{
   PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
   PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
   PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
}
POST(sys_statfs)
{
   /* Kernel filled in the whole statfs buffer. */
   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
}
4001
/* int statfs64(const char *path, size_t size, struct statfs64 *buf);
   The caller supplies the buffer size explicitly (ARG2). */
PRE(sys_statfs64)
{
   PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "statfs64",
                 const char *, path, vki_size_t, size, struct statfs64 *, buf);
   PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
   PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
}
POST(sys_statfs64)
{
   /* Buffer size was caller-supplied in ARG2. */
   POST_MEM_WRITE( ARG3, ARG2 );
}
4014
/* int symlink(const char *oldpath, const char *newpath); */
PRE(sys_symlink)
{
   *flags |= SfMayBlock;
   PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
   PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
   PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
}
4023
PRE(sys_time)
{
   /* time_t time(time_t *t); */
   PRINT("sys_time ( %#lx )",ARG1);
   PRE_REG_READ1(long, "time", int *, t);
   // t may be NULL, in which case the kernel writes nothing.
   if (ARG1 != 0) {
      PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
   }
}
4033
POST(sys_time)
{
   // Only written when a non-NULL pointer was supplied.
   if (ARG1 != 0) {
      POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
   }
}
4040
/* clock_t times(struct tms *buf); */
PRE(sys_times)
{
   PRINT("sys_times ( %#lx )", ARG1);
   PRE_REG_READ1(long, "times", struct tms *, buf);
   // buf may be NULL.
   if (ARG1 != 0) {
      PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
   }
}
4049
POST(sys_times)
{
   // Only written when a non-NULL pointer was supplied.
   if (ARG1 != 0) {
      POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
   }
}
4056
/* mode_t umask(mode_t mask); -- register read only, cannot fail. */
PRE(sys_umask)
{
   PRINT("sys_umask ( %ld )", ARG1);
   PRE_REG_READ1(long, "umask", int, mask);
}
4062
/* int unlink(const char *pathname); */
PRE(sys_unlink)
{
   *flags |= SfMayBlock;
   PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "unlink", const char *, pathname);
   PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
}
4070
/* int uname(struct new_utsname *buf); */
PRE(sys_newuname)
{
   PRINT("sys_newuname ( %#lx )", ARG1);
   PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
   PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
}
4077
POST(sys_newuname)
{
   if (ARG1 != 0) {
      POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
   }
}
4084
/* pid_t waitpid(pid_t pid, int *status, int options); */
PRE(sys_waitpid)
{
   *flags |= SfMayBlock;
   PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "waitpid",
                 vki_pid_t, pid, unsigned int *, status, int, options);

   // status may be NULL.
   if (ARG2 != (Addr)NULL)
      PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
}
4095
POST(sys_waitpid)
{
   if (ARG2 != (Addr)NULL)
      POST_MEM_WRITE( ARG2, sizeof(int) );
}
4101
/* pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); */
PRE(sys_wait4)
{
   *flags |= SfMayBlock;
   PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);

   PRE_REG_READ4(long, "wait4",
                 vki_pid_t, pid, unsigned int *, status, int, options,
                 struct rusage *, rusage);
   // Both out-pointers may be NULL.
   if (ARG2 != (Addr)NULL)
      PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
   if (ARG4 != (Addr)NULL)
      PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
}
4115
POST(sys_wait4)
{
   if (ARG2 != (Addr)NULL)
      POST_MEM_WRITE( ARG2, sizeof(int) );
   if (ARG4 != (Addr)NULL)
      POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
}
4123
4124PRE(sys_writev)
4125{
4126   Int i;
4127   struct vki_iovec * vec;
4128   *flags |= SfMayBlock;
4129   PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
4130   PRE_REG_READ3(ssize_t, "writev",
4131                 unsigned long, fd, const struct iovec *, vector,
4132                 unsigned long, count);
4133   if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
4134      SET_STATUS_Failure( VKI_EBADF );
4135   } else {
4136      PRE_MEM_READ( "writev(vector)",
4137		     ARG2, ARG3 * sizeof(struct vki_iovec) );
4138      if (ARG2 != 0) {
4139         /* ToDo: don't do any of the following if the vector is invalid */
4140         vec = (struct vki_iovec *)ARG2;
4141         for (i = 0; i < (Int)ARG3; i++)
4142            PRE_MEM_READ( "writev(vector[...])",
4143                           (Addr)vec[i].iov_base, vec[i].iov_len );
4144      }
4145   }
4146}
4147
/* int utimes(const char *filename, const struct timeval tvp[2]); */
PRE(sys_utimes)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
   PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
   // tvp may be NULL (meaning "current time"); otherwise it points at
   // a 2-element array: access time then modification time.
   if (ARG2 != 0) {
      PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
      PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
   }
}
4159
/* int acct(const char *filename); */
PRE(sys_acct)
{
   PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "acct", const char *, filename);
   PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
}
4166
/* int pause(void); -- blocks until a signal arrives. */
PRE(sys_pause)
{
   *flags |= SfMayBlock;
   PRINT("sys_pause ( )");
   PRE_REG_READ0(long, "pause");
}
4173
/* int sigaltstack(const stack_t *ss, stack_t *oss);
   NOTE: the call is emulated entirely inside Valgrind
   (VG_(do_sys_sigaltstack)), since Valgrind manages the client's
   alternate signal stacks itself; the status is set here in PRE. */
PRE(sys_sigaltstack)
{
   PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
   PRE_REG_READ2(int, "sigaltstack",
                 const vki_stack_t *, ss, vki_stack_t *, oss);
   // Only the three fields actually consulted are checked, not the
   // full struct (it may contain padding).
   if (ARG1 != 0) {
      const vki_stack_t *ss = (vki_stack_t *)ARG1;
      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
   }
   if (ARG2 != 0) {
      PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
   }

   SET_STATUS_from_SysRes(
      VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
                              (vki_stack_t*)ARG2)
   );
}
POST(sys_sigaltstack)
{
   vg_assert(SUCCESS);
   // oss is only filled in on success and when a non-NULL oss was given.
   if (RES == 0 && ARG2 != 0)
      POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
}
4200
4201#undef PRE
4202#undef POST
4203
4204#endif // defined(VGO_linux) || defined(VGO_darwin)
4205
4206/*--------------------------------------------------------------------*/
4207/*--- end                                                          ---*/
4208/*--------------------------------------------------------------------*/
4209