1
2/*--------------------------------------------------------------------*/
3/*--- Wrappers for generic (non-AIX5!) Unix system calls           ---*/
4/*---                                            syswrap-generic.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2010 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#if defined(VGO_linux) || defined(VGO_darwin)
33
34#include "pub_core_basics.h"
35#include "pub_core_vki.h"
36#include "pub_core_vkiscnums.h"
37#include "pub_core_threadstate.h"
38#include "pub_core_debuginfo.h"     // VG_(di_notify_*)
39#include "pub_core_aspacemgr.h"
40#include "pub_core_transtab.h"      // VG_(discard_translations)
41#include "pub_core_xarray.h"
42#include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
43#include "pub_core_debuglog.h"
44#include "pub_core_errormgr.h"
45#include "pub_core_libcbase.h"
46#include "pub_core_libcassert.h"
47#include "pub_core_libcfile.h"
48#include "pub_core_libcprint.h"
49#include "pub_core_libcproc.h"
50#include "pub_core_libcsignal.h"
51#include "pub_core_machine.h"       // VG_(get_SP)
52#include "pub_core_mallocfree.h"
53#include "pub_core_options.h"
54#include "pub_core_scheduler.h"
55#include "pub_core_signals.h"
56#include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
57#include "pub_core_syscall.h"
58#include "pub_core_syswrap.h"
59#include "pub_core_tooliface.h"
60#include "pub_core_ume.h"
61
62#include "priv_types_n_macros.h"
63#include "priv_syswrap-generic.h"
64
65#include "config.h"
66
67
68/* Returns True iff address range is something the client can
69   plausibly mess with: all of it is either already belongs to the
70   client or is free or a reservation. */
71
72Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
73                                   const Char *syscallname)
74{
75   Bool ret;
76
77   if (size == 0)
78      return True;
79
80   ret = VG_(am_is_valid_for_client_or_free_or_resvn)
81            (start,size,VKI_PROT_NONE);
82
83   if (0)
84      VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
85		  syscallname, start, start+size-1, (Int)ret);
86
87   if (!ret && syscallname != NULL) {
88      VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
89                               "to modify addresses %#lx-%#lx\n",
90                               syscallname, start, start+size-1);
91      if (VG_(clo_verbosity) > 1) {
92         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
93      }
94   }
95
96   return ret;
97}
98
99
100Bool ML_(client_signal_OK)(Int sigNo)
101{
102   /* signal 0 is OK for kill */
103   Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
104
105   //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
106
107   return ret;
108}
109
110
111/* Handy small function to help stop wrappers from segfaulting when
112   presented with bogus client addresses.  Is not used for generating
113   user-visible errors. */
114
115Bool ML_(safe_to_deref) ( void* start, SizeT size )
116{
117   return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
118}
119
120
121/* ---------------------------------------------------------------------
122   Doing mmap, mremap
123   ------------------------------------------------------------------ */
124
125/* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
126   munmap, mprotect (and mremap??) work at the page level.  So addresses
127   and lengths must be adjusted for this. */
128
129/* Mash around start and length so that the area exactly covers
130   an integral number of pages.  If we don't do that, memcheck's
131   idea of addressible memory diverges from that of the
132   kernel's, which causes the leak detector to crash. */
133static
134void page_align_addr_and_len( Addr* a, SizeT* len)
135{
136   Addr ra;
137
138   ra = VG_PGROUNDDN(*a);
139   *len = VG_PGROUNDUP(*a + *len) - ra;
140   *a = ra;
141}
142
143static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
144                                UInt flags, Int fd, Off64T offset)
145{
146   Bool d;
147
148   /* 'a' is the return value from a real kernel mmap, hence: */
149   vg_assert(VG_IS_PAGE_ALIGNED(a));
150   /* whereas len is whatever the syscall supplied.  So: */
151   len = VG_PGROUNDUP(len);
152
153   d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
154
155   if (d)
156      VG_(discard_translations)( (Addr64)a, (ULong)len,
157                                 "notify_core_of_mmap" );
158}
159
160static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
161{
162   SizeT fourgig = (1ULL << 32);
163   SizeT guardpage = 10 * fourgig;
164   Bool rr, ww, xx;
165
166   /* 'a' is the return value from a real kernel mmap, hence: */
167   vg_assert(VG_IS_PAGE_ALIGNED(a));
168   /* whereas len is whatever the syscall supplied.  So: */
169   len = VG_PGROUNDUP(len);
170
171   rr = toBool(prot & VKI_PROT_READ);
172   ww = toBool(prot & VKI_PROT_WRITE);
173   xx = toBool(prot & VKI_PROT_EXEC);
174
175#ifdef VGA_amd64
176   if (len >= fourgig + 2 * guardpage) {
177     VG_(printf)("Valgrind: ignoring NaCl's mmap(84G)\n");
178     return;
179   }
180#endif  // VGA_amd64
181   VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
182}
183
184
185/* When a client mmap has been successfully done, this function must
186   be called.  It notifies both aspacem and the tool of the new
187   mapping.
188
189   JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
190   it is called from is POST(sys_io_setup).  In particular,
191   ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
192   client mmap.  But it doesn't call this function; instead it does the
193   relevant notifications itself.  Here, we just pass di_handle=0 to
194   notify_tool_of_mmap as we have no better information.  But really this
195   function should be done away with; problem is I don't understand what
196   POST(sys_io_setup) does or how it works.
197
198   [However, this function is used lots for Darwin, because
199    ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
200 */
201void
202ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
203                                    UInt flags, Int fd, Off64T offset )
204{
205   // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
206   // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
207   // Should it?  --njn
208   notify_core_of_mmap(a, len, prot, flags, fd, offset);
209   notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
210}
211
212void
213ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
214{
215   Bool d;
216
217   page_align_addr_and_len(&a, &len);
218   d = VG_(am_notify_munmap)(a, len);
219   VG_TRACK( die_mem_munmap, a, len );
220   VG_(di_notify_munmap)( a, len );
221   if (d)
222      VG_(discard_translations)( (Addr64)a, (ULong)len,
223                                 "ML_(notify_core_and_tool_of_munmap)" );
224}
225
226void
227ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
228{
229   Bool rr = toBool(prot & VKI_PROT_READ);
230   Bool ww = toBool(prot & VKI_PROT_WRITE);
231   Bool xx = toBool(prot & VKI_PROT_EXEC);
232   Bool d;
233
234   page_align_addr_and_len(&a, &len);
235   d = VG_(am_notify_mprotect)(a, len, prot);
236   VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
237   VG_(di_notify_mprotect)( a, len, prot );
238   if (d)
239      VG_(discard_translations)( (Addr64)a, (ULong)len,
240                                 "ML_(notify_core_and_tool_of_mprotect)" );
241}
242
243
244
245#if HAVE_MREMAP
246/* Expand (or shrink) an existing mapping, potentially moving it at
247   the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
248*/
249static
250SysRes do_mremap( Addr old_addr, SizeT old_len,
251                  Addr new_addr, SizeT new_len,
252                  UWord flags, ThreadId tid )
253{
254#  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
255
256   Bool      ok, d;
257   NSegment const* old_seg;
258   Addr      advised;
259   Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
260   Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
261
262   if (0)
263      VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
264                  old_addr,old_len,new_addr,new_len,
265                  flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
266                  flags & VKI_MREMAP_FIXED ? "FIXED" : "");
267   if (0)
268      VG_(am_show_nsegments)(0, "do_remap: before");
269
270   if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
271      goto eINVAL;
272
273   if (!VG_IS_PAGE_ALIGNED(old_addr))
274      goto eINVAL;
275
276   old_len = VG_PGROUNDUP(old_len);
277   new_len = VG_PGROUNDUP(new_len);
278
279   if (new_len == 0)
280      goto eINVAL;
281
282   /* kernel doesn't reject this, but we do. */
283   if (old_len == 0)
284      goto eINVAL;
285
286   /* reject wraparounds */
287   if (old_addr + old_len < old_addr
288       || new_addr + new_len < new_len)
289      goto eINVAL;
290
291   /* kernel rejects all fixed, no-move requests (which are
292      meaningless). */
293   if (f_fixed == True && f_maymove == False)
294      goto eINVAL;
295
296   /* Stay away from non-client areas. */
297   if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
298      goto eINVAL;
299
300   /* In all remaining cases, if the old range does not fall within a
301      single segment, fail. */
302   old_seg = VG_(am_find_nsegment)( old_addr );
303   if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
304      goto eINVAL;
305   if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
306      goto eINVAL;
307
308   vg_assert(old_len > 0);
309   vg_assert(new_len > 0);
310   vg_assert(VG_IS_PAGE_ALIGNED(old_len));
311   vg_assert(VG_IS_PAGE_ALIGNED(new_len));
312   vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
313
314   /* There are 3 remaining cases:
315
316      * maymove == False
317
318        new space has to be at old address, so:
319            - shrink    -> unmap end
320            - same size -> do nothing
321            - grow      -> if can grow in-place, do so, else fail
322
323      * maymove == True, fixed == False
324
325        new space can be anywhere, so:
326            - shrink    -> unmap end
327            - same size -> do nothing
328            - grow      -> if can grow in-place, do so, else
329                           move to anywhere large enough, else fail
330
331      * maymove == True, fixed == True
332
333        new space must be at new address, so:
334
335            - if new address is not page aligned, fail
336            - if new address range overlaps old one, fail
337            - if new address range cannot be allocated, fail
338            - else move to new address range with new size
339            - else fail
340   */
341
342   if (f_maymove == False) {
343      /* new space has to be at old address */
344      if (new_len < old_len)
345         goto shrink_in_place;
346      if (new_len > old_len)
347         goto grow_in_place_or_fail;
348      goto same_in_place;
349   }
350
351   if (f_maymove == True && f_fixed == False) {
352      /* new space can be anywhere */
353      if (new_len < old_len)
354         goto shrink_in_place;
355      if (new_len > old_len)
356         goto grow_in_place_or_move_anywhere_or_fail;
357      goto same_in_place;
358   }
359
360   if (f_maymove == True && f_fixed == True) {
361      /* new space can only be at the new address */
362      if (!VG_IS_PAGE_ALIGNED(new_addr))
363         goto eINVAL;
364      if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
365         /* no overlap */
366      } else {
367         goto eINVAL;
368      }
369      if (new_addr == 0)
370         goto eINVAL;
371         /* VG_(am_get_advisory_client_simple) interprets zero to mean
372            non-fixed, which is not what we want */
373      advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
374      if (!ok || advised != new_addr)
375         goto eNOMEM;
376      ok = VG_(am_relocate_nooverlap_client)
377              ( &d, old_addr, old_len, new_addr, new_len );
378      if (ok) {
379         VG_TRACK( copy_mem_remap, old_addr, new_addr,
380                                   MIN_SIZET(old_len,new_len) );
381         if (new_len > old_len)
382            VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
383                      old_seg->hasR, old_seg->hasW, old_seg->hasX,
384                      0/*di_handle*/ );
385         VG_TRACK(die_mem_munmap, old_addr, old_len);
386         if (d) {
387            VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
388            VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
389         }
390         return VG_(mk_SysRes_Success)( new_addr );
391      }
392      goto eNOMEM;
393   }
394
395   /* end of the 3 cases */
396   /*NOTREACHED*/ vg_assert(0);
397
398  grow_in_place_or_move_anywhere_or_fail:
399   {
400   /* try growing it in-place */
401   Addr   needA = old_addr + old_len;
402   SSizeT needL = new_len - old_len;
403
404   vg_assert(needL > 0);
405   if (needA == 0)
406      goto eINVAL;
407      /* VG_(am_get_advisory_client_simple) interprets zero to mean
408         non-fixed, which is not what we want */
409   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
410   if (ok) {
411      /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
412         this-or-nothing) is too lenient, and may allow us to trash
413         the next segment along.  So make very sure that the proposed
414         new area really is free.  This is perhaps overly
415         conservative, but it fixes #129866. */
416      NSegment const* segLo = VG_(am_find_nsegment)( needA );
417      NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
418      if (segLo == NULL || segHi == NULL
419          || segLo != segHi || segLo->kind != SkFree)
420         ok = False;
421   }
422   if (ok && advised == needA) {
423      ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
424      if (ok) {
425         VG_TRACK( new_mem_mmap, needA, needL,
426                                 old_seg->hasR,
427                                 old_seg->hasW, old_seg->hasX,
428                                 0/*di_handle*/ );
429         if (d)
430            VG_(discard_translations)( needA, needL, "do_remap(3)" );
431         return VG_(mk_SysRes_Success)( old_addr );
432      }
433   }
434
435   /* that failed.  Look elsewhere. */
436   advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
437   if (ok) {
438      Bool oldR = old_seg->hasR;
439      Bool oldW = old_seg->hasW;
440      Bool oldX = old_seg->hasX;
441      /* assert new area does not overlap old */
442      vg_assert(advised+new_len-1 < old_addr
443                || advised > old_addr+old_len-1);
444      ok = VG_(am_relocate_nooverlap_client)
445              ( &d, old_addr, old_len, advised, new_len );
446      if (ok) {
447         VG_TRACK( copy_mem_remap, old_addr, advised,
448                                   MIN_SIZET(old_len,new_len) );
449         if (new_len > old_len)
450            VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
451                      oldR, oldW, oldX, 0/*di_handle*/ );
452         VG_TRACK(die_mem_munmap, old_addr, old_len);
453         if (d) {
454            VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
455            VG_(discard_translations)( advised, new_len, "do_remap(5)" );
456         }
457         return VG_(mk_SysRes_Success)( advised );
458      }
459   }
460   goto eNOMEM;
461   }
462   /*NOTREACHED*/ vg_assert(0);
463
464  grow_in_place_or_fail:
465   {
466   Addr  needA = old_addr + old_len;
467   SizeT needL = new_len - old_len;
468   if (needA == 0)
469      goto eINVAL;
470      /* VG_(am_get_advisory_client_simple) interprets zero to mean
471         non-fixed, which is not what we want */
472   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
473   if (ok) {
474      /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
475         this-or-nothing) is too lenient, and may allow us to trash
476         the next segment along.  So make very sure that the proposed
477         new area really is free. */
478      NSegment const* segLo = VG_(am_find_nsegment)( needA );
479      NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
480      if (segLo == NULL || segHi == NULL
481          || segLo != segHi || segLo->kind != SkFree)
482         ok = False;
483   }
484   if (!ok || advised != needA)
485      goto eNOMEM;
486   ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
487   if (!ok)
488      goto eNOMEM;
489   VG_TRACK( new_mem_mmap, needA, needL,
490                           old_seg->hasR, old_seg->hasW, old_seg->hasX,
491                           0/*di_handle*/ );
492   if (d)
493      VG_(discard_translations)( needA, needL, "do_remap(6)" );
494   return VG_(mk_SysRes_Success)( old_addr );
495   }
496   /*NOTREACHED*/ vg_assert(0);
497
498  shrink_in_place:
499   {
500   SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
501   if (sr_isError(sres))
502      return sres;
503   VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
504   if (d)
505      VG_(discard_translations)( old_addr+new_len, old_len-new_len,
506                                 "do_remap(7)" );
507   return VG_(mk_SysRes_Success)( old_addr );
508   }
509   /*NOTREACHED*/ vg_assert(0);
510
511  same_in_place:
512   return VG_(mk_SysRes_Success)( old_addr );
513   /*NOTREACHED*/ vg_assert(0);
514
515  eINVAL:
516   return VG_(mk_SysRes_Error)( VKI_EINVAL );
517  eNOMEM:
518   return VG_(mk_SysRes_Error)( VKI_ENOMEM );
519
520#  undef MIN_SIZET
521}
522#endif /* HAVE_MREMAP */
523
524
525/* ---------------------------------------------------------------------
526   File-descriptor tracking
527   ------------------------------------------------------------------ */
528
529/* One of these is allocated for each open file descriptor.  */
530typedef struct OpenFd
531{
532   Int fd;                        /* The file descriptor */
533   Char *pathname;                /* NULL if not a regular file or unknown */
534   ExeContext *where;             /* NULL if inherited from parent */
535   struct OpenFd *next, *prev;
536} OpenFd;
537
538/* List of allocated file descriptors. */
539static OpenFd *allocated_fds = NULL;
540
541/* Count of open file descriptors. */
542static Int fd_count = 0;
543
544
545/* Note the fact that a file descriptor was just closed. */
546static
547void record_fd_close(Int fd)
548{
549   OpenFd *i = allocated_fds;
550
551   if (fd >= VG_(fd_hard_limit))
552      return;			/* Valgrind internal */
553
554   while(i) {
555      if(i->fd == fd) {
556         if(i->prev)
557            i->prev->next = i->next;
558         else
559            allocated_fds = i->next;
560         if(i->next)
561            i->next->prev = i->prev;
562         if(i->pathname)
563            VG_(arena_free) (VG_AR_CORE, i->pathname);
564         VG_(arena_free) (VG_AR_CORE, i);
565         fd_count--;
566         break;
567      }
568      i = i->next;
569   }
570}
571
572/* Note the fact that a file descriptor was just opened.  If the
573   tid is -1, this indicates an inherited fd.  If the pathname is NULL,
574   this either indicates a non-standard file (i.e. a pipe or socket or
575   some such thing) or that we don't know the filename.  If the fd is
576   already open, then we're probably doing a dup2() to an existing fd,
577   so just overwrite the existing one. */
578void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
579{
580   OpenFd *i;
581
582   if (fd >= VG_(fd_hard_limit))
583      return;			/* Valgrind internal */
584
585   /* Check to see if this fd is already open. */
586   i = allocated_fds;
587   while (i) {
588      if (i->fd == fd) {
589         if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);
590         break;
591      }
592      i = i->next;
593   }
594
595   /* Not already one: allocate an OpenFd */
596   if (i == NULL) {
597      i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
598
599      i->prev = NULL;
600      i->next = allocated_fds;
601      if(allocated_fds) allocated_fds->prev = i;
602      allocated_fds = i;
603      fd_count++;
604   }
605
606   i->fd = fd;
607   i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
608   i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
609}
610
611// Record opening of an fd, and find its name.
612void ML_(record_fd_open_named)(ThreadId tid, Int fd)
613{
614   static HChar buf[VKI_PATH_MAX];
615   Char* name;
616   if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
617      name = buf;
618   else
619      name = NULL;
620
621   ML_(record_fd_open_with_given_name)(tid, fd, name);
622}
623
624// Record opening of a nameless fd.
625void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
626{
627   ML_(record_fd_open_with_given_name)(tid, fd, NULL);
628}
629
630static
631Char *unix2name(struct vki_sockaddr_un *sa, UInt len, Char *name)
632{
633   if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
634      VG_(sprintf)(name, "<unknown>");
635   } else {
636      VG_(sprintf)(name, "%s", sa->sun_path);
637   }
638
639   return name;
640}
641
642static
643Char *inet2name(struct vki_sockaddr_in *sa, UInt len, Char *name)
644{
645   if (sa == NULL || len == 0) {
646      VG_(sprintf)(name, "<unknown>");
647   } else {
648      UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
649      if (addr == 0) {
650         VG_(sprintf)(name, "<unbound>");
651      } else {
652         VG_(sprintf)(name, "%u.%u.%u.%u:%u",
653                      (addr>>24) & 0xFF, (addr>>16) & 0xFF,
654                      (addr>>8) & 0xFF, addr & 0xFF,
655                      VG_(ntohs)(sa->sin_port));
656      }
657   }
658
659   return name;
660}
661
662/*
663 * Try get some details about a socket.
664 */
665static void
666getsockdetails(Int fd)
667{
668   union u {
669      struct vki_sockaddr a;
670      struct vki_sockaddr_in in;
671      struct vki_sockaddr_un un;
672   } laddr;
673   UInt llen;
674
675   llen = sizeof(laddr);
676   VG_(memset)(&laddr, 0, llen);
677
678   if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
679      switch(laddr.a.sa_family) {
680      case VKI_AF_INET: {
681         static char lname[32];
682         static char pname[32];
683         struct vki_sockaddr_in paddr;
684         UInt plen = sizeof(struct vki_sockaddr_in);
685
686         if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
687            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
688                         inet2name(&(laddr.in), llen, lname),
689                         inet2name(&paddr, plen, pname));
690         } else {
691            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
692                         fd, inet2name(&(laddr.in), llen, lname));
693         }
694         return;
695         }
696      case VKI_AF_UNIX: {
697         static char lname[256];
698         VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
699                      unix2name(&(laddr.un), llen, lname));
700         return;
701         }
702      default:
703         VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
704                      laddr.a.sa_family, fd);
705         return;
706      }
707   }
708
709   VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
710}
711
712
713/* Dump out a summary, and a more detailed list, of open file descriptors. */
714void VG_(show_open_fds) (void)
715{
716   OpenFd *i = allocated_fds;
717
718   VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open at exit.\n", fd_count);
719
720   while (i) {
721      if (i->pathname) {
722         VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
723                      i->pathname);
724      } else {
725         Int val;
726         UInt len = sizeof(val);
727
728         if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
729             == -1) {
730            VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
731         } else {
732            getsockdetails(i->fd);
733         }
734      }
735
736      if(i->where) {
737         VG_(pp_ExeContext)(i->where);
738         VG_(message)(Vg_UserMsg, "\n");
739      } else {
740         VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
741         VG_(message)(Vg_UserMsg, "\n");
742      }
743
744      i = i->next;
745   }
746
747   VG_(message)(Vg_UserMsg, "\n");
748}
749
750/* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
751   have /proc support compiled in, or a non-Linux kernel), then we need to
752   find out what file descriptors we inherited from our parent process the
753   hard way - by checking each fd in turn. */
754static
755void init_preopened_fds_without_proc_self_fd(void)
756{
757   struct vki_rlimit lim;
758   UInt count;
759   Int i;
760
761   if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
762      /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
763         an arbitrarily high number.  1024 happens to be the limit in
764         the 2.4 Linux kernels. */
765      count = 1024;
766   } else {
767      count = lim.rlim_cur;
768   }
769
770   for (i = 0; i < count; i++)
771      if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
772         ML_(record_fd_open_named)(-1, i);
773}
774
775/* Initialize the list of open file descriptors with the file descriptors
776   we inherited from out parent process. */
777
778void VG_(init_preopened_fds)(void)
779{
780// Nb: AIX5 is handled in syswrap-aix5.c.
781// DDD: should probably use HAVE_PROC here or similar, instead.
782#if defined(VGO_linux)
783   Int ret;
784   struct vki_dirent d;
785   SysRes f;
786
787   f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
788   if (sr_isError(f)) {
789      init_preopened_fds_without_proc_self_fd();
790      return;
791   }
792
793   while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
794      if (ret == -1)
795         goto out;
796
797      if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
798         Char* s;
799         Int fno = VG_(strtoll10)(d.d_name, &s);
800         if (*s == '\0') {
801            if (fno != sr_Res(f))
802               if (VG_(clo_track_fds))
803                  ML_(record_fd_open_named)(-1, fno);
804         } else {
805            VG_(message)(Vg_DebugMsg,
806               "Warning: invalid file name in /proc/self/fd: %s\n",
807               d.d_name);
808         }
809      }
810
811      VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
812   }
813
814  out:
815   VG_(close)(sr_Res(f));
816
817#elif defined(VGO_darwin)
818   init_preopened_fds_without_proc_self_fd();
819
820#else
821#  error Unknown OS
822#endif
823}
824
825static
826Char *strdupcat ( HChar* cc, const Char *s1, const Char *s2, ArenaId aid )
827{
828   UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
829   Char *result = VG_(arena_malloc) ( aid, cc, len );
830   VG_(strcpy) ( result, s1 );
831   VG_(strcat) ( result, s2 );
832   return result;
833}
834
835static
836void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
837                            Char *msg, Addr base, SizeT size )
838{
839   Char *outmsg = strdupcat ( "di.syswrap.pmrs.1",
840                              "socketcall.sendmsg", msg, VG_AR_CORE );
841   PRE_MEM_READ( outmsg, base, size );
842   VG_(arena_free) ( VG_AR_CORE, outmsg );
843}
844
845static
846void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
847                             Char *msg, Addr base, SizeT size )
848{
849   Char *outmsg = strdupcat ( "di.syswrap.pmwr.1",
850                              "socketcall.recvmsg", msg, VG_AR_CORE );
851   if ( read )
852      PRE_MEM_READ( outmsg, base, size );
853   else
854      PRE_MEM_WRITE( outmsg, base, size );
855   VG_(arena_free) ( VG_AR_CORE, outmsg );
856}
857
858static
859void post_mem_write_recvmsg ( ThreadId tid, Bool read,
860                              Char *fieldName, Addr base, SizeT size )
861{
862   if ( !read )
863      POST_MEM_WRITE( base, size );
864}
865
866static
867void msghdr_foreachfield (
868        ThreadId tid,
869        struct vki_msghdr *msg,
870        void (*foreach_func)( ThreadId, Bool, Char *, Addr, SizeT )
871     )
872{
873   if ( !msg )
874      return;
875
876   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
877   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
878   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
879   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
880   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
881   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
882   foreach_func ( tid, False, "(msg)", (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
883
884   if ( msg->msg_name )
885      foreach_func ( tid, False,
886                     "(msg.msg_name)",
887                     (Addr)msg->msg_name, msg->msg_namelen );
888
889   if ( msg->msg_iov ) {
890      struct vki_iovec *iov = msg->msg_iov;
891      UInt i;
892
893      foreach_func ( tid, True,
894                     "(msg.msg_iov)",
895                     (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );
896
897      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov )
898         foreach_func ( tid, False,
899                        "(msg.msg_iov[i])",
900                        (Addr)iov->iov_base, iov->iov_len );
901   }
902
903   if ( msg->msg_control )
904      foreach_func ( tid, False,
905                     "(msg.msg_control)",
906                     (Addr)msg->msg_control, msg->msg_controllen );
907}
908
909static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
910{
911   struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
912
913   while (cm) {
914      if (cm->cmsg_level == VKI_SOL_SOCKET &&
915          cm->cmsg_type == VKI_SCM_RIGHTS ) {
916         Int *fds = (Int *) VKI_CMSG_DATA(cm);
917         Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
918                         / sizeof(int);
919         Int i;
920
921         for (i = 0; i < fdc; i++)
922            if(VG_(clo_track_fds))
923               // XXX: must we check the range on these fds with
924               //      ML_(fd_allowed)()?
925               ML_(record_fd_open_named)(tid, fds[i]);
926      }
927
928      cm = VKI_CMSG_NXTHDR(msg, cm);
929   }
930}
931
932/* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
933static
934void pre_mem_read_sockaddr ( ThreadId tid,
935                             Char *description,
936                             struct vki_sockaddr *sa, UInt salen )
937{
938   Char *outmsg;
939   struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
940   struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
941   struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
942
943   /* NULL/zero-length sockaddrs are legal */
944   if ( sa == NULL || salen == 0 ) return;
945
946   outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
947                                VG_(strlen)( description ) + 30 );
948
949   VG_(sprintf) ( outmsg, description, "sa_family" );
950   PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
951
952   switch (sa->sa_family) {
953
954      case VKI_AF_UNIX:
955         VG_(sprintf) ( outmsg, description, "sun_path" );
956         PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
957         // GrP fixme max of sun_len-2? what about nul char?
958         break;
959
960      case VKI_AF_INET:
961         VG_(sprintf) ( outmsg, description, "sin_port" );
962         PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
963         VG_(sprintf) ( outmsg, description, "sin_addr" );
964         PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
965         break;
966
967      case VKI_AF_INET6:
968         VG_(sprintf) ( outmsg, description, "sin6_port" );
969         PRE_MEM_READ( outmsg,
970            (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
971         VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
972         PRE_MEM_READ( outmsg,
973            (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
974         VG_(sprintf) ( outmsg, description, "sin6_addr" );
975         PRE_MEM_READ( outmsg,
976            (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
977         VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
978         PRE_MEM_READ( outmsg,
979            (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
980         break;
981
982      default:
983         VG_(sprintf) ( outmsg, description, "" );
984         PRE_MEM_READ( outmsg, (Addr) sa, salen );
985         break;
986   }
987
988   VG_(arena_free) ( VG_AR_CORE, outmsg );
989}
990
991/* Dereference a pointer to a UInt. */
992static UInt deref_UInt ( ThreadId tid, Addr a, Char* s )
993{
994   UInt* a_p = (UInt*)a;
995   PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
996   if (a_p == NULL)
997      return 0;
998   else
999      return *a_p;
1000}
1001
1002void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
1003                                  Char* buf_s, Char* buflen_s )
1004{
1005   if (VG_(tdict).track_pre_mem_write) {
1006      UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1007      if (buflen_in > 0) {
1008         VG_(tdict).track_pre_mem_write(
1009            Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1010      }
1011   }
1012}
1013
1014void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1015                                   Addr buf_p, Addr buflen_p, Char* s )
1016{
1017   if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1018      UInt buflen_out = deref_UInt( tid, buflen_p, s);
1019      if (buflen_out > 0 && buf_p != (Addr)NULL) {
1020         VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1021      }
1022   }
1023}
1024
1025/* ---------------------------------------------------------------------
1026   Data seg end, for brk()
1027   ------------------------------------------------------------------ */
1028
1029/*   +--------+------------+
1030     | anon   |    resvn   |
1031     +--------+------------+
1032
1033     ^     ^  ^
1034     |     |  boundary is page aligned
1035     |     VG_(brk_limit) -- no alignment constraint
1036     VG_(brk_base) -- page aligned -- does not move
1037
1038     Both the anon part and the reservation part are always at least
1039     one page.
1040*/
1041
1042/* Set the new data segment end to NEWBRK.  If this succeeds, return
1043   NEWBRK, else return the current data segment end. */
1044
1045static Addr do_brk ( Addr newbrk )
1046{
1047   NSegment const* aseg;
1048   NSegment const* rseg;
1049   Addr newbrkP;
1050   SizeT delta;
1051   Bool ok;
1052   Bool debug = False;
1053
1054   if (debug)
1055      VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
1056		  VG_(brk_base), VG_(brk_limit), newbrk);
1057
1058#  if 0
1059   if (0) show_segments("in_brk");
1060#  endif
1061
1062   if (newbrk < VG_(brk_base))
1063      /* Clearly impossible. */
1064      goto bad;
1065
1066   if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
1067      /* shrinking the data segment.  Be lazy and don't munmap the
1068         excess area. */
1069      NSegment const * seg = VG_(am_find_nsegment)(newbrk);
1070      if (seg && seg->hasT)
1071         VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
1072                                    "do_brk(shrink)" );
1073      /* Since we're being lazy and not unmapping pages, we have to
1074         zero out the area, so that if the area later comes back into
1075         circulation, it will be filled with zeroes, as if it really
1076         had been unmapped and later remapped.  Be a bit paranoid and
1077         try hard to ensure we're not going to segfault by doing the
1078         write - check both ends of the range are in the same segment
1079         and that segment is writable. */
1080      if (seg) {
1081         /* pre: newbrk < VG_(brk_limit)
1082              => newbrk <= VG_(brk_limit)-1 */
1083         NSegment const * seg2;
1084         vg_assert(newbrk < VG_(brk_limit));
1085         seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
1086         if (seg2 && seg == seg2 && seg->hasW)
1087            VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
1088      }
1089
1090      VG_(brk_limit) = newbrk;
1091      return newbrk;
1092   }
1093
1094   /* otherwise we're expanding the brk segment. */
1095   if (VG_(brk_limit) > VG_(brk_base))
1096      aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
1097   else
1098      aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
1099   rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
1100
1101   /* These should be assured by setup_client_dataseg in m_main. */
1102   vg_assert(aseg);
1103   vg_assert(rseg);
1104   vg_assert(aseg->kind == SkAnonC);
1105   vg_assert(rseg->kind == SkResvn);
1106   vg_assert(aseg->end+1 == rseg->start);
1107
1108   vg_assert(newbrk >= VG_(brk_base));
1109   if (newbrk <= rseg->start) {
1110      /* still fits within the anon segment. */
1111      VG_(brk_limit) = newbrk;
1112      return newbrk;
1113   }
1114
1115   if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
1116      /* request is too large -- the resvn would fall below 1 page,
1117         which isn't allowed. */
1118      goto bad;
1119   }
1120
1121   newbrkP = VG_PGROUNDUP(newbrk);
1122   vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
1123   delta = newbrkP - rseg->start;
1124   vg_assert(delta > 0);
1125   vg_assert(VG_IS_PAGE_ALIGNED(delta));
1126
1127   ok = VG_(am_extend_into_adjacent_reservation_client)( (NSegment*)aseg, delta );
1128   if (!ok) goto bad;
1129
1130   VG_(brk_limit) = newbrk;
1131   return newbrk;
1132
1133  bad:
1134   return VG_(brk_limit);
1135}
1136
1137
1138/* ---------------------------------------------------------------------
1139   Vet file descriptors for sanity
1140   ------------------------------------------------------------------ */
1141/*
1142> - what does the "Bool soft" parameter mean?
1143
1144(Tom Hughes, 3 Oct 05):
1145
1146Whether or not to consider a file descriptor invalid if it is above
1147the current soft limit.
1148
1149Basically if we are testing whether a newly created file descriptor is
1150valid (in a post handler) then we set soft to true, and if we are
1151testing whether a file descriptor that is about to be used (in a pre
1152handler) is valid [viz, an already-existing fd] then we set it to false.
1153
1154The point is that if the (virtual) soft limit is lowered then any
1155existing descriptors can still be read/written/closed etc (so long as
1156they are below the valgrind reserved descriptors) but no new
1157descriptors can be created above the new soft limit.
1158
1159(jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
1160*/
1161
1162/* Return true if we're allowed to use or create this fd */
1163Bool ML_(fd_allowed)(Int fd, const Char *syscallname, ThreadId tid, Bool isNewFd)
1164{
1165   Bool allowed = True;
1166
1167   /* hard limits always apply */
1168   if (fd < 0 || fd >= VG_(fd_hard_limit))
1169      allowed = False;
1170
1171   /* hijacking the output fds is never allowed */
1172   if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1173      allowed = False;
1174
1175   /* if creating a new fd (rather than using an existing one), the
1176      soft limit must also be observed */
1177   if (isNewFd && fd >= VG_(fd_soft_limit))
1178      allowed = False;
1179
1180   /* this looks like it ought to be included, but causes problems: */
1181   /*
1182   if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1183      allowed = False;
1184   */
1185   /* The difficulty is as follows: consider a program P which expects
1186      to be able to mess with (redirect) its own stderr (fd 2).
1187      Usually to deal with P we would issue command line flags to send
1188      logging somewhere other than stderr, so as not to disrupt P.
1189      The problem is that -d unilaterally hijacks stderr with no
1190      consultation with P.  And so, if this check is enabled, P will
1191      work OK normally but fail if -d is issued.
1192
1193      Basically -d is a hack and you take your chances when using it.
1194      It's very useful for low level debugging -- particularly at
1195      startup -- and having its presence change the behaviour of the
1196      client is exactly what we don't want.  */
1197
1198   /* croak? */
1199   if ((!allowed) && VG_(showing_core_errors)() ) {
1200      VG_(message)(Vg_UserMsg,
1201         "Warning: invalid file descriptor %d in syscall %s()\n",
1202         fd, syscallname);
1203      if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1204	 VG_(message)(Vg_UserMsg,
1205            "   Use --log-fd=<number> to select an alternative log fd.\n");
1206      if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1207	 VG_(message)(Vg_UserMsg,
1208            "   Use --xml-fd=<number> to select an alternative XML "
1209            "output fd.\n");
1210      // DDD: consider always printing this stack trace, it's useful.
1211      // Also consider also making this a proper core error, ie.
1212      // suppressible and all that.
1213      if (VG_(clo_verbosity) > 1) {
1214         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1215      }
1216   }
1217
1218   return allowed;
1219}
1220
1221
1222/* ---------------------------------------------------------------------
1223   Deal with a bunch of socket-related syscalls
1224   ------------------------------------------------------------------ */
1225
1226/* ------ */
1227
1228void
1229ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1230                                  UWord arg0, UWord arg1,
1231                                  UWord arg2, UWord arg3 )
1232{
1233   /* int socketpair(int d, int type, int protocol, int sv[2]); */
1234   PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1235                  arg3, 2*sizeof(int) );
1236}
1237
1238SysRes
1239ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1240                                   SysRes res,
1241                                   UWord arg0, UWord arg1,
1242                                   UWord arg2, UWord arg3 )
1243{
1244   SysRes r = res;
1245   Int fd1 = ((Int*)arg3)[0];
1246   Int fd2 = ((Int*)arg3)[1];
1247   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1248   POST_MEM_WRITE( arg3, 2*sizeof(int) );
1249   if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1250       !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1251      VG_(close)(fd1);
1252      VG_(close)(fd2);
1253      r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1254   } else {
1255      POST_MEM_WRITE( arg3, 2*sizeof(int) );
1256      if (VG_(clo_track_fds)) {
1257         ML_(record_fd_open_nameless)(tid, fd1);
1258         ML_(record_fd_open_nameless)(tid, fd2);
1259      }
1260   }
1261   return r;
1262}
1263
1264/* ------ */
1265
1266SysRes
1267ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1268{
1269   SysRes r = res;
1270   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1271   if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1272      VG_(close)(sr_Res(res));
1273      r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1274   } else {
1275      if (VG_(clo_track_fds))
1276         ML_(record_fd_open_nameless)(tid, sr_Res(res));
1277   }
1278   return r;
1279}
1280
1281/* ------ */
1282
1283void
1284ML_(generic_PRE_sys_bind) ( ThreadId tid,
1285                            UWord arg0, UWord arg1, UWord arg2 )
1286{
1287   /* int bind(int sockfd, struct sockaddr *my_addr,
1288               int addrlen); */
1289   pre_mem_read_sockaddr(
1290      tid, "socketcall.bind(my_addr.%s)",
1291      (struct vki_sockaddr *) arg1, arg2
1292   );
1293}
1294
1295/* ------ */
1296
1297void
1298ML_(generic_PRE_sys_accept) ( ThreadId tid,
1299                              UWord arg0, UWord arg1, UWord arg2 )
1300{
1301   /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1302   Addr addr_p     = arg1;
1303   Addr addrlen_p  = arg2;
1304   if (addr_p != (Addr)NULL)
1305      ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1306                                   "socketcall.accept(addr)",
1307                                   "socketcall.accept(addrlen_in)" );
1308}
1309
1310SysRes
1311ML_(generic_POST_sys_accept) ( ThreadId tid,
1312                               SysRes res,
1313                               UWord arg0, UWord arg1, UWord arg2 )
1314{
1315   SysRes r = res;
1316   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1317   if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1318      VG_(close)(sr_Res(res));
1319      r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1320   } else {
1321      Addr addr_p     = arg1;
1322      Addr addrlen_p  = arg2;
1323      if (addr_p != (Addr)NULL)
1324         ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1325                                       "socketcall.accept(addrlen_out)" );
1326      if (VG_(clo_track_fds))
1327          ML_(record_fd_open_nameless)(tid, sr_Res(res));
1328   }
1329   return r;
1330}
1331
1332/* ------ */
1333
1334void
1335ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1336                              UWord arg0, UWord arg1, UWord arg2,
1337                              UWord arg3, UWord arg4, UWord arg5 )
1338{
1339   /* int sendto(int s, const void *msg, int len,
1340                 unsigned int flags,
1341                 const struct sockaddr *to, int tolen); */
1342   PRE_MEM_READ( "socketcall.sendto(msg)",
1343                 arg1, /* msg */
1344                 arg2  /* len */ );
1345   pre_mem_read_sockaddr(
1346      tid, "socketcall.sendto(to.%s)",
1347      (struct vki_sockaddr *) arg4, arg5
1348   );
1349}
1350
1351/* ------ */
1352
1353void
1354ML_(generic_PRE_sys_send) ( ThreadId tid,
1355                            UWord arg0, UWord arg1, UWord arg2 )
1356{
1357   /* int send(int s, const void *msg, size_t len, int flags); */
1358   PRE_MEM_READ( "socketcall.send(msg)",
1359                  arg1, /* msg */
1360                  arg2  /* len */ );
1361
1362}
1363
1364/* ------ */
1365
1366void
1367ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1368                                UWord arg0, UWord arg1, UWord arg2,
1369                                UWord arg3, UWord arg4, UWord arg5 )
1370{
1371   /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1372                   struct sockaddr *from, int *fromlen); */
1373   Addr buf_p      = arg1;
1374   Int  len        = arg2;
1375   Addr from_p     = arg4;
1376   Addr fromlen_p  = arg5;
1377   PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1378   if (from_p != (Addr)NULL)
1379      ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1380                                   "socketcall.recvfrom(from)",
1381                                   "socketcall.recvfrom(fromlen_in)" );
1382}
1383
1384void
1385ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1386                                 SysRes res,
1387                                 UWord arg0, UWord arg1, UWord arg2,
1388                                 UWord arg3, UWord arg4, UWord arg5 )
1389{
1390   Addr buf_p      = arg1;
1391   Int  len        = arg2;
1392   Addr from_p     = arg4;
1393   Addr fromlen_p  = arg5;
1394
1395   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1396   if (from_p != (Addr)NULL)
1397      ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1398                                    "socketcall.recvfrom(fromlen_out)" );
1399   POST_MEM_WRITE( buf_p, len );
1400}
1401
1402/* ------ */
1403
1404void
1405ML_(generic_PRE_sys_recv) ( ThreadId tid,
1406                            UWord arg0, UWord arg1, UWord arg2 )
1407{
1408   /* int recv(int s, void *buf, int len, unsigned int flags); */
1409   /* man 2 recv says:
1410      The  recv call is normally used only on a connected socket
1411      (see connect(2)) and is identical to recvfrom with a  NULL
1412      from parameter.
1413   */
1414   PRE_MEM_WRITE( "socketcall.recv(buf)",
1415                  arg1, /* buf */
1416                  arg2  /* len */ );
1417}
1418
1419void
1420ML_(generic_POST_sys_recv) ( ThreadId tid,
1421                             UWord res,
1422                             UWord arg0, UWord arg1, UWord arg2 )
1423{
1424   if (res >= 0 && arg1 != 0) {
1425      POST_MEM_WRITE( arg1, /* buf */
1426                      arg2  /* len */ );
1427   }
1428}
1429
1430/* ------ */
1431
1432void
1433ML_(generic_PRE_sys_connect) ( ThreadId tid,
1434                               UWord arg0, UWord arg1, UWord arg2 )
1435{
1436   /* int connect(int sockfd,
1437                  struct sockaddr *serv_addr, int addrlen ); */
1438   pre_mem_read_sockaddr( tid,
1439                          "socketcall.connect(serv_addr.%s)",
1440                          (struct vki_sockaddr *) arg1, arg2);
1441}
1442
1443/* ------ */
1444
1445void
1446ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1447                                  UWord arg0, UWord arg1, UWord arg2,
1448                                  UWord arg3, UWord arg4 )
1449{
1450   /* int setsockopt(int s, int level, int optname,
1451                     const void *optval, int optlen); */
1452   PRE_MEM_READ( "socketcall.setsockopt(optval)",
1453                 arg3, /* optval */
1454                 arg4  /* optlen */ );
1455}
1456
1457/* ------ */
1458
1459void
1460ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1461                                   UWord arg0, UWord arg1, UWord arg2 )
1462{
1463   /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1464   Addr name_p     = arg1;
1465   Addr namelen_p  = arg2;
1466   /* Nb: name_p cannot be NULL */
1467   ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1468                                "socketcall.getsockname(name)",
1469                                "socketcall.getsockname(namelen_in)" );
1470}
1471
1472void
1473ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1474                                    SysRes res,
1475                                    UWord arg0, UWord arg1, UWord arg2 )
1476{
1477   Addr name_p     = arg1;
1478   Addr namelen_p  = arg2;
1479   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1480   ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1481                                 "socketcall.getsockname(namelen_out)" );
1482}
1483
1484/* ------ */
1485
1486void
1487ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1488                                   UWord arg0, UWord arg1, UWord arg2 )
1489{
1490   /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1491   Addr name_p     = arg1;
1492   Addr namelen_p  = arg2;
1493   /* Nb: name_p cannot be NULL */
1494   ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1495                                "socketcall.getpeername(name)",
1496                                "socketcall.getpeername(namelen_in)" );
1497}
1498
1499void
1500ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1501                                    SysRes res,
1502                                    UWord arg0, UWord arg1, UWord arg2 )
1503{
1504   Addr name_p     = arg1;
1505   Addr namelen_p  = arg2;
1506   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1507   ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1508                                 "socketcall.getpeername(namelen_out)" );
1509}
1510
1511/* ------ */
1512
1513void
1514ML_(generic_PRE_sys_sendmsg) ( ThreadId tid,
1515                               UWord arg0, UWord arg1 )
1516{
1517   /* int sendmsg(int s, const struct msghdr *msg, int flags); */
1518   struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1519   msghdr_foreachfield ( tid, msg, pre_mem_read_sendmsg );
1520}
1521
1522/* ------ */
1523
1524void
1525ML_(generic_PRE_sys_recvmsg) ( ThreadId tid,
1526                               UWord arg0, UWord arg1 )
1527{
1528   /* int recvmsg(int s, struct msghdr *msg, int flags); */
1529   struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1530   msghdr_foreachfield ( tid, msg, pre_mem_write_recvmsg );
1531}
1532
1533void
1534ML_(generic_POST_sys_recvmsg) ( ThreadId tid,
1535                                UWord arg0, UWord arg1 )
1536{
1537   struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1538   msghdr_foreachfield( tid, msg, post_mem_write_recvmsg );
1539   check_cmsg_for_fds( tid, msg );
1540}
1541
1542
1543/* ---------------------------------------------------------------------
1544   Deal with a bunch of IPC related syscalls
1545   ------------------------------------------------------------------ */
1546
1547/* ------ */
1548
1549void
1550ML_(generic_PRE_sys_semop) ( ThreadId tid,
1551                             UWord arg0, UWord arg1, UWord arg2 )
1552{
1553   /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1554   PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1555}
1556
1557/* ------ */
1558
1559void
1560ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1561                                  UWord arg0, UWord arg1,
1562                                  UWord arg2, UWord arg3 )
1563{
1564   /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1565                     struct timespec *timeout); */
1566   PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1567   if (arg3 != 0)
1568      PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
1569}
1570
1571/* ------ */
1572
1573static
1574UInt get_sem_count( Int semid )
1575{
1576   struct vki_semid_ds buf;
1577   union vki_semun arg;
1578   SysRes res;
1579
1580   /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
1581      (experimental) otherwise complains that the use in the return
1582      statement below is uninitialised. */
1583   buf.sem_nsems = 0;
1584
1585   arg.buf = &buf;
1586
1587#  ifdef __NR_semctl
1588   res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
1589#  else
1590   res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
1591                          VKI_IPC_STAT, (UWord)&arg);
1592#  endif
1593   if (sr_isError(res))
1594      return 0;
1595
1596   return buf.sem_nsems;
1597}
1598
1599void
1600ML_(generic_PRE_sys_semctl) ( ThreadId tid,
1601                              UWord arg0, UWord arg1,
1602                              UWord arg2, UWord arg3 )
1603{
1604   /* int semctl(int semid, int semnum, int cmd, ...); */
1605   union vki_semun arg = *(union vki_semun *)&arg3;
1606   UInt nsems;
1607   switch (arg2 /* cmd */) {
1608#if defined(VKI_IPC_INFO)
1609   case VKI_IPC_INFO:
1610   case VKI_SEM_INFO:
1611   case VKI_IPC_INFO|VKI_IPC_64:
1612   case VKI_SEM_INFO|VKI_IPC_64:
1613      PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
1614                     (Addr)arg.buf, sizeof(struct vki_seminfo) );
1615      break;
1616#endif
1617
1618   case VKI_IPC_STAT:
1619#if defined(VKI_SEM_STAT)
1620   case VKI_SEM_STAT:
1621#endif
1622      PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1623                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1624      break;
1625
1626#if defined(VKI_IPC_64)
1627   case VKI_IPC_STAT|VKI_IPC_64:
1628#if defined(VKI_SEM_STAT)
1629   case VKI_SEM_STAT|VKI_IPC_64:
1630#endif
1631      PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1632                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1633      break;
1634#endif
1635
1636   case VKI_IPC_SET:
1637      PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1638                    (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1639      break;
1640
1641#if defined(VKI_IPC_64)
1642   case VKI_IPC_SET|VKI_IPC_64:
1643      PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1644                    (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1645      break;
1646#endif
1647
1648   case VKI_GETALL:
1649#if defined(VKI_IPC_64)
1650   case VKI_GETALL|VKI_IPC_64:
1651#endif
1652      nsems = get_sem_count( arg0 );
1653      PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
1654                     (Addr)arg.array, sizeof(unsigned short) * nsems );
1655      break;
1656
1657   case VKI_SETALL:
1658#if defined(VKI_IPC_64)
1659   case VKI_SETALL|VKI_IPC_64:
1660#endif
1661      nsems = get_sem_count( arg0 );
1662      PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
1663                    (Addr)arg.array, sizeof(unsigned short) * nsems );
1664      break;
1665   }
1666}
1667
1668void
1669ML_(generic_POST_sys_semctl) ( ThreadId tid,
1670                               UWord res,
1671                               UWord arg0, UWord arg1,
1672                               UWord arg2, UWord arg3 )
1673{
1674   union vki_semun arg = *(union vki_semun *)&arg3;
1675   UInt nsems;
1676   switch (arg2 /* cmd */) {
1677#if defined(VKI_IPC_INFO)
1678   case VKI_IPC_INFO:
1679   case VKI_SEM_INFO:
1680   case VKI_IPC_INFO|VKI_IPC_64:
1681   case VKI_SEM_INFO|VKI_IPC_64:
1682      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1683      break;
1684#endif
1685
1686   case VKI_IPC_STAT:
1687#if defined(VKI_SEM_STAT)
1688   case VKI_SEM_STAT:
1689#endif
1690      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1691      break;
1692
1693#if defined(VKI_IPC_64)
1694   case VKI_IPC_STAT|VKI_IPC_64:
1695   case VKI_SEM_STAT|VKI_IPC_64:
1696      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1697      break;
1698#endif
1699
1700   case VKI_GETALL:
1701#if defined(VKI_IPC_64)
1702   case VKI_GETALL|VKI_IPC_64:
1703#endif
1704      nsems = get_sem_count( arg0 );
1705      POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
1706      break;
1707   }
1708}
1709
1710/* ------ */
1711
1712/* ------ */
1713
1714static
1715UInt get_shm_size ( Int shmid )
1716{
1717#ifdef __NR_shmctl
1718#  ifdef VKI_IPC_64
1719   struct vki_shmid64_ds buf;
1720#    ifdef VGP_amd64_linux
1721     /* See bug 222545 comment 7 */
1722     SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1723                                     VKI_IPC_STAT, (UWord)&buf);
1724#    else
1725     SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1726                                     VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
1727#    endif
1728#  else /* !def VKI_IPC_64 */
1729   struct vki_shmid_ds buf;
1730   SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
1731#  endif /* def VKI_IPC_64 */
1732#else
1733   struct vki_shmid_ds buf;
1734   SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
1735                                 VKI_IPC_STAT, 0, (UWord)&buf);
1736#endif
1737   if (sr_isError(__res))
1738      return 0;
1739
1740   return buf.shm_segsz;
1741}
1742
1743UWord
1744ML_(generic_PRE_sys_shmat) ( ThreadId tid,
1745                             UWord arg0, UWord arg1, UWord arg2 )
1746{
1747   /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
1748   UInt  segmentSize = get_shm_size ( arg0 );
1749   UWord tmp;
1750   Bool  ok;
1751   if (arg1 == 0) {
1752      /* arm-linux only: work around the fact that
1753         VG_(am_get_advisory_client_simple) produces something that is
1754         VKI_PAGE_SIZE aligned, whereas what we want is something
1755         VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
1756         increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
1757         then round the result up to the next VKI_SHMLBA boundary.
1758         See bug 222545 comment 15.  So far, arm-linux is the only
1759         platform where this is known to be necessary. */
1760      vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
1761      if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1762         segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
1763      }
1764      tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
1765      if (ok) {
1766         if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1767            arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
1768         } else {
1769            arg1 = tmp;
1770         }
1771      }
1772   }
1773   else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
1774      arg1 = 0;
1775   return arg1;
1776}
1777
1778void
1779ML_(generic_POST_sys_shmat) ( ThreadId tid,
1780                              UWord res,
1781                              UWord arg0, UWord arg1, UWord arg2 )
1782{
1783   UInt segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
1784   if ( segmentSize > 0 ) {
1785      UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
1786      Bool d;
1787
1788      if (arg2 & VKI_SHM_RDONLY)
1789         prot &= ~VKI_PROT_WRITE;
1790      /* It isn't exactly correct to pass 0 for the fd and offset
1791         here.  The kernel seems to think the corresponding section
1792         does have dev/ino numbers:
1793
1794         04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
1795
1796         However there is no obvious way to find them.  In order to
1797         cope with the discrepancy, aspacem's sync checker omits the
1798         dev/ino correspondence check in cases where V does not know
1799         the dev/ino. */
1800      d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
1801
1802      /* we don't distinguish whether it's read-only or
1803       * read-write -- it doesn't matter really. */
1804      VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
1805                              0/*di_handle*/ );
1806      if (d)
1807         VG_(discard_translations)( (Addr64)res,
1808                                    (ULong)VG_PGROUNDUP(segmentSize),
1809                                    "ML_(generic_POST_sys_shmat)" );
1810   }
1811}
1812
1813/* ------ */
1814
1815Bool
1816ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
1817{
1818   /* int shmdt(const void *shmaddr); */
1819   return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
1820}
1821
1822void
1823ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
1824{
1825   NSegment const* s = VG_(am_find_nsegment)(arg0);
1826
1827   if (s != NULL) {
1828      Addr  s_start = s->start;
1829      SizeT s_len   = s->end+1 - s->start;
1830      Bool  d;
1831
1832      vg_assert(s->kind == SkShmC);
1833      vg_assert(s->start == arg0);
1834
1835      d = VG_(am_notify_munmap)(s_start, s_len);
1836      s = NULL; /* s is now invalid */
1837      VG_TRACK( die_mem_munmap, s_start, s_len );
1838      if (d)
1839         VG_(discard_translations)( (Addr64)s_start,
1840                                    (ULong)s_len,
1841                                    "ML_(generic_POST_sys_shmdt)" );
1842   }
1843}
1844/* ------ */
1845
1846void
1847ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
1848                              UWord arg0, UWord arg1, UWord arg2 )
1849{
1850   /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
1851   switch (arg1 /* cmd */) {
1852#if defined(VKI_IPC_INFO)
1853   case VKI_IPC_INFO:
1854      PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1855                     arg2, sizeof(struct vki_shminfo) );
1856      break;
1857#if defined(VKI_IPC_64)
1858   case VKI_IPC_INFO|VKI_IPC_64:
1859      PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1860                     arg2, sizeof(struct vki_shminfo64) );
1861      break;
1862#endif
1863#endif
1864
1865#if defined(VKI_SHM_INFO)
1866   case VKI_SHM_INFO:
1867#if defined(VKI_IPC_64)
1868   case VKI_SHM_INFO|VKI_IPC_64:
1869#endif
1870      PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
1871                     arg2, sizeof(struct vki_shm_info) );
1872      break;
1873#endif
1874
1875   case VKI_IPC_STAT:
1876#if defined(VKI_SHM_STAT)
1877   case VKI_SHM_STAT:
1878#endif
1879      PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
1880                     arg2, sizeof(struct vki_shmid_ds) );
1881      break;
1882
1883#if defined(VKI_IPC_64)
1884   case VKI_IPC_STAT|VKI_IPC_64:
1885   case VKI_SHM_STAT|VKI_IPC_64:
1886      PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
1887                     arg2, sizeof(struct vki_shmid64_ds) );
1888      break;
1889#endif
1890
1891   case VKI_IPC_SET:
1892      PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
1893                    arg2, sizeof(struct vki_shmid_ds) );
1894      break;
1895
1896#if defined(VKI_IPC_64)
1897   case VKI_IPC_SET|VKI_IPC_64:
1898      PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
1899                    arg2, sizeof(struct vki_shmid64_ds) );
1900      break;
1901#endif
1902   }
1903}
1904
1905void
1906ML_(generic_POST_sys_shmctl) ( ThreadId tid,
1907                               UWord res,
1908                               UWord arg0, UWord arg1, UWord arg2 )
1909{
1910   switch (arg1 /* cmd */) {
1911#if defined(VKI_IPC_INFO)
1912   case VKI_IPC_INFO:
1913      POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
1914      break;
1915   case VKI_IPC_INFO|VKI_IPC_64:
1916      POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
1917      break;
1918#endif
1919
1920#if defined(VKI_SHM_INFO)
1921   case VKI_SHM_INFO:
1922   case VKI_SHM_INFO|VKI_IPC_64:
1923      POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
1924      break;
1925#endif
1926
1927   case VKI_IPC_STAT:
1928#if defined(VKI_SHM_STAT)
1929   case VKI_SHM_STAT:
1930#endif
1931      POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
1932      break;
1933
1934#if defined(VKI_IPC_64)
1935   case VKI_IPC_STAT|VKI_IPC_64:
1936   case VKI_SHM_STAT|VKI_IPC_64:
1937      POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
1938      break;
1939#endif
1940
1941
1942   }
1943}
1944
1945
1946/* ---------------------------------------------------------------------
1947   Generic handler for mmap
1948   ------------------------------------------------------------------ */
1949
1950/*
1951 * Although mmap is specified by POSIX and the argument are generally
1952 * consistent across platforms the precise details of the low level
1953 * argument passing conventions differ. For example:
1954 *
1955 * - On x86-linux there is mmap (aka old_mmap) which takes the
1956 *   arguments in a memory block and the offset in bytes; and
1957 *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
1958 *   way and the offset in pages.
1959 *
1960 * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
1961 *   arguments in the normal way and the offset in bytes; and
1962 *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
1963 *   way and the offset in pages.
1964 *
1965 * - On amd64-linux everything is simple and there is just the one
1966 *   call, mmap (aka sys_mmap)  which takes the arguments in the
1967 *   normal way and the offset in bytes.
1968 *
1969 * To cope with all this we provide a generic handler function here
1970 * and then each platform implements one or more system call handlers
1971 * which call this generic routine after extracting and normalising
1972 * the arguments.
1973 */
1974
1975SysRes
1976ML_(generic_PRE_sys_mmap) ( ThreadId tid,
1977                            UWord arg1, UWord arg2, UWord arg3,
1978                            UWord arg4, UWord arg5, Off64T arg6 )
1979{
1980   Addr       advised;
1981   SysRes     sres;
1982   MapRequest mreq;
1983   Bool       mreq_ok;
1984
1985#if defined(VGO_darwin)
1986   // Nb: we can't use this on Darwin, it has races:
1987   // * needs to RETRY if advisory succeeds but map fails
1988   //   (could have been some other thread in a nonblocking call)
1989   // * needs to not use fixed-position mmap() on Darwin
1990   //   (mmap will cheerfully smash whatever's already there, which might
1991   //   be a new mapping from some other thread in a nonblocking call)
1992   VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
1993#endif
1994
1995   if (arg2 == 0) {
1996      /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
1997         shall be established. */
1998      return VG_(mk_SysRes_Error)( VKI_EINVAL );
1999   }
2000
2001   if (!VG_IS_PAGE_ALIGNED(arg1)) {
2002      /* zap any misaligned addresses. */
2003      /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
2004         to fail.   Here, we catch them all. */
2005      return VG_(mk_SysRes_Error)( VKI_EINVAL );
2006   }
2007
2008   if (!VG_IS_PAGE_ALIGNED(arg6)) {
2009      /* zap any misaligned offsets. */
2010      /* SuSV3 says: The off argument is constrained to be aligned and
2011         sized according to the value returned by sysconf() when
2012         passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
2013      return VG_(mk_SysRes_Error)( VKI_EINVAL );
2014   }
2015
2016   /* Figure out what kind of allocation constraints there are
2017      (fixed/hint/any), and ask aspacem what we should do. */
2018   mreq.start = arg1;
2019   mreq.len   = arg2;
2020   if (arg4 & VKI_MAP_FIXED) {
2021      mreq.rkind = MFixed;
2022   } else
2023   if (arg1 != 0) {
2024      mreq.rkind = MHint;
2025   } else {
2026      mreq.rkind = MAny;
2027   }
2028
2029   /* Enquire ... */
2030   advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2031   if (!mreq_ok) {
2032      /* Our request was bounced, so we'd better fail. */
2033      return VG_(mk_SysRes_Error)( VKI_EINVAL );
2034   }
2035
2036   /* Otherwise we're OK (so far).  Install aspacem's choice of
2037      address, and let the mmap go through.  */
2038   sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2039                                    arg4 | VKI_MAP_FIXED,
2040                                    arg5, arg6);
2041
2042   /* A refinement: it may be that the kernel refused aspacem's choice
2043      of address.  If we were originally asked for a hinted mapping,
2044      there is still a last chance: try again at any address.
2045      Hence: */
2046   if (mreq.rkind == MHint && sr_isError(sres)) {
2047      mreq.start = 0;
2048      mreq.len   = arg2;
2049      mreq.rkind = MAny;
2050      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2051      if (!mreq_ok) {
2052         /* Our request was bounced, so we'd better fail. */
2053         return VG_(mk_SysRes_Error)( VKI_EINVAL );
2054      }
2055      /* and try again with the kernel */
2056      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2057                                       arg4 | VKI_MAP_FIXED,
2058                                       arg5, arg6);
2059   }
2060
2061   if (!sr_isError(sres)) {
2062      ULong di_handle;
2063      /* Notify aspacem. */
2064      notify_core_of_mmap(
2065         (Addr)sr_Res(sres), /* addr kernel actually assigned */
2066         arg2, /* length */
2067         arg3, /* prot */
2068         arg4, /* the original flags value */
2069         arg5, /* fd */
2070         arg6  /* offset */
2071      );
2072      /* Load symbols? */
2073      di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
2074                                       False/*allow_SkFileV*/ );
2075      /* Notify the tool. */
2076      notify_tool_of_mmap(
2077         (Addr)sr_Res(sres), /* addr kernel actually assigned */
2078         arg2, /* length */
2079         arg3, /* prot */
2080         di_handle /* so the tool can refer to the read debuginfo later,
2081                      if it wants. */
2082      );
2083   }
2084
2085   /* Stay sane */
2086   if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
2087      vg_assert(sr_Res(sres) == arg1);
2088
2089   return sres;
2090}
2091
2092
2093/* ---------------------------------------------------------------------
2094   The Main Entertainment ... syscall wrappers
2095   ------------------------------------------------------------------ */
2096
2097/* Note: the PRE() and POST() wrappers are for the actual functions
2098   implementing the system calls in the OS kernel.  These mostly have
2099   names like sys_write();  a few have names like old_mmap().  See the
2100   comment for ML_(syscall_table)[] for important info about the __NR_foo
2101   constants and their relationship to the sys_foo() functions.
2102
2103   Some notes about names used for syscalls and args:
2104   - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2105     ambiguity.
2106
2107   - For error messages, we generally use a somewhat generic name
2108     for the syscall (eg. "write" rather than "sys_write").  This should be
2109     good enough for the average user to understand what is happening,
2110     without confusing them with names like "sys_write".
2111
2112   - Also, for error messages the arg names are mostly taken from the man
2113     pages (even though many of those man pages are really for glibc
2114     functions of the same name), rather than from the OS kernel source,
2115     for the same reason -- a user presented with a "bogus foo(bar)" arg
2116     will most likely look at the "foo" man page to see which is the "bar"
2117     arg.
2118
2119   Note that we use our own vki_* types.  The one exception is in
2120   PRE_REG_READn calls, where pointer types haven't been changed, because
2121   they don't need to be -- eg. for "foo*" to be used, the type foo need not
2122   be visible.
2123
2124   XXX: some of these are arch-specific, and should be factored out.
2125*/
2126
2127#define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
2128#define POST(name)     DEFN_POST_TEMPLATE(generic, name)
2129
2130// Macros to support 64-bit syscall args split into two 32 bit values
2131#if defined(VG_LITTLEENDIAN)
2132#define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2133#define MERGE64_FIRST(name) name##_low
2134#define MERGE64_SECOND(name) name##_high
2135#elif defined(VG_BIGENDIAN)
2136#define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2137#define MERGE64_FIRST(name) name##_high
2138#define MERGE64_SECOND(name) name##_low
2139#else
2140#error Unknown endianness
2141#endif
2142
2143PRE(sys_exit)
2144{
2145   ThreadState* tst;
2146   /* simple; just make this thread exit */
2147   PRINT("exit( %ld )", ARG1);
2148   PRE_REG_READ1(void, "exit", int, status);
2149   tst = VG_(get_ThreadState)(tid);
2150   /* Set the thread's status to be exiting, then claim that the
2151      syscall succeeded. */
2152   tst->exitreason = VgSrc_ExitThread;
2153   tst->os_state.exitcode = ARG1;
2154   SET_STATUS_Success(0);
2155}
2156
2157PRE(sys_ni_syscall)
2158{
2159   PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
2160      VG_SYSNUM_STRING(SYSNO));
2161   PRE_REG_READ0(long, "ni_syscall");
2162   SET_STATUS_Failure( VKI_ENOSYS );
2163}
2164
2165PRE(sys_iopl)
2166{
2167   PRINT("sys_iopl ( %ld )", ARG1);
2168   PRE_REG_READ1(long, "iopl", unsigned long, level);
2169}
2170
2171PRE(sys_fsync)
2172{
2173   *flags |= SfMayBlock;
2174   PRINT("sys_fsync ( %ld )", ARG1);
2175   PRE_REG_READ1(long, "fsync", unsigned int, fd);
2176}
2177
2178PRE(sys_fdatasync)
2179{
2180   *flags |= SfMayBlock;
2181   PRINT("sys_fdatasync ( %ld )", ARG1);
2182   PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
2183}
2184
2185PRE(sys_msync)
2186{
2187   *flags |= SfMayBlock;
2188   PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
2189   PRE_REG_READ3(long, "msync",
2190                 unsigned long, start, vki_size_t, length, int, flags);
2191   PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
2192}
2193
2194// Nb: getpmsg() and putpmsg() are special additional syscalls used in early
2195// versions of LiS (Linux Streams).  They are not part of the kernel.
2196// Therefore, we have to provide this type ourself, rather than getting it
2197// from the kernel sources.
2198struct vki_pmsg_strbuf {
2199   int     maxlen;         /* no. of bytes in buffer */
2200   int     len;            /* no. of bytes returned */
2201   vki_caddr_t buf;        /* pointer to data */
2202};
2203PRE(sys_getpmsg)
2204{
2205   /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
2206   struct vki_pmsg_strbuf *ctrl;
2207   struct vki_pmsg_strbuf *data;
2208   *flags |= SfMayBlock;
2209   PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
2210   PRE_REG_READ5(int, "getpmsg",
2211                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2212                 int *, bandp, int *, flagsp);
2213   ctrl = (struct vki_pmsg_strbuf *)ARG2;
2214   data = (struct vki_pmsg_strbuf *)ARG3;
2215   if (ctrl && ctrl->maxlen > 0)
2216      PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
2217   if (data && data->maxlen > 0)
2218      PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
2219   if (ARG4)
2220      PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
2221   if (ARG5)
2222      PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
2223}
2224POST(sys_getpmsg)
2225{
2226   struct vki_pmsg_strbuf *ctrl;
2227   struct vki_pmsg_strbuf *data;
2228   vg_assert(SUCCESS);
2229   ctrl = (struct vki_pmsg_strbuf *)ARG2;
2230   data = (struct vki_pmsg_strbuf *)ARG3;
2231   if (RES == 0 && ctrl && ctrl->len > 0) {
2232      POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
2233   }
2234   if (RES == 0 && data && data->len > 0) {
2235      POST_MEM_WRITE( (Addr)data->buf, data->len);
2236   }
2237}
2238
2239PRE(sys_putpmsg)
2240{
2241   /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
2242   struct vki_pmsg_strbuf *ctrl;
2243   struct vki_pmsg_strbuf *data;
2244   *flags |= SfMayBlock;
2245   PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
2246   PRE_REG_READ5(int, "putpmsg",
2247                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2248                 int, band, int, flags);
2249   ctrl = (struct vki_pmsg_strbuf *)ARG2;
2250   data = (struct vki_pmsg_strbuf *)ARG3;
2251   if (ctrl && ctrl->len > 0)
2252      PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
2253   if (data && data->len > 0)
2254      PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
2255}
2256
2257PRE(sys_getitimer)
2258{
2259   struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2260   PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
2261   PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
2262
2263   PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
2264   PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
2265}
2266
2267POST(sys_getitimer)
2268{
2269   if (ARG2 != (Addr)NULL) {
2270      struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2271      POST_timeval_WRITE( &(value->it_interval) );
2272      POST_timeval_WRITE( &(value->it_value) );
2273   }
2274}
2275
2276PRE(sys_setitimer)
2277{
2278   PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
2279   PRE_REG_READ3(long, "setitimer",
2280                 int, which,
2281                 struct itimerval *, value, struct itimerval *, ovalue);
2282   if (ARG2 != (Addr)NULL) {
2283      struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2284      PRE_timeval_READ( "setitimer(&value->it_interval)",
2285                         &(value->it_interval));
2286      PRE_timeval_READ( "setitimer(&value->it_value)",
2287                         &(value->it_value));
2288   }
2289   if (ARG3 != (Addr)NULL) {
2290      struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2291      PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
2292                         &(ovalue->it_interval));
2293      PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
2294                         &(ovalue->it_value));
2295   }
2296}
2297
2298POST(sys_setitimer)
2299{
2300   if (ARG3 != (Addr)NULL) {
2301      struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2302      POST_timeval_WRITE( &(ovalue->it_interval) );
2303      POST_timeval_WRITE( &(ovalue->it_value) );
2304   }
2305}
2306
2307PRE(sys_chroot)
2308{
2309   PRINT("sys_chroot ( %#lx )", ARG1);
2310   PRE_REG_READ1(long, "chroot", const char *, path);
2311   PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
2312}
2313
2314PRE(sys_madvise)
2315{
2316   *flags |= SfMayBlock;
2317   PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
2318   PRE_REG_READ3(long, "madvise",
2319                 unsigned long, start, vki_size_t, length, int, advice);
2320}
2321
2322#if HAVE_MREMAP
2323PRE(sys_mremap)
2324{
2325   // Nb: this is different to the glibc version described in the man pages,
2326   // which lacks the fifth 'new_address' argument.
2327   if (ARG4 & VKI_MREMAP_FIXED) {
2328      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
2329            ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
2330      PRE_REG_READ5(unsigned long, "mremap",
2331                    unsigned long, old_addr, unsigned long, old_size,
2332                    unsigned long, new_size, unsigned long, flags,
2333                    unsigned long, new_addr);
2334   } else {
2335      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
2336            ARG1, (ULong)ARG2, ARG3, ARG4);
2337      PRE_REG_READ4(unsigned long, "mremap",
2338                    unsigned long, old_addr, unsigned long, old_size,
2339                    unsigned long, new_size, unsigned long, flags);
2340   }
2341   SET_STATUS_from_SysRes(
2342      do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
2343   );
2344}
2345#endif /* HAVE_MREMAP */
2346
2347PRE(sys_nice)
2348{
2349   PRINT("sys_nice ( %ld )", ARG1);
2350   PRE_REG_READ1(long, "nice", int, inc);
2351}
2352
2353PRE(sys_mlock)
2354{
2355   *flags |= SfMayBlock;
2356   PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
2357   PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
2358}
2359
2360PRE(sys_munlock)
2361{
2362   *flags |= SfMayBlock;
2363   PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
2364   PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
2365}
2366
2367PRE(sys_mlockall)
2368{
2369   *flags |= SfMayBlock;
2370   PRINT("sys_mlockall ( %lx )", ARG1);
2371   PRE_REG_READ1(long, "mlockall", int, flags);
2372}
2373
2374PRE(sys_setpriority)
2375{
2376   PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
2377   PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
2378}
2379
2380PRE(sys_getpriority)
2381{
2382   PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
2383   PRE_REG_READ2(long, "getpriority", int, which, int, who);
2384}
2385
2386PRE(sys_pwrite64)
2387{
2388   *flags |= SfMayBlock;
2389#if VG_WORDSIZE == 4
2390   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
2391         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
2392   PRE_REG_READ5(ssize_t, "pwrite64",
2393                 unsigned int, fd, const char *, buf, vki_size_t, count,
2394                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2395#elif VG_WORDSIZE == 8
2396   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
2397         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
2398   PRE_REG_READ4(ssize_t, "pwrite64",
2399                 unsigned int, fd, const char *, buf, vki_size_t, count,
2400                 Word, offset);
2401#else
2402#  error Unexpected word size
2403#endif
2404   PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
2405}
2406
2407PRE(sys_sync)
2408{
2409   *flags |= SfMayBlock;
2410   PRINT("sys_sync ( )");
2411   PRE_REG_READ0(long, "sync");
2412}
2413
2414PRE(sys_fstatfs)
2415{
2416   PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
2417   PRE_REG_READ2(long, "fstatfs",
2418                 unsigned int, fd, struct statfs *, buf);
2419   PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
2420}
2421
2422POST(sys_fstatfs)
2423{
2424   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
2425}
2426
2427PRE(sys_fstatfs64)
2428{
2429   PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
2430   PRE_REG_READ3(long, "fstatfs64",
2431                 unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
2432   PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
2433}
2434POST(sys_fstatfs64)
2435{
2436   POST_MEM_WRITE( ARG3, ARG2 );
2437}
2438
2439PRE(sys_getsid)
2440{
2441   PRINT("sys_getsid ( %ld )", ARG1);
2442   PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
2443}
2444
2445PRE(sys_pread64)
2446{
2447   *flags |= SfMayBlock;
2448#if VG_WORDSIZE == 4
2449   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
2450         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
2451   PRE_REG_READ5(ssize_t, "pread64",
2452                 unsigned int, fd, char *, buf, vki_size_t, count,
2453                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2454#elif VG_WORDSIZE == 8
2455   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
2456         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
2457   PRE_REG_READ4(ssize_t, "pread64",
2458                 unsigned int, fd, char *, buf, vki_size_t, count,
2459                 Word, offset);
2460#else
2461#  error Unexpected word size
2462#endif
2463   PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
2464}
2465POST(sys_pread64)
2466{
2467   vg_assert(SUCCESS);
2468   if (RES > 0) {
2469      POST_MEM_WRITE( ARG2, RES );
2470   }
2471}
2472
2473PRE(sys_mknod)
2474{
2475   PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
2476   PRE_REG_READ3(long, "mknod",
2477                 const char *, pathname, int, mode, unsigned, dev);
2478   PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
2479}
2480
2481PRE(sys_flock)
2482{
2483   *flags |= SfMayBlock;
2484   PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
2485   PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
2486}
2487
2488// Pre_read a char** argument.
2489static void pre_argv_envp(Addr a, ThreadId tid, Char* s1, Char* s2)
2490{
2491   while (True) {
2492      Addr a_deref;
2493      Addr* a_p = (Addr*)a;
2494      PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2495      a_deref = *a_p;
2496      if (0 == a_deref)
2497         break;
2498      PRE_MEM_RASCIIZ( s2, a_deref );
2499      a += sizeof(char*);
2500   }
2501}
2502
2503static Bool i_am_the_only_thread ( void )
2504{
2505   Int c = VG_(count_living_threads)();
2506   vg_assert(c >= 1); /* stay sane */
2507   return c == 1;
2508}
2509
2510/* Wait until all other threads disappear. */
2511void VG_(reap_threads)(ThreadId self)
2512{
2513   while (!i_am_the_only_thread()) {
2514      /* Let other thread(s) run */
2515      VG_(vg_yield)();
2516      VG_(poll_signals)(self);
2517   }
2518   vg_assert(i_am_the_only_thread());
2519}
2520
2521// XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2522// but it seems to work nonetheless...
2523PRE(sys_execve)
2524{
2525   Char*        path = NULL;       /* path to executable */
2526   Char**       envp = NULL;
2527   Char**       argv = NULL;
2528   Char**       arg2copy;
2529   Char*        launcher_basename = NULL;
2530   ThreadState* tst;
2531   Int          i, j, tot_args;
2532   SysRes       res;
2533   Bool         setuid_allowed, trace_this_child;
2534
2535   PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
2536   PRE_REG_READ3(vki_off_t, "execve",
2537                 char *, filename, char **, argv, char **, envp);
2538   PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
2539   if (ARG2 != 0)
2540      pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
2541   if (ARG3 != 0)
2542      pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
2543
2544   vg_assert(VG_(is_valid_tid)(tid));
2545   tst = VG_(get_ThreadState)(tid);
2546
2547   /* Erk.  If the exec fails, then the following will have made a
2548      mess of things which makes it hard for us to continue.  The
2549      right thing to do is piece everything together again in
2550      POST(execve), but that's close to impossible.  Instead, we make
2551      an effort to check that the execve will work before actually
2552      doing it. */
2553
2554   /* Check that the name at least begins in client-accessible storage. */
2555   if (ARG1 == 0 /* obviously bogus */
2556       || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
2557      SET_STATUS_Failure( VKI_EFAULT );
2558      return;
2559   }
2560
2561   // Decide whether or not we want to follow along
2562   trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1 );
2563
2564   // Do the important checks:  it is a file, is executable, permissions are
2565   // ok, etc.  We allow setuid executables to run only in the case when
2566   // we are not simulating them, that is, they to be run natively.
2567   setuid_allowed = trace_this_child  ? False  : True;
2568   res = VG_(pre_exec_check)((const Char*)ARG1, NULL, setuid_allowed);
2569   if (sr_isError(res)) {
2570      SET_STATUS_Failure( sr_Err(res) );
2571      return;
2572   }
2573
2574   /* If we're tracing the child, and the launcher name looks bogus
2575      (possibly because launcher.c couldn't figure it out, see
2576      comments therein) then we have no option but to fail. */
2577   if (trace_this_child
2578       && (VG_(name_of_launcher) == NULL
2579           || VG_(name_of_launcher)[0] != '/')) {
2580      SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
2581      return;
2582   }
2583
2584   /* After this point, we can't recover if the execve fails. */
2585   VG_(debugLog)(1, "syswrap", "Exec of %s\n", (Char*)ARG1);
2586
2587   /* Resistance is futile.  Nuke all other threads.  POSIX mandates
2588      this. (Really, nuke them all, since the new process will make
2589      its own new thread.) */
2590   VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
2591   VG_(reap_threads)(tid);
2592
2593   // Set up the child's exe path.
2594   //
2595   if (trace_this_child) {
2596
2597      // We want to exec the launcher.  Get its pre-remembered path.
2598      path = VG_(name_of_launcher);
2599      // VG_(name_of_launcher) should have been acquired by m_main at
2600      // startup.
2601      vg_assert(path);
2602
2603      launcher_basename = VG_(strrchr)(path, '/');
2604      if (launcher_basename == NULL || launcher_basename[1] == 0) {
2605         launcher_basename = path;  // hmm, tres dubious
2606      } else {
2607         launcher_basename++;
2608      }
2609
2610   } else {
2611      path = (Char*)ARG1;
2612      if (VG_(clo_xml)) {
2613        VG_(printf_xml)("\n<execv/>\n\n</valgrindoutput>\n\n");
2614      } else {
2615        VG_(umsg)("execv called - the tool will now quit\n");
2616      }
2617   }
2618
2619   // Set up the child's environment.
2620   //
2621   // Remove the valgrind-specific stuff from the environment so the
2622   // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
2623   // This is done unconditionally, since if we are tracing the child,
2624   // the child valgrind will set up the appropriate client environment.
2625   // Nb: we make a copy of the environment before trying to mangle it
2626   // as it might be in read-only memory (this was bug #101881).
2627   //
2628   // Then, if tracing the child, set VALGRIND_LIB for it.
2629   //
2630   if (ARG3 == 0) {
2631      envp = NULL;
2632   } else {
2633      envp = VG_(env_clone)( (Char**)ARG3 );
2634      if (envp == NULL) goto hosed;
2635      VG_(env_remove_valgrind_env_stuff)( envp );
2636   }
2637
2638   if (trace_this_child) {
2639      // Set VALGRIND_LIB in ARG3 (the environment)
2640      VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
2641   }
2642
2643   // Set up the child's args.  If not tracing it, they are
2644   // simply ARG2.  Otherwise, they are
2645   //
2646   // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
2647   //
2648   // except that the first VG_(args_for_valgrind_noexecpass) args
2649   // are omitted.
2650   //
2651   if (!trace_this_child) {
2652      argv = (Char**)ARG2;
2653   } else {
2654      vg_assert( VG_(args_for_valgrind) );
2655      vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
2656      vg_assert( VG_(args_for_valgrind_noexecpass)
2657                   <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
2658      /* how many args in total will there be? */
2659      // launcher basename
2660      tot_args = 1;
2661      // V's args
2662      tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
2663      tot_args -= VG_(args_for_valgrind_noexecpass);
2664      // name of client exe
2665      tot_args++;
2666      // args for client exe, skipping [0]
2667      arg2copy = (Char**)ARG2;
2668      if (arg2copy && arg2copy[0]) {
2669         for (i = 1; arg2copy[i]; i++)
2670            tot_args++;
2671      }
2672      // allocate
2673      argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
2674                          (tot_args+1) * sizeof(HChar*) );
2675      if (argv == 0) goto hosed;
2676      // copy
2677      j = 0;
2678      argv[j++] = launcher_basename;
2679      for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
2680         if (i < VG_(args_for_valgrind_noexecpass))
2681            continue;
2682         argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
2683      }
2684      argv[j++] = (Char*)ARG1;
2685      if (arg2copy && arg2copy[0])
2686         for (i = 1; arg2copy[i]; i++)
2687            argv[j++] = arg2copy[i];
2688      argv[j++] = NULL;
2689      // check
2690      vg_assert(j == tot_args+1);
2691   }
2692
2693   /* restore the DATA rlimit for the child */
2694   VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
2695
2696   /*
2697      Set the signal state up for exec.
2698
2699      We need to set the real signal state to make sure the exec'd
2700      process gets SIG_IGN properly.
2701
2702      Also set our real sigmask to match the client's sigmask so that
2703      the exec'd child will get the right mask.  First we need to
2704      clear out any pending signals so they they don't get delivered,
2705      which would confuse things.
2706
2707      XXX This is a bug - the signals should remain pending, and be
2708      delivered to the new process after exec.  There's also a
2709      race-condition, since if someone delivers us a signal between
2710      the sigprocmask and the execve, we'll still get the signal. Oh
2711      well.
2712   */
2713   {
2714      vki_sigset_t allsigs;
2715      vki_siginfo_t info;
2716
2717      /* What this loop does: it queries SCSS (the signal state that
2718         the client _thinks_ the kernel is in) by calling
2719         VG_(do_sys_sigaction), and modifies the real kernel signal
2720         state accordingly. */
2721      for (i = 1; i < VG_(max_signal); i++) {
2722         vki_sigaction_fromK_t sa_f;
2723         vki_sigaction_toK_t   sa_t;
2724         VG_(do_sys_sigaction)(i, NULL, &sa_f);
2725         VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
2726         if (sa_t.ksa_handler == VKI_SIG_IGN)
2727            VG_(sigaction)(i, &sa_t, NULL);
2728         else {
2729            sa_t.ksa_handler = VKI_SIG_DFL;
2730            VG_(sigaction)(i, &sa_t, NULL);
2731         }
2732      }
2733
2734      VG_(sigfillset)(&allsigs);
2735      while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
2736         ;
2737
2738      VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
2739   }
2740
2741   if (0) {
2742      Char **cpp;
2743      VG_(printf)("exec: %s\n", path);
2744      for (cpp = argv; cpp && *cpp; cpp++)
2745         VG_(printf)("argv: %s\n", *cpp);
2746      if (0)
2747         for (cpp = envp; cpp && *cpp; cpp++)
2748            VG_(printf)("env: %s\n", *cpp);
2749   }
2750
2751   SET_STATUS_from_SysRes(
2752      VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
2753   );
2754
2755   /* If we got here, then the execve failed.  We've already made way
2756      too much of a mess to continue, so we have to abort. */
2757  hosed:
2758   vg_assert(FAILURE);
2759   VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
2760                ARG1, (char*)ARG1, ARG2, ARG3, ERR);
2761   VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
2762                            "execve() failing, so I'm dying.\n");
2763   VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
2764                            "or work out how to recover.\n");
2765   VG_(exit)(101);
2766}
2767
2768PRE(sys_access)
2769{
2770   PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
2771   PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
2772   PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
2773}
2774
2775PRE(sys_alarm)
2776{
2777   PRINT("sys_alarm ( %ld )", ARG1);
2778   PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
2779}
2780
2781PRE(sys_brk)
2782{
2783   Addr brk_limit = VG_(brk_limit);
2784   Addr brk_new;
2785
2786   /* libc   says: int   brk(void *end_data_segment);
2787      kernel says: void* brk(void* end_data_segment);  (more or less)
2788
2789      libc returns 0 on success, and -1 (and sets errno) on failure.
2790      Nb: if you ask to shrink the dataseg end below what it
2791      currently is, that always succeeds, even if the dataseg end
2792      doesn't actually change (eg. brk(0)).  Unless it seg faults.
2793
2794      Kernel returns the new dataseg end.  If the brk() failed, this
2795      will be unchanged from the old one.  That's why calling (kernel)
2796      brk(0) gives the current dataseg end (libc brk() just returns
2797      zero in that case).
2798
2799      Both will seg fault if you shrink it back into a text segment.
2800   */
2801   PRINT("sys_brk ( %#lx )", ARG1);
2802   PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
2803
2804   brk_new = do_brk(ARG1);
2805   SET_STATUS_Success( brk_new );
2806
2807   if (brk_new == ARG1) {
2808      /* brk() succeeded */
2809      if (brk_new < brk_limit) {
2810         /* successfully shrunk the data segment. */
2811         VG_TRACK( die_mem_brk, (Addr)ARG1,
2812		   brk_limit-ARG1 );
2813      } else
2814      if (brk_new > brk_limit) {
2815         /* successfully grew the data segment */
2816         VG_TRACK( new_mem_brk, brk_limit,
2817                   ARG1-brk_limit, tid );
2818      }
2819   } else {
2820      /* brk() failed */
2821      vg_assert(brk_limit == brk_new);
2822   }
2823}
2824
2825PRE(sys_chdir)
2826{
2827   PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
2828   PRE_REG_READ1(long, "chdir", const char *, path);
2829   PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
2830}
2831
2832PRE(sys_chmod)
2833{
2834   PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
2835   PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
2836   PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
2837}
2838
2839PRE(sys_chown)
2840{
2841   PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
2842   PRE_REG_READ3(long, "chown",
2843                 const char *, path, vki_uid_t, owner, vki_gid_t, group);
2844   PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
2845}
2846
2847PRE(sys_lchown)
2848{
2849   PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
2850   PRE_REG_READ3(long, "lchown",
2851                 const char *, path, vki_uid_t, owner, vki_gid_t, group);
2852   PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
2853}
2854
2855PRE(sys_close)
2856{
2857   PRINT("sys_close ( %ld )", ARG1);
2858   PRE_REG_READ1(long, "close", unsigned int, fd);
2859
2860   /* Detect and negate attempts by the client to close Valgrind's log fd */
2861   if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
2862        /* If doing -d style logging (which is to fd=2), don't
2863           allow that to be closed either. */
2864        || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
2865      SET_STATUS_Failure( VKI_EBADF );
2866}
2867
2868POST(sys_close)
2869{
2870   if (VG_(clo_track_fds)) record_fd_close(ARG1);
2871}
2872
2873PRE(sys_dup)
2874{
2875   PRINT("sys_dup ( %ld )", ARG1);
2876   PRE_REG_READ1(long, "dup", unsigned int, oldfd);
2877}
2878
2879POST(sys_dup)
2880{
2881   vg_assert(SUCCESS);
2882   if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
2883      VG_(close)(RES);
2884      SET_STATUS_Failure( VKI_EMFILE );
2885   } else {
2886      if (VG_(clo_track_fds))
2887         ML_(record_fd_open_named)(tid, RES);
2888   }
2889}
2890
2891PRE(sys_dup2)
2892{
2893   PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
2894   PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
2895   if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
2896      SET_STATUS_Failure( VKI_EBADF );
2897}
2898
2899POST(sys_dup2)
2900{
2901   vg_assert(SUCCESS);
2902   if (VG_(clo_track_fds))
2903      ML_(record_fd_open_named)(tid, RES);
2904}
2905
2906PRE(sys_fchdir)
2907{
2908   PRINT("sys_fchdir ( %ld )", ARG1);
2909   PRE_REG_READ1(long, "fchdir", unsigned int, fd);
2910}
2911
2912PRE(sys_fchown)
2913{
2914   PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
2915   PRE_REG_READ3(long, "fchown",
2916                 unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
2917}
2918
2919PRE(sys_fchmod)
2920{
2921   PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
2922   PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
2923}
2924
2925PRE(sys_newfstat)
2926{
2927   PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
2928   PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
2929   PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
2930}
2931
2932POST(sys_newfstat)
2933{
2934   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
2935}
2936
2937static vki_sigset_t fork_saved_mask;
2938
2939// In Linux, the sys_fork() function varies across architectures, but we
2940// ignore the various args it gets, and so it looks arch-neutral.  Hmm.
2941PRE(sys_fork)
2942{
2943   Bool is_child;
2944   Int child_pid;
2945   vki_sigset_t mask;
2946
2947   PRINT("sys_fork ( )");
2948   PRE_REG_READ0(long, "fork");
2949
2950   /* Block all signals during fork, so that we can fix things up in
2951      the child without being interrupted. */
2952   VG_(sigfillset)(&mask);
2953   VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
2954
2955   SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
2956
2957   if (!SUCCESS) return;
2958
2959#if defined(VGO_linux) || defined(VGO_aix5)
2960   // RES is 0 for child, non-0 (the child's PID) for parent.
2961   is_child = ( RES == 0 ? True : False );
2962   child_pid = ( is_child ? -1 : RES );
2963#elif defined(VGO_darwin)
2964   // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
2965   is_child = RESHI;
2966   child_pid = RES;
2967#else
2968#  error Unknown OS
2969#endif
2970
2971   VG_(do_atfork_pre)(tid);
2972
2973   if (is_child) {
2974      VG_(do_atfork_child)(tid);
2975
2976      /* restore signal mask */
2977      VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
2978
2979      /* If --child-silent-after-fork=yes was specified, set the
2980         output file descriptors to 'impossible' values.  This is
2981         noticed by send_bytes_to_logging_sink in m_libcprint.c, which
2982         duly stops writing any further output. */
2983      if (VG_(clo_child_silent_after_fork)) {
2984         if (!VG_(log_output_sink).is_socket)
2985            VG_(log_output_sink).fd = -1;
2986         if (!VG_(xml_output_sink).is_socket)
2987            VG_(xml_output_sink).fd = -1;
2988      }
2989
2990   } else {
2991      VG_(do_atfork_parent)(tid);
2992
2993      PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
2994
2995      /* restore signal mask */
2996      VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
2997   }
2998}
2999
3000PRE(sys_ftruncate)
3001{
3002   *flags |= SfMayBlock;
3003   PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
3004   PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
3005}
3006
3007PRE(sys_truncate)
3008{
3009   *flags |= SfMayBlock;
3010   PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3011   PRE_REG_READ2(long, "truncate",
3012                 const char *, path, unsigned long, length);
3013   PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
3014}
3015
3016PRE(sys_ftruncate64)
3017{
3018   *flags |= SfMayBlock;
3019#if VG_WORDSIZE == 4
3020   PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
3021   PRE_REG_READ3(long, "ftruncate64",
3022                 unsigned int, fd,
3023                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3024#else
3025   PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
3026   PRE_REG_READ2(long, "ftruncate64",
3027                 unsigned int,fd, UWord,length);
3028#endif
3029}
3030
3031PRE(sys_truncate64)
3032{
3033   *flags |= SfMayBlock;
3034#if VG_WORDSIZE == 4
3035   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
3036   PRE_REG_READ3(long, "truncate64",
3037                 const char *, path,
3038                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3039#else
3040   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
3041   PRE_REG_READ2(long, "truncate64",
3042                 const char *,path, UWord,length);
3043#endif
3044   PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
3045}
3046
3047PRE(sys_getdents)
3048{
3049   *flags |= SfMayBlock;
3050   PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
3051   PRE_REG_READ3(long, "getdents",
3052                 unsigned int, fd, struct linux_dirent *, dirp,
3053                 unsigned int, count);
3054   PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
3055}
3056
3057POST(sys_getdents)
3058{
3059   vg_assert(SUCCESS);
3060   if (RES > 0)
3061      POST_MEM_WRITE( ARG2, RES );
3062}
3063
3064PRE(sys_getdents64)
3065{
3066   *flags |= SfMayBlock;
3067   PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
3068   PRE_REG_READ3(long, "getdents64",
3069                 unsigned int, fd, struct linux_dirent64 *, dirp,
3070                 unsigned int, count);
3071   PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
3072}
3073
3074POST(sys_getdents64)
3075{
3076   vg_assert(SUCCESS);
3077   if (RES > 0)
3078      POST_MEM_WRITE( ARG2, RES );
3079}
3080
3081PRE(sys_getgroups)
3082{
3083   PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
3084   PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
3085   if (ARG1 > 0)
3086      PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3087}
3088
3089POST(sys_getgroups)
3090{
3091   vg_assert(SUCCESS);
3092   if (ARG1 > 0 && RES > 0)
3093      POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
3094}
3095
3096PRE(sys_getcwd)
3097{
3098   // Comment from linux/fs/dcache.c:
3099   //   NOTE! The user-level library version returns a character pointer.
3100   //   The kernel system call just returns the length of the buffer filled
3101   //   (which includes the ending '\0' character), or a negative error
3102   //   value.
3103   // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
3104   PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
3105   PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
3106   PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
3107}
3108
3109POST(sys_getcwd)
3110{
3111   vg_assert(SUCCESS);
3112   if (RES != (Addr)NULL)
3113      POST_MEM_WRITE( ARG1, RES );
3114}
3115
3116PRE(sys_geteuid)
3117{
3118   PRINT("sys_geteuid ( )");
3119   PRE_REG_READ0(long, "geteuid");
3120}
3121
3122PRE(sys_getegid)
3123{
3124   PRINT("sys_getegid ( )");
3125   PRE_REG_READ0(long, "getegid");
3126}
3127
3128PRE(sys_getgid)
3129{
3130   PRINT("sys_getgid ( )");
3131   PRE_REG_READ0(long, "getgid");
3132}
3133
3134PRE(sys_getpid)
3135{
3136   PRINT("sys_getpid ()");
3137   PRE_REG_READ0(long, "getpid");
3138}
3139
3140PRE(sys_getpgid)
3141{
3142   PRINT("sys_getpgid ( %ld )", ARG1);
3143   PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
3144}
3145
3146PRE(sys_getpgrp)
3147{
3148   PRINT("sys_getpgrp ()");
3149   PRE_REG_READ0(long, "getpgrp");
3150}
3151
3152PRE(sys_getppid)
3153{
3154   PRINT("sys_getppid ()");
3155   PRE_REG_READ0(long, "getppid");
3156}
3157
3158static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
3159{
3160   POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
3161
3162#ifdef _RLIMIT_POSIX_FLAG
3163   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
3164   // Unset it here to make the switch case below work correctly.
3165   a1 &= ~_RLIMIT_POSIX_FLAG;
3166#endif
3167
3168   switch (a1) {
3169   case VKI_RLIMIT_NOFILE:
3170      ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
3171      ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
3172      break;
3173
3174   case VKI_RLIMIT_DATA:
3175      *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
3176      break;
3177
3178   case VKI_RLIMIT_STACK:
3179      *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
3180      break;
3181   }
3182}
3183
3184PRE(sys_old_getrlimit)
3185{
3186   PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
3187   PRE_REG_READ2(long, "old_getrlimit",
3188                 unsigned int, resource, struct rlimit *, rlim);
3189   PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3190}
3191
3192POST(sys_old_getrlimit)
3193{
3194   common_post_getrlimit(tid, ARG1, ARG2);
3195}
3196
3197PRE(sys_getrlimit)
3198{
3199   PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
3200   PRE_REG_READ2(long, "getrlimit",
3201                 unsigned int, resource, struct rlimit *, rlim);
3202   PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3203}
3204
3205POST(sys_getrlimit)
3206{
3207   common_post_getrlimit(tid, ARG1, ARG2);
3208}
3209
3210PRE(sys_getrusage)
3211{
3212   PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
3213   PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
3214   PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
3215}
3216
3217POST(sys_getrusage)
3218{
3219   vg_assert(SUCCESS);
3220   if (RES == 0)
3221      POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
3222}
3223
3224PRE(sys_gettimeofday)
3225{
3226   PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
3227   PRE_REG_READ2(long, "gettimeofday",
3228                 struct timeval *, tv, struct timezone *, tz);
3229   // GrP fixme does darwin write to *tz anymore?
3230   if (ARG1 != 0)
3231      PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
3232   if (ARG2 != 0)
3233      PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3234}
3235
3236POST(sys_gettimeofday)
3237{
3238   vg_assert(SUCCESS);
3239   if (RES == 0) {
3240      if (ARG1 != 0)
3241         POST_timeval_WRITE( ARG1 );
3242      if (ARG2 != 0)
3243	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
3244   }
3245}
3246
3247PRE(sys_settimeofday)
3248{
3249   PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
3250   PRE_REG_READ2(long, "settimeofday",
3251                 struct timeval *, tv, struct timezone *, tz);
3252   if (ARG1 != 0)
3253      PRE_timeval_READ( "settimeofday(tv)", ARG1 );
3254   if (ARG2 != 0) {
3255      PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3256      /* maybe should warn if tz->tz_dsttime is non-zero? */
3257   }
3258}
3259
3260PRE(sys_getuid)
3261{
3262   PRINT("sys_getuid ( )");
3263   PRE_REG_READ0(long, "getuid");
3264}
3265
3266void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
3267{
3268   /* We don't have any specific information on it, so
3269      try to do something reasonable based on direction and
3270      size bits.  The encoding scheme is described in
3271      /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3272
3273      According to Simon Hausmann, _IOC_READ means the kernel
3274      writes a value to the ioctl value passed from the user
3275      space and the other way around with _IOC_WRITE. */
3276
3277   UInt dir  = _VKI_IOC_DIR(request);
3278   UInt size = _VKI_IOC_SIZE(request);
3279   if (VG_(strstr)(VG_(clo_sim_hints), "lax-ioctls") != NULL) {
3280      /*
3281       * Be very lax about ioctl handling; the only
3282       * assumption is that the size is correct. Doesn't
3283       * require the full buffer to be initialized when
3284       * writing.  Without this, using some device
3285       * drivers with a large number of strange ioctl
3286       * commands becomes very tiresome.
3287       */
3288   } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
3289      //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3290      //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3291      static Int moans = 3;
3292      if (moans > 0 && !VG_(clo_xml)) {
3293         moans--;
3294         VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
3295                   " with no size/direction hints\n", request);
3296         VG_(umsg)("   This could cause spurious value errors to appear.\n");
3297         VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
3298                   "guidance on writing a proper wrapper.\n" );
3299      }
3300   } else {
3301      //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3302      //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3303      if ((dir & _VKI_IOC_WRITE) && size > 0)
3304         PRE_MEM_READ( "ioctl(generic)", arg, size);
3305      if ((dir & _VKI_IOC_READ) && size > 0)
3306         PRE_MEM_WRITE( "ioctl(generic)", arg, size);
3307   }
3308}
3309
3310void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3311{
3312   /* We don't have any specific information on it, so
3313      try to do something reasonable based on direction and
3314      size bits.  The encoding scheme is described in
3315      /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3316
3317      According to Simon Hausmann, _IOC_READ means the kernel
3318      writes a value to the ioctl value passed from the user
3319      space and the other way around with _IOC_WRITE. */
3320
3321   UInt dir  = _VKI_IOC_DIR(request);
3322   UInt size = _VKI_IOC_SIZE(request);
3323   if (size > 0 && (dir & _VKI_IOC_READ)
3324       && res == 0
3325       && arg != (Addr)NULL)
3326   {
3327      POST_MEM_WRITE(arg, size);
3328   }
3329}
3330
3331/*
3332   If we're sending a SIGKILL to one of our own threads, then simulate
3333   it rather than really sending the signal, so that the target thread
3334   gets a chance to clean up.  Returns True if we did the killing (or
3335   no killing is necessary), and False if the caller should use the
3336   normal kill syscall.
3337
3338   "pid" is any pid argument which can be passed to kill; group kills
3339   (< -1, 0), and owner kills (-1) are ignored, on the grounds that
3340   they'll most likely hit all the threads and we won't need to worry
3341   about cleanup.  In truth, we can't fully emulate these multicast
3342   kills.
3343
3344   "tgid" is a thread group id.  If it is not -1, then the target
3345   thread must be in that thread group.
3346 */
3347Bool ML_(do_sigkill)(Int pid, Int tgid)
3348{
3349   ThreadState *tst;
3350   ThreadId tid;
3351
3352   if (pid <= 0)
3353      return False;
3354
3355   tid = VG_(lwpid_to_vgtid)(pid);
3356   if (tid == VG_INVALID_THREADID)
3357      return False;		/* none of our threads */
3358
3359   tst = VG_(get_ThreadState)(tid);
3360   if (tst == NULL || tst->status == VgTs_Empty)
3361      return False;		/* hm, shouldn't happen */
3362
3363   if (tgid != -1 && tst->os_state.threadgroup != tgid)
3364      return False;		/* not the right thread group */
3365
3366   /* Check to see that the target isn't already exiting. */
3367   if (!VG_(is_exiting)(tid)) {
3368      if (VG_(clo_trace_signals))
3369	 VG_(message)(Vg_DebugMsg,
3370                      "Thread %d being killed with SIGKILL\n",
3371                      tst->tid);
3372
3373      tst->exitreason = VgSrc_FatalSig;
3374      tst->os_state.fatalsig = VKI_SIGKILL;
3375
3376      if (!VG_(is_running_thread)(tid))
3377	 VG_(get_thread_out_of_syscall)(tid);
3378   }
3379
3380   return True;
3381}
3382
3383PRE(sys_kill)
3384{
3385   PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
3386   PRE_REG_READ2(long, "kill", int, pid, int, sig);
3387   if (!ML_(client_signal_OK)(ARG2)) {
3388      SET_STATUS_Failure( VKI_EINVAL );
3389      return;
3390   }
3391
3392   /* If we're sending SIGKILL, check to see if the target is one of
3393      our threads and handle it specially. */
3394   if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
3395      SET_STATUS_Success(0);
3396   else
3397      /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
3398         affecting how posix-compliant the call is.  I guess it is
3399         harmless to pass the 3rd arg on other platforms; hence pass
3400         it on all. */
3401      SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
3402
3403   if (VG_(clo_trace_signals))
3404      VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
3405		   ARG2, ARG1);
3406
3407   /* This kill might have given us a pending signal.  Ask for a check once
3408      the syscall is done. */
3409   *flags |= SfPollAfter;
3410}
3411
3412PRE(sys_link)
3413{
3414   *flags |= SfMayBlock;
3415   PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3416   PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
3417   PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
3418   PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
3419}
3420
3421PRE(sys_newlstat)
3422{
3423   PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
3424   PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
3425   PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
3426   PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
3427}
3428
3429POST(sys_newlstat)
3430{
3431   vg_assert(SUCCESS);
3432   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3433}
3434
3435PRE(sys_mkdir)
3436{
3437   *flags |= SfMayBlock;
3438   PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3439   PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
3440   PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
3441}
3442
3443PRE(sys_mprotect)
3444{
3445   PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
3446   PRE_REG_READ3(long, "mprotect",
3447                 unsigned long, addr, vki_size_t, len, unsigned long, prot);
3448
3449   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
3450      SET_STATUS_Failure( VKI_ENOMEM );
3451   }
3452#if defined(VKI_PROT_GROWSDOWN)
3453   else
3454   if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
3455      /* Deal with mprotects on growable stack areas.
3456
3457         The critical files to understand all this are mm/mprotect.c
3458         in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
3459         glibc.
3460
3461         The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
3462         round the start/end address of mprotect to the start/end of
3463         the underlying vma and glibc uses that as an easy way to
3464         change the protection of the stack by calling mprotect on the
3465         last page of the stack with PROT_GROWSDOWN set.
3466
3467         The sanity check provided by the kernel is that the vma must
3468         have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
3469      UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
3470      NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
3471      NSegment const *rseg;
3472
3473      vg_assert(aseg);
3474
3475      if (grows == VKI_PROT_GROWSDOWN) {
3476         rseg = VG_(am_next_nsegment)( (NSegment*)aseg, False/*backwards*/ );
3477         if (rseg &&
3478             rseg->kind == SkResvn &&
3479             rseg->smode == SmUpper &&
3480             rseg->end+1 == aseg->start) {
3481            Addr end = ARG1 + ARG2;
3482            ARG1 = aseg->start;
3483            ARG2 = end - aseg->start;
3484            ARG3 &= ~VKI_PROT_GROWSDOWN;
3485         } else {
3486            SET_STATUS_Failure( VKI_EINVAL );
3487         }
3488      } else if (grows == VKI_PROT_GROWSUP) {
3489         rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
3490         if (rseg &&
3491             rseg->kind == SkResvn &&
3492             rseg->smode == SmLower &&
3493             aseg->end+1 == rseg->start) {
3494            ARG2 = aseg->end - ARG1 + 1;
3495            ARG3 &= ~VKI_PROT_GROWSUP;
3496         } else {
3497            SET_STATUS_Failure( VKI_EINVAL );
3498         }
3499      } else {
3500         /* both GROWSUP and GROWSDOWN */
3501         SET_STATUS_Failure( VKI_EINVAL );
3502      }
3503   }
3504#endif   // defined(VKI_PROT_GROWSDOWN)
3505}
3506
3507POST(sys_mprotect)
3508{
3509   Addr a    = ARG1;
3510   SizeT len = ARG2;
3511   Int  prot = ARG3;
3512
3513   ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
3514}
3515
3516PRE(sys_munmap)
3517{
3518   if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
3519   PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
3520   PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
3521
3522   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
3523      SET_STATUS_Failure( VKI_EINVAL );
3524}
3525
3526POST(sys_munmap)
3527{
3528   Addr  a   = ARG1;
3529   SizeT len = ARG2;
3530
3531   ML_(notify_core_and_tool_of_munmap)( (Addr64)a, (ULong)len );
3532}
3533
3534PRE(sys_mincore)
3535{
3536   PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
3537   PRE_REG_READ3(long, "mincore",
3538                 unsigned long, start, vki_size_t, length,
3539                 unsigned char *, vec);
3540   PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3541}
3542POST(sys_mincore)
3543{
3544   POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3545}
3546
3547PRE(sys_nanosleep)
3548{
3549   *flags |= SfMayBlock|SfPostOnFail;
3550   PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
3551   PRE_REG_READ2(long, "nanosleep",
3552                 struct timespec *, req, struct timespec *, rem);
3553   PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
3554   if (ARG2 != 0)
3555      PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
3556}
3557
3558POST(sys_nanosleep)
3559{
3560   vg_assert(SUCCESS || FAILURE);
3561   if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
3562      POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
3563}
3564
3565PRE(sys_open)
3566{
3567   if (ARG2 & VKI_O_CREAT) {
3568      // 3-arg version
3569      PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
3570      PRE_REG_READ3(long, "open",
3571                    const char *, filename, int, flags, int, mode);
3572   } else {
3573      // 2-arg version
3574      PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
3575      PRE_REG_READ2(long, "open",
3576                    const char *, filename, int, flags);
3577   }
3578   PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
3579
3580#if defined(VGO_linux)
3581   /* Handle the case where the open is of /proc/self/cmdline or
3582      /proc/<pid>/cmdline, and just give it a copy of the fd for the
3583      fake file we cooked up at startup (in m_main).  Also, seek the
3584      cloned fd back to the start. */
3585   {
3586      HChar  name[30];
3587      Char*  arg1s = (Char*) ARG1;
3588      SysRes sres;
3589
3590      VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
3591      if (ML_(safe_to_deref)( arg1s, 1 ) &&
3592          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
3593         )
3594      {
3595         sres = VG_(dup)( VG_(cl_cmdline_fd) );
3596         SET_STATUS_from_SysRes( sres );
3597         if (!sr_isError(sres)) {
3598            OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
3599            if (off < 0)
3600               SET_STATUS_Failure( VKI_EMFILE );
3601         }
3602         return;
3603      }
3604   }
3605#endif // defined(VGO_linux)
3606
3607   /* Otherwise handle normally */
3608   *flags |= SfMayBlock;
3609}
3610
3611POST(sys_open)
3612{
3613   vg_assert(SUCCESS);
3614   if (!ML_(fd_allowed)(RES, "open", tid, True)) {
3615      VG_(close)(RES);
3616      SET_STATUS_Failure( VKI_EMFILE );
3617   } else {
3618      if (VG_(clo_track_fds))
3619         ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
3620   }
3621}
3622
3623PRE(sys_read)
3624{
3625   *flags |= SfMayBlock;
3626   PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3627   PRE_REG_READ3(ssize_t, "read",
3628                 unsigned int, fd, char *, buf, vki_size_t, count);
3629
3630   if (!ML_(fd_allowed)(ARG1, "read", tid, False))
3631      SET_STATUS_Failure( VKI_EBADF );
3632   else
3633      PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
3634}
3635
3636POST(sys_read)
3637{
3638   vg_assert(SUCCESS);
3639   POST_MEM_WRITE( ARG2, RES );
3640}
3641
3642PRE(sys_write)
3643{
3644   Bool ok;
3645   *flags |= SfMayBlock;
3646   PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3647   PRE_REG_READ3(ssize_t, "write",
3648                 unsigned int, fd, const char *, buf, vki_size_t, count);
3649   /* check to see if it is allowed.  If not, try for an exemption from
3650      --sim-hints=enable-outer (used for self hosting). */
3651   ok = ML_(fd_allowed)(ARG1, "write", tid, False);
3652   if (!ok && ARG1 == 2/*stderr*/
3653           && VG_(strstr)(VG_(clo_sim_hints),"enable-outer"))
3654      ok = True;
3655   if (!ok)
3656      SET_STATUS_Failure( VKI_EBADF );
3657   else
3658      PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
3659}
3660
3661PRE(sys_creat)
3662{
3663   *flags |= SfMayBlock;
3664   PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3665   PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
3666   PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
3667}
3668
3669POST(sys_creat)
3670{
3671   vg_assert(SUCCESS);
3672   if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
3673      VG_(close)(RES);
3674      SET_STATUS_Failure( VKI_EMFILE );
3675   } else {
3676      if (VG_(clo_track_fds))
3677         ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
3678   }
3679}
3680
3681PRE(sys_poll)
3682{
3683   /* struct pollfd {
3684        int fd;           -- file descriptor
3685        short events;     -- requested events
3686        short revents;    -- returned events
3687      };
3688      int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
3689   */
3690   UInt i;
3691   struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3692   *flags |= SfMayBlock;
3693   PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
3694   PRE_REG_READ3(long, "poll",
3695                 struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
3696
3697   for (i = 0; i < ARG2; i++) {
3698      PRE_MEM_READ( "poll(ufds.fd)",
3699                    (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
3700      PRE_MEM_READ( "poll(ufds.events)",
3701                    (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
3702      PRE_MEM_WRITE( "poll(ufds.reventss)",
3703                     (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3704   }
3705}
3706
3707POST(sys_poll)
3708{
3709   if (RES >= 0) {
3710      UInt i;
3711      struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3712      for (i = 0; i < ARG2; i++)
3713	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3714   }
3715}
3716
3717PRE(sys_readlink)
3718{
3719   Word saved = SYSNO;
3720
3721   PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
3722   PRE_REG_READ3(long, "readlink",
3723                 const char *, path, char *, buf, int, bufsiz);
3724   PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
3725   PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
3726
3727   {
3728#if defined(VGO_linux)
3729      /*
3730       * Handle the case where readlink is looking at /proc/self/exe or
3731       * /proc/<pid>/exe.
3732       */
3733      HChar name[25];
3734      Char* arg1s = (Char*) ARG1;
3735      VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
3736      if (ML_(safe_to_deref)(arg1s, 1) &&
3737          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
3738         )
3739      {
3740         VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
3741         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
3742                                                         ARG2, ARG3));
3743      } else
3744#endif // defined(VGO_linux)
3745      {
3746         /* Normal case */
3747         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
3748      }
3749   }
3750
3751   if (SUCCESS && RES > 0)
3752      POST_MEM_WRITE( ARG2, RES );
3753}
3754
3755PRE(sys_readv)
3756{
3757   Int i;
3758   struct vki_iovec * vec;
3759   *flags |= SfMayBlock;
3760   PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
3761   PRE_REG_READ3(ssize_t, "readv",
3762                 unsigned long, fd, const struct iovec *, vector,
3763                 unsigned long, count);
3764   if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
3765      SET_STATUS_Failure( VKI_EBADF );
3766   } else {
3767      PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
3768
3769      if (ARG2 != 0) {
3770         /* ToDo: don't do any of the following if the vector is invalid */
3771         vec = (struct vki_iovec *)ARG2;
3772         for (i = 0; i < (Int)ARG3; i++)
3773            PRE_MEM_WRITE( "readv(vector[...])",
3774                           (Addr)vec[i].iov_base, vec[i].iov_len );
3775      }
3776   }
3777}
3778
3779POST(sys_readv)
3780{
3781   vg_assert(SUCCESS);
3782   if (RES > 0) {
3783      Int i;
3784      struct vki_iovec * vec = (struct vki_iovec *)ARG2;
3785      Int remains = RES;
3786
3787      /* RES holds the number of bytes read. */
3788      for (i = 0; i < (Int)ARG3; i++) {
3789	 Int nReadThisBuf = vec[i].iov_len;
3790	 if (nReadThisBuf > remains) nReadThisBuf = remains;
3791	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
3792	 remains -= nReadThisBuf;
3793	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
3794      }
3795   }
3796}
3797
3798PRE(sys_rename)
3799{
3800   PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3801   PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
3802   PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
3803   PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
3804}
3805
3806PRE(sys_rmdir)
3807{
3808   *flags |= SfMayBlock;
3809   PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
3810   PRE_REG_READ1(long, "rmdir", const char *, pathname);
3811   PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
3812}
3813
3814PRE(sys_select)
3815{
3816   *flags |= SfMayBlock;
3817   PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
3818   PRE_REG_READ5(long, "select",
3819                 int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
3820                 vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
3821   // XXX: this possibly understates how much memory is read.
3822   if (ARG2 != 0)
3823      PRE_MEM_READ( "select(readfds)",
3824		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
3825   if (ARG3 != 0)
3826      PRE_MEM_READ( "select(writefds)",
3827		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
3828   if (ARG4 != 0)
3829      PRE_MEM_READ( "select(exceptfds)",
3830		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
3831   if (ARG5 != 0)
3832      PRE_timeval_READ( "select(timeout)", ARG5 );
3833}
3834
3835PRE(sys_setgid)
3836{
3837   PRINT("sys_setgid ( %ld )", ARG1);
3838   PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
3839}
3840
3841PRE(sys_setsid)
3842{
3843   PRINT("sys_setsid ( )");
3844   PRE_REG_READ0(long, "setsid");
3845}
3846
3847PRE(sys_setgroups)
3848{
3849   PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
3850   PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
3851   if (ARG1 > 0)
3852      PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3853}
3854
3855PRE(sys_setpgid)
3856{
3857   PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
3858   PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
3859}
3860
3861PRE(sys_setregid)
3862{
3863   PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
3864   PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
3865}
3866
3867PRE(sys_setreuid)
3868{
3869   PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
3870   PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
3871}
3872
3873PRE(sys_setrlimit)
3874{
3875   UWord arg1 = ARG1;
3876   PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
3877   PRE_REG_READ2(long, "setrlimit",
3878                 unsigned int, resource, struct rlimit *, rlim);
3879   PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3880
3881#ifdef _RLIMIT_POSIX_FLAG
3882   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
3883   // Unset it here to make the if statements below work correctly.
3884   arg1 &= ~_RLIMIT_POSIX_FLAG;
3885#endif
3886
3887   if (arg1 == VKI_RLIMIT_NOFILE) {
3888      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
3889          ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
3890         SET_STATUS_Failure( VKI_EPERM );
3891      }
3892      else {
3893         VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
3894         SET_STATUS_Success( 0 );
3895      }
3896   }
3897   else if (arg1 == VKI_RLIMIT_DATA) {
3898      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
3899          ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
3900         SET_STATUS_Failure( VKI_EPERM );
3901      }
3902      else {
3903         VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
3904         SET_STATUS_Success( 0 );
3905      }
3906   }
3907   else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
3908      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
3909          ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
3910         SET_STATUS_Failure( VKI_EPERM );
3911      }
3912      else {
3913         VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
3914         VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
3915         SET_STATUS_Success( 0 );
3916      }
3917   }
3918}
3919
3920PRE(sys_setuid)
3921{
3922   PRINT("sys_setuid ( %ld )", ARG1);
3923   PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
3924}
3925
3926PRE(sys_newstat)
3927{
3928   PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
3929   PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
3930   PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
3931   PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
3932}
3933
3934POST(sys_newstat)
3935{
3936   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3937}
3938
3939PRE(sys_statfs)
3940{
3941   PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
3942   PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
3943   PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
3944   PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
3945}
3946POST(sys_statfs)
3947{
3948   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
3949}
3950
3951PRE(sys_statfs64)
3952{
3953   PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
3954   PRE_REG_READ3(long, "statfs64",
3955                 const char *, path, vki_size_t, size, struct statfs64 *, buf);
3956   PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
3957   PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
3958}
3959POST(sys_statfs64)
3960{
3961   POST_MEM_WRITE( ARG3, ARG2 );
3962}
3963
3964PRE(sys_symlink)
3965{
3966   *flags |= SfMayBlock;
3967   PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3968   PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
3969   PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
3970   PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
3971}
3972
3973PRE(sys_time)
3974{
3975   /* time_t time(time_t *t); */
3976   PRINT("sys_time ( %#lx )",ARG1);
3977   PRE_REG_READ1(long, "time", int *, t);
3978   if (ARG1 != 0) {
3979      PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
3980   }
3981}
3982
3983POST(sys_time)
3984{
3985   if (ARG1 != 0) {
3986      POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
3987   }
3988}
3989
3990PRE(sys_times)
3991{
3992   PRINT("sys_times ( %#lx )", ARG1);
3993   PRE_REG_READ1(long, "times", struct tms *, buf);
3994   if (ARG1 != 0) {
3995      PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
3996   }
3997}
3998
3999POST(sys_times)
4000{
4001   if (ARG1 != 0) {
4002      POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
4003   }
4004}
4005
4006PRE(sys_umask)
4007{
4008   PRINT("sys_umask ( %ld )", ARG1);
4009   PRE_REG_READ1(long, "umask", int, mask);
4010}
4011
4012PRE(sys_unlink)
4013{
4014   *flags |= SfMayBlock;
4015   PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
4016   PRE_REG_READ1(long, "unlink", const char *, pathname);
4017   PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
4018}
4019
4020PRE(sys_newuname)
4021{
4022   PRINT("sys_newuname ( %#lx )", ARG1);
4023   PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
4024   PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
4025}
4026
4027POST(sys_newuname)
4028{
4029   if (ARG1 != 0) {
4030      POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
4031   }
4032}
4033
4034PRE(sys_waitpid)
4035{
4036   *flags |= SfMayBlock;
4037   PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
4038   PRE_REG_READ3(long, "waitpid",
4039                 vki_pid_t, pid, unsigned int *, status, int, options);
4040
4041   if (ARG2 != (Addr)NULL)
4042      PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
4043}
4044
4045POST(sys_waitpid)
4046{
4047   if (ARG2 != (Addr)NULL)
4048      POST_MEM_WRITE( ARG2, sizeof(int) );
4049}
4050
4051PRE(sys_wait4)
4052{
4053   *flags |= SfMayBlock;
4054   PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);
4055
4056   PRE_REG_READ4(long, "wait4",
4057                 vki_pid_t, pid, unsigned int *, status, int, options,
4058                 struct rusage *, rusage);
4059   if (ARG2 != (Addr)NULL)
4060      PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
4061   if (ARG4 != (Addr)NULL)
4062      PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
4063}
4064
4065POST(sys_wait4)
4066{
4067   if (ARG2 != (Addr)NULL)
4068      POST_MEM_WRITE( ARG2, sizeof(int) );
4069   if (ARG4 != (Addr)NULL)
4070      POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
4071}
4072
4073PRE(sys_writev)
4074{
4075   Int i;
4076   struct vki_iovec * vec;
4077   *flags |= SfMayBlock;
4078   PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
4079   PRE_REG_READ3(ssize_t, "writev",
4080                 unsigned long, fd, const struct iovec *, vector,
4081                 unsigned long, count);
4082   if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
4083      SET_STATUS_Failure( VKI_EBADF );
4084   } else {
4085      PRE_MEM_READ( "writev(vector)",
4086		     ARG2, ARG3 * sizeof(struct vki_iovec) );
4087      if (ARG2 != 0) {
4088         /* ToDo: don't do any of the following if the vector is invalid */
4089         vec = (struct vki_iovec *)ARG2;
4090         for (i = 0; i < (Int)ARG3; i++)
4091            PRE_MEM_READ( "writev(vector[...])",
4092                           (Addr)vec[i].iov_base, vec[i].iov_len );
4093      }
4094   }
4095}
4096
4097PRE(sys_utimes)
4098{
4099   PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4100   PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
4101   PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
4102   if (ARG2 != 0) {
4103      PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
4104      PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
4105   }
4106}
4107
4108PRE(sys_acct)
4109{
4110   PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
4111   PRE_REG_READ1(long, "acct", const char *, filename);
4112   PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
4113}
4114
4115PRE(sys_pause)
4116{
4117   *flags |= SfMayBlock;
4118   PRINT("sys_pause ( )");
4119   PRE_REG_READ0(long, "pause");
4120}
4121
4122PRE(sys_sigaltstack)
4123{
4124   PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
4125   PRE_REG_READ2(int, "sigaltstack",
4126                 const vki_stack_t *, ss, vki_stack_t *, oss);
4127   if (ARG1 != 0) {
4128      const vki_stack_t *ss = (vki_stack_t *)ARG1;
4129      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
4130      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
4131      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
4132   }
4133   if (ARG2 != 0) {
4134      PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
4135   }
4136
4137   SET_STATUS_from_SysRes(
4138      VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
4139                              (vki_stack_t*)ARG2)
4140   );
4141}
4142POST(sys_sigaltstack)
4143{
4144   vg_assert(SUCCESS);
4145   if (RES == 0 && ARG2 != 0)
4146      POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
4147}
4148
4149#undef PRE
4150#undef POST
4151
4152#endif // defined(VGO_linux) || defined(VGO_darwin)
4153
4154/*--------------------------------------------------------------------*/
4155/*--- end                                                          ---*/
4156/*--------------------------------------------------------------------*/
4157