18d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
28d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/*--------------------------------------------------------------------*/
304949598a23f501be6eec21697465fd46a28840aDmitry Shmidt/*--- Wrappers for generic Unix system calls                       ---*/
48d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/*---                                            syswrap-generic.c ---*/
5c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt/*--------------------------------------------------------------------*/
6c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt
78d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/*
88d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   This file is part of Valgrind, a dynamic binary instrumentation
98d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   framework.
108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   Copyright (C) 2000-2013 Julian Seward
128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      jseward@acm.org
138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt   This program is free software; you can redistribute it and/or
158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   modify it under the terms of the GNU General Public License as
168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   published by the Free Software Foundation; either version 2 of the
178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   License, or (at your option) any later version.
188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   This program is distributed in the hope that it will be useful, but
208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   WITHOUT ANY WARRANTY; without even the implied warranty of
218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   General Public License for more details.
238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   You should have received a copy of the GNU General Public License
258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   along with this program; if not, write to the Free Software
268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   02111-1307, USA.
288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   The GNU General Public License is contained in the file COPYING.
308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt*/
318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#if defined(VGO_linux) || defined(VGO_darwin)
338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_basics.h"
358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_vki.h"
368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_vkiscnums.h"
378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_threadstate.h"
398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_debuginfo.h"     // VG_(di_notify_*)
408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_aspacemgr.h"
418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_transtab.h"      // VG_(discard_translations)
428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_xarray.h"
438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_debuglog.h"
458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_errormgr.h"
468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_gdbserver.h"     // VG_(gdbserver)
478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_libcbase.h"
4804949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#include "pub_core_libcassert.h"
4904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#include "pub_core_libcfile.h"
5004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#include "pub_core_libcprint.h"
5104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#include "pub_core_libcproc.h"
5204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#include "pub_core_libcsignal.h"
5304949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#include "pub_core_machine.h"       // VG_(get_SP)
5404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#include "pub_core_mallocfree.h"
5504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#include "pub_core_options.h"
568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_scheduler.h"
578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_signals.h"
5804949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_syscall.h"
608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_syswrap.h"
618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_tooliface.h"
628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "pub_core_ume.h"
631f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
641f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt#include "priv_types_n_macros.h"
651f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt#include "priv_syswrap-generic.h"
661f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
671f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt#include "config.h"
681f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
69d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt
708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/* Returns True iff address range is something the client can
71fb79edc9df1f20461e90e478363d207348213d35Dmitry Shmidt   plausibly mess with: all of it is either already belongs to the
72d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt   client or is free or a reservation. */
738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
748d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtBool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                   const HChar *syscallname)
768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   Bool ret;
788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   if (size == 0)
808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return True;
818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   ret = VG_(am_is_valid_for_client_or_free_or_resvn)
838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt            (start,size,VKI_PROT_NONE);
848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   if (0)
868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt		  syscallname, start, start+size-1, (Int)ret);
88d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt
898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   if (!ret && syscallname != NULL) {
908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                               "to modify addresses %#lx-%#lx\n",
92fb79edc9df1f20461e90e478363d207348213d35Dmitry Shmidt                               syscallname, start, start+size-1);
93fb79edc9df1f20461e90e478363d207348213d35Dmitry Shmidt      if (VG_(clo_verbosity) > 1) {
948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      }
968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   }
97d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt
988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   return ret;
99d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt}
100d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt
1018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
102fb79edc9df1f20461e90e478363d207348213d35Dmitry ShmidtBool ML_(client_signal_OK)(Int sigNo)
103d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt{
104d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt   /* signal 0 is OK for kill */
1058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
106
107   //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
108
109   return ret;
110}
111
112
113/* Handy small function to help stop wrappers from segfaulting when
114   presented with bogus client addresses.  Is not used for generating
115   user-visible errors. */
116
117Bool ML_(safe_to_deref) ( void* start, SizeT size )
118{
119   return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
120}
121
122
123/* ---------------------------------------------------------------------
124   Doing mmap, mremap
125   ------------------------------------------------------------------ */
126
127/* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
128   munmap, mprotect (and mremap??) work at the page level.  So addresses
129   and lengths must be adjusted for this. */
130
131/* Mash around start and length so that the area exactly covers
132   an integral number of pages.  If we don't do that, memcheck's
133   idea of addressible memory diverges from that of the
134   kernel's, which causes the leak detector to crash. */
135static
136void page_align_addr_and_len( Addr* a, SizeT* len)
137{
138   Addr ra;
139
140   ra = VG_PGROUNDDN(*a);
141   *len = VG_PGROUNDUP(*a + *len) - ra;
142   *a = ra;
143}
144
145static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
146                                UInt flags, Int fd, Off64T offset)
147{
148   Bool d;
149
150   /* 'a' is the return value from a real kernel mmap, hence: */
151   vg_assert(VG_IS_PAGE_ALIGNED(a));
152   /* whereas len is whatever the syscall supplied.  So: */
153   len = VG_PGROUNDUP(len);
154
155   d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
156
157   if (d)
158      VG_(discard_translations)( (Addr64)a, (ULong)len,
159                                 "notify_core_of_mmap" );
160}
161
162static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
163{
164   Bool rr, ww, xx;
165
166   /* 'a' is the return value from a real kernel mmap, hence: */
167   vg_assert(VG_IS_PAGE_ALIGNED(a));
168   /* whereas len is whatever the syscall supplied.  So: */
169   len = VG_PGROUNDUP(len);
170
171   rr = toBool(prot & VKI_PROT_READ);
172   ww = toBool(prot & VKI_PROT_WRITE);
173   xx = toBool(prot & VKI_PROT_EXEC);
174
175   VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
176}
177
178
179/* When a client mmap has been successfully done, this function must
180   be called.  It notifies both aspacem and the tool of the new
181   mapping.
182
183   JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
184   it is called from is POST(sys_io_setup).  In particular,
185   ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
186   client mmap.  But it doesn't call this function; instead it does the
187   relevant notifications itself.  Here, we just pass di_handle=0 to
188   notify_tool_of_mmap as we have no better information.  But really this
189   function should be done away with; problem is I don't understand what
190   POST(sys_io_setup) does or how it works.
191
192   [However, this function is used lots for Darwin, because
193    ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
194 */
195void
196ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
197                                    UInt flags, Int fd, Off64T offset )
198{
199   // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
200   // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
201   // Should it?  --njn
202   notify_core_of_mmap(a, len, prot, flags, fd, offset);
203   notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
204}
205
206void
207ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
208{
209   Bool d;
210
211   page_align_addr_and_len(&a, &len);
212   d = VG_(am_notify_munmap)(a, len);
213   VG_TRACK( die_mem_munmap, a, len );
214   VG_(di_notify_munmap)( a, len );
215   if (d)
216      VG_(discard_translations)( (Addr64)a, (ULong)len,
217                                 "ML_(notify_core_and_tool_of_munmap)" );
218}
219
220void
221ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
222{
223   Bool rr = toBool(prot & VKI_PROT_READ);
224   Bool ww = toBool(prot & VKI_PROT_WRITE);
225   Bool xx = toBool(prot & VKI_PROT_EXEC);
226   Bool d;
227
228   page_align_addr_and_len(&a, &len);
229   d = VG_(am_notify_mprotect)(a, len, prot);
230   VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
231   VG_(di_notify_mprotect)( a, len, prot );
232   if (d)
233      VG_(discard_translations)( (Addr64)a, (ULong)len,
234                                 "ML_(notify_core_and_tool_of_mprotect)" );
235}
236
237
238
239#if HAVE_MREMAP
240/* Expand (or shrink) an existing mapping, potentially moving it at
241   the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
242*/
243static
244SysRes do_mremap( Addr old_addr, SizeT old_len,
245                  Addr new_addr, SizeT new_len,
246                  UWord flags, ThreadId tid )
247{
248#  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
249
250   Bool      ok, d;
251   NSegment const* old_seg;
252   Addr      advised;
253   Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
254   Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
255
256   if (0)
257      VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
258                  old_addr,old_len,new_addr,new_len,
259                  flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
260                  flags & VKI_MREMAP_FIXED ? "FIXED" : "");
261   if (0)
262      VG_(am_show_nsegments)(0, "do_remap: before");
263
264   if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
265      goto eINVAL;
266
267   if (!VG_IS_PAGE_ALIGNED(old_addr))
268      goto eINVAL;
269
270   old_len = VG_PGROUNDUP(old_len);
271   new_len = VG_PGROUNDUP(new_len);
272
273   if (new_len == 0)
274      goto eINVAL;
275
276   /* kernel doesn't reject this, but we do. */
277   if (old_len == 0)
278      goto eINVAL;
279
280   /* reject wraparounds */
281   if (old_addr + old_len < old_addr)
282      goto eINVAL;
283   if (f_fixed == True && new_addr + new_len < new_len)
284      goto eINVAL;
285
286   /* kernel rejects all fixed, no-move requests (which are
287      meaningless). */
288   if (f_fixed == True && f_maymove == False)
289      goto eINVAL;
290
291   /* Stay away from non-client areas. */
292   if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
293      goto eINVAL;
294
295   /* In all remaining cases, if the old range does not fall within a
296      single segment, fail. */
297   old_seg = VG_(am_find_nsegment)( old_addr );
298   if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
299      goto eINVAL;
300   if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
301      goto eINVAL;
302
303   vg_assert(old_len > 0);
304   vg_assert(new_len > 0);
305   vg_assert(VG_IS_PAGE_ALIGNED(old_len));
306   vg_assert(VG_IS_PAGE_ALIGNED(new_len));
307   vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
308
309   /* There are 3 remaining cases:
310
311      * maymove == False
312
313        new space has to be at old address, so:
314            - shrink    -> unmap end
315            - same size -> do nothing
316            - grow      -> if can grow in-place, do so, else fail
317
318      * maymove == True, fixed == False
319
320        new space can be anywhere, so:
321            - shrink    -> unmap end
322            - same size -> do nothing
323            - grow      -> if can grow in-place, do so, else
324                           move to anywhere large enough, else fail
325
326      * maymove == True, fixed == True
327
328        new space must be at new address, so:
329
330            - if new address is not page aligned, fail
331            - if new address range overlaps old one, fail
332            - if new address range cannot be allocated, fail
333            - else move to new address range with new size
334            - else fail
335   */
336
337   if (f_maymove == False) {
338      /* new space has to be at old address */
339      if (new_len < old_len)
340         goto shrink_in_place;
341      if (new_len > old_len)
342         goto grow_in_place_or_fail;
343      goto same_in_place;
344   }
345
346   if (f_maymove == True && f_fixed == False) {
347      /* new space can be anywhere */
348      if (new_len < old_len)
349         goto shrink_in_place;
350      if (new_len > old_len)
351         goto grow_in_place_or_move_anywhere_or_fail;
352      goto same_in_place;
353   }
354
355   if (f_maymove == True && f_fixed == True) {
356      /* new space can only be at the new address */
357      if (!VG_IS_PAGE_ALIGNED(new_addr))
358         goto eINVAL;
359      if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
360         /* no overlap */
361      } else {
362         goto eINVAL;
363      }
364      if (new_addr == 0)
365         goto eINVAL;
366         /* VG_(am_get_advisory_client_simple) interprets zero to mean
367            non-fixed, which is not what we want */
368      advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
369      if (!ok || advised != new_addr)
370         goto eNOMEM;
371      ok = VG_(am_relocate_nooverlap_client)
372              ( &d, old_addr, old_len, new_addr, new_len );
373      if (ok) {
374         VG_TRACK( copy_mem_remap, old_addr, new_addr,
375                                   MIN_SIZET(old_len,new_len) );
376         if (new_len > old_len)
377            VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
378                      old_seg->hasR, old_seg->hasW, old_seg->hasX,
379                      0/*di_handle*/ );
380         VG_TRACK(die_mem_munmap, old_addr, old_len);
381         if (d) {
382            VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
383            VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
384         }
385         return VG_(mk_SysRes_Success)( new_addr );
386      }
387      goto eNOMEM;
388   }
389
390   /* end of the 3 cases */
391   /*NOTREACHED*/ vg_assert(0);
392
393  grow_in_place_or_move_anywhere_or_fail:
394   {
395   /* try growing it in-place */
396   Addr   needA = old_addr + old_len;
397   SSizeT needL = new_len - old_len;
398
399   vg_assert(needL > 0);
400   if (needA == 0)
401      goto eINVAL;
402      /* VG_(am_get_advisory_client_simple) interprets zero to mean
403         non-fixed, which is not what we want */
404   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
405   if (ok) {
406      /* Fixes bug #129866. */
407      ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
408   }
409   if (ok && advised == needA) {
410      ok = VG_(am_extend_map_client)( &d, old_seg, needL );
411      if (ok) {
412         VG_TRACK( new_mem_mmap, needA, needL,
413                                 old_seg->hasR,
414                                 old_seg->hasW, old_seg->hasX,
415                                 0/*di_handle*/ );
416         if (d)
417            VG_(discard_translations)( needA, needL, "do_remap(3)" );
418         return VG_(mk_SysRes_Success)( old_addr );
419      }
420   }
421
422   /* that failed.  Look elsewhere. */
423   advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
424   if (ok) {
425      Bool oldR = old_seg->hasR;
426      Bool oldW = old_seg->hasW;
427      Bool oldX = old_seg->hasX;
428      /* assert new area does not overlap old */
429      vg_assert(advised+new_len-1 < old_addr
430                || advised > old_addr+old_len-1);
431      ok = VG_(am_relocate_nooverlap_client)
432              ( &d, old_addr, old_len, advised, new_len );
433      if (ok) {
434         VG_TRACK( copy_mem_remap, old_addr, advised,
435                                   MIN_SIZET(old_len,new_len) );
436         if (new_len > old_len)
437            VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
438                      oldR, oldW, oldX, 0/*di_handle*/ );
439         VG_TRACK(die_mem_munmap, old_addr, old_len);
440         if (d) {
441            VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
442            VG_(discard_translations)( advised, new_len, "do_remap(5)" );
443         }
444         return VG_(mk_SysRes_Success)( advised );
445      }
446   }
447   goto eNOMEM;
448   }
449   /*NOTREACHED*/ vg_assert(0);
450
451  grow_in_place_or_fail:
452   {
453   Addr  needA = old_addr + old_len;
454   SizeT needL = new_len - old_len;
455   if (needA == 0)
456      goto eINVAL;
457      /* VG_(am_get_advisory_client_simple) interprets zero to mean
458         non-fixed, which is not what we want */
459   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
460   if (ok) {
461      /* Fixes bug #129866. */
462      ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
463   }
464   if (!ok || advised != needA)
465      goto eNOMEM;
466   ok = VG_(am_extend_map_client)( &d, old_seg, needL );
467   if (!ok)
468      goto eNOMEM;
469   VG_TRACK( new_mem_mmap, needA, needL,
470                           old_seg->hasR, old_seg->hasW, old_seg->hasX,
471                           0/*di_handle*/ );
472   if (d)
473      VG_(discard_translations)( needA, needL, "do_remap(6)" );
474   return VG_(mk_SysRes_Success)( old_addr );
475   }
476   /*NOTREACHED*/ vg_assert(0);
477
478  shrink_in_place:
479   {
480   SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
481   if (sr_isError(sres))
482      return sres;
483   VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
484   if (d)
485      VG_(discard_translations)( old_addr+new_len, old_len-new_len,
486                                 "do_remap(7)" );
487   return VG_(mk_SysRes_Success)( old_addr );
488   }
489   /*NOTREACHED*/ vg_assert(0);
490
491  same_in_place:
492   return VG_(mk_SysRes_Success)( old_addr );
493   /*NOTREACHED*/ vg_assert(0);
494
495  eINVAL:
496   return VG_(mk_SysRes_Error)( VKI_EINVAL );
497  eNOMEM:
498   return VG_(mk_SysRes_Error)( VKI_ENOMEM );
499
500#  undef MIN_SIZET
501}
502#endif /* HAVE_MREMAP */
503
504
505/* ---------------------------------------------------------------------
506   File-descriptor tracking
507   ------------------------------------------------------------------ */
508
509/* One of these is allocated for each open file descriptor.  */
510typedef struct OpenFd
511{
512   Int fd;                        /* The file descriptor */
513   HChar *pathname;               /* NULL if not a regular file or unknown */
514   ExeContext *where;             /* NULL if inherited from parent */
515   struct OpenFd *next, *prev;
516} OpenFd;
517
518/* List of allocated file descriptors. */
519static OpenFd *allocated_fds = NULL;
520
521/* Count of open file descriptors. */
522static Int fd_count = 0;
523
524
525/* Note the fact that a file descriptor was just closed. */
526static
527void record_fd_close(Int fd)
528{
529   OpenFd *i = allocated_fds;
530
531   if (fd >= VG_(fd_hard_limit))
532      return;			/* Valgrind internal */
533
534   while(i) {
535      if(i->fd == fd) {
536         if(i->prev)
537            i->prev->next = i->next;
538         else
539            allocated_fds = i->next;
540         if(i->next)
541            i->next->prev = i->prev;
542         if(i->pathname)
543            VG_(arena_free) (VG_AR_CORE, i->pathname);
544         VG_(arena_free) (VG_AR_CORE, i);
545         fd_count--;
546         break;
547      }
548      i = i->next;
549   }
550}
551
552/* Note the fact that a file descriptor was just opened.  If the
553   tid is -1, this indicates an inherited fd.  If the pathname is NULL,
554   this either indicates a non-standard file (i.e. a pipe or socket or
555   some such thing) or that we don't know the filename.  If the fd is
556   already open, then we're probably doing a dup2() to an existing fd,
557   so just overwrite the existing one. */
558void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
559{
560   OpenFd *i;
561
562   if (fd >= VG_(fd_hard_limit))
563      return;			/* Valgrind internal */
564
565   /* Check to see if this fd is already open. */
566   i = allocated_fds;
567   while (i) {
568      if (i->fd == fd) {
569         if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);
570         break;
571      }
572      i = i->next;
573   }
574
575   /* Not already one: allocate an OpenFd */
576   if (i == NULL) {
577      i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
578
579      i->prev = NULL;
580      i->next = allocated_fds;
581      if(allocated_fds) allocated_fds->prev = i;
582      allocated_fds = i;
583      fd_count++;
584   }
585
586   i->fd = fd;
587   i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
588   i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
589}
590
591// Record opening of an fd, and find its name.
592void ML_(record_fd_open_named)(ThreadId tid, Int fd)
593{
594   static HChar buf[VKI_PATH_MAX];
595   HChar* name;
596   if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
597      name = buf;
598   else
599      name = NULL;
600
601   ML_(record_fd_open_with_given_name)(tid, fd, name);
602}
603
604// Record opening of a nameless fd.
605void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
606{
607   ML_(record_fd_open_with_given_name)(tid, fd, NULL);
608}
609
610static
611HChar *unix_to_name(struct vki_sockaddr_un *sa, UInt len, HChar *name)
612{
613   if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
614      VG_(sprintf)(name, "<unknown>");
615   } else {
616      VG_(sprintf)(name, "%s", sa->sun_path);
617   }
618
619   return name;
620}
621
622static
623HChar *inet_to_name(struct vki_sockaddr_in *sa, UInt len, HChar *name)
624{
625   if (sa == NULL || len == 0) {
626      VG_(sprintf)(name, "<unknown>");
627   } else if (sa->sin_port == 0) {
628      VG_(sprintf)(name, "<unbound>");
629   } else {
630      UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
631      VG_(sprintf)(name, "%u.%u.%u.%u:%u",
632                   (addr>>24) & 0xFF, (addr>>16) & 0xFF,
633                   (addr>>8) & 0xFF, addr & 0xFF,
634                   VG_(ntohs)(sa->sin_port));
635   }
636
637   return name;
638}
639
640static
641void inet6_format(HChar *s, const UChar ip[16])
642{
643   static const unsigned char V4mappedprefix[12] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff};
644
645   if (!VG_(memcmp)(ip, V4mappedprefix, 12)) {
646      struct vki_in_addr *sin_addr = (struct vki_in_addr *)(ip + 12);
647      UInt addr = VG_(ntohl)(sin_addr->s_addr);
648
649      VG_(sprintf)(s, "::ffff:%u.%u.%u.%u",
650                   (addr>>24) & 0xFF, (addr>>16) & 0xFF,
651                   (addr>>8) & 0xFF, addr & 0xFF);
652   } else {
653      Bool compressing = False;
654      Bool compressed = False;
655      Int len = 0;
656      Int i;
657
658      for (i = 0; i < 16; i += 2) {
659         UInt word = ((UInt)ip[i] << 8) | (UInt)ip[i+1];
660         if (word == 0 && !compressed) {
661            compressing = True;
662         } else {
663            if (compressing) {
664               compressing = False;
665               compressed = True;
666               s[len++] = ':';
667            }
668            if (i > 0) {
669               s[len++] = ':';
670            }
671            len += VG_(sprintf)(s + len, "%x", word);
672         }
673      }
674
675      if (compressing) {
676         s[len++] = ':';
677         s[len++] = ':';
678      }
679
680      s[len++] = 0;
681   }
682
683   return;
684}
685
686static
687HChar *inet6_to_name(struct vki_sockaddr_in6 *sa, UInt len, HChar *name)
688{
689   if (sa == NULL || len == 0) {
690      VG_(sprintf)(name, "<unknown>");
691   } else if (sa->sin6_port == 0) {
692      VG_(sprintf)(name, "<unbound>");
693   } else {
694      char addr[128];
695      inet6_format(addr, (void *)&(sa->sin6_addr));
696      VG_(sprintf)(name, "[%s]:%u", addr, VG_(ntohs)(sa->sin6_port));
697   }
698
699   return name;
700}
701
702/*
703 * Try get some details about a socket.
704 */
705static void
706getsockdetails(Int fd)
707{
708   union u {
709      struct vki_sockaddr a;
710      struct vki_sockaddr_in in;
711      struct vki_sockaddr_in6 in6;
712      struct vki_sockaddr_un un;
713   } laddr;
714   Int llen;
715
716   llen = sizeof(laddr);
717   VG_(memset)(&laddr, 0, llen);
718
719   if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
720      switch(laddr.a.sa_family) {
721      case VKI_AF_INET: {
722         static char lname[32];
723         static char pname[32];
724         struct vki_sockaddr_in paddr;
725         Int plen = sizeof(struct vki_sockaddr_in);
726
727         if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
728            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
729                         inet_to_name(&(laddr.in), llen, lname),
730                         inet_to_name(&paddr, plen, pname));
731         } else {
732            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
733                         fd, inet_to_name(&(laddr.in), llen, lname));
734         }
735         return;
736         }
737      case VKI_AF_INET6: {
738         static char lname[128];
739         static char pname[128];
740         struct vki_sockaddr_in6 paddr;
741         Int plen = sizeof(struct vki_sockaddr_in6);
742
743         if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
744            VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> %s\n", fd,
745                         inet6_to_name(&(laddr.in6), llen, lname),
746                         inet6_to_name(&paddr, plen, pname));
747         } else {
748            VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> unbound\n",
749                         fd, inet6_to_name(&(laddr.in6), llen, lname));
750         }
751         return;
752         }
753      case VKI_AF_UNIX: {
754         static char lname[256];
755         VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
756                      unix_to_name(&(laddr.un), llen, lname));
757         return;
758         }
759      default:
760         VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
761                      laddr.a.sa_family, fd);
762         return;
763      }
764   }
765
766   VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
767}
768
769
770/* Dump out a summary, and a more detailed list, of open file descriptors. */
771void VG_(show_open_fds) (const HChar* when)
772{
773   OpenFd *i = allocated_fds;
774
775   VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open %s.\n", fd_count, when);
776
777   while (i) {
778      if (i->pathname) {
779         VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
780                      i->pathname);
781      } else {
782         Int val;
783         Int len = sizeof(val);
784
785         if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
786             == -1) {
787            VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
788         } else {
789            getsockdetails(i->fd);
790         }
791      }
792
793      if(i->where) {
794         VG_(pp_ExeContext)(i->where);
795         VG_(message)(Vg_UserMsg, "\n");
796      } else {
797         VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
798         VG_(message)(Vg_UserMsg, "\n");
799      }
800
801      i = i->next;
802   }
803
804   VG_(message)(Vg_UserMsg, "\n");
805}
806
807/* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
808   have /proc support compiled in, or a non-Linux kernel), then we need to
809   find out what file descriptors we inherited from our parent process the
810   hard way - by checking each fd in turn. */
811static
812void init_preopened_fds_without_proc_self_fd(void)
813{
814   struct vki_rlimit lim;
815   UInt count;
816   Int i;
817
818   if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
819      /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
820         an arbitrarily high number.  1024 happens to be the limit in
821         the 2.4 Linux kernels. */
822      count = 1024;
823   } else {
824      count = lim.rlim_cur;
825   }
826
827   for (i = 0; i < count; i++)
828      if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
829         ML_(record_fd_open_named)(-1, i);
830}
831
832/* Initialize the list of open file descriptors with the file descriptors
833   we inherited from out parent process. */
834
835void VG_(init_preopened_fds)(void)
836{
837// DDD: should probably use HAVE_PROC here or similar, instead.
838#if defined(VGO_linux)
839   Int ret;
840   struct vki_dirent d;
841   SysRes f;
842
843   f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
844   if (sr_isError(f)) {
845      init_preopened_fds_without_proc_self_fd();
846      return;
847   }
848
849   while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
850      if (ret == -1)
851         goto out;
852
853      if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
854         HChar* s;
855         Int fno = VG_(strtoll10)(d.d_name, &s);
856         if (*s == '\0') {
857            if (fno != sr_Res(f))
858               if (VG_(clo_track_fds))
859                  ML_(record_fd_open_named)(-1, fno);
860         } else {
861            VG_(message)(Vg_DebugMsg,
862               "Warning: invalid file name in /proc/self/fd: %s\n",
863               d.d_name);
864         }
865      }
866
867      VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
868   }
869
870  out:
871   VG_(close)(sr_Res(f));
872
873#elif defined(VGO_darwin)
874   init_preopened_fds_without_proc_self_fd();
875
876#else
877#  error Unknown OS
878#endif
879}
880
881static
882HChar *strdupcat ( const HChar* cc, const HChar *s1, const HChar *s2,
883                   ArenaId aid )
884{
885   UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
886   HChar *result = VG_(arena_malloc) ( aid, cc, len );
887   VG_(strcpy) ( result, s1 );
888   VG_(strcat) ( result, s2 );
889   return result;
890}
891
892static
893void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
894                            const HChar *msg, Addr base, SizeT size )
895{
896   HChar *outmsg = strdupcat ( "di.syswrap.pmrs.1",
897                               "sendmsg", msg, VG_AR_CORE );
898   PRE_MEM_READ( outmsg, base, size );
899   VG_(arena_free) ( VG_AR_CORE, outmsg );
900}
901
902static
903void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
904                             const HChar *msg, Addr base, SizeT size )
905{
906   HChar *outmsg = strdupcat ( "di.syswrap.pmwr.1",
907                               "recvmsg", msg, VG_AR_CORE );
908   if ( read )
909      PRE_MEM_READ( outmsg, base, size );
910   else
911      PRE_MEM_WRITE( outmsg, base, size );
912   VG_(arena_free) ( VG_AR_CORE, outmsg );
913}
914
915static
916void post_mem_write_recvmsg ( ThreadId tid, Bool read,
917                              const HChar *fieldName, Addr base, SizeT size )
918{
919   if ( !read )
920      POST_MEM_WRITE( base, size );
921}
922
923static
924void msghdr_foreachfield (
925        ThreadId tid,
926        const HChar *name,
927        struct vki_msghdr *msg,
928        UInt length,
929        void (*foreach_func)( ThreadId, Bool, const HChar *, Addr, SizeT ),
930        Bool recv
931     )
932{
933   HChar *fieldName;
934
935   if ( !msg )
936      return;
937
938   fieldName = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.mfef", VG_(strlen)(name) + 32 );
939
940   VG_(sprintf) ( fieldName, "(%s)", name );
941
942   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
943   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
944   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
945   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
946   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
947   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
948
949   /* msg_flags is completely ignored for send_mesg, recv_mesg doesn't read
950      the field, but does write to it. */
951   if ( recv )
952      foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
953
954   if ( ML_(safe_to_deref)(&msg->msg_name, sizeof (void *))
955        && msg->msg_name ) {
956      VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
957      foreach_func ( tid, False, fieldName,
958                     (Addr)msg->msg_name, msg->msg_namelen );
959   }
960
961   if ( ML_(safe_to_deref)(&msg->msg_iov, sizeof (void *))
962        && msg->msg_iov ) {
963      struct vki_iovec *iov = msg->msg_iov;
964      UInt i;
965
966      VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
967
968      foreach_func ( tid, True, fieldName,
969                     (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );
970
971      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov ) {
972         UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
973         VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
974         foreach_func ( tid, False, fieldName,
975                        (Addr)iov->iov_base, iov_len );
976         length = length - iov_len;
977      }
978   }
979
980   if ( ML_(safe_to_deref) (&msg->msg_control, sizeof (void *))
981        && msg->msg_control )
982   {
983      VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
984      foreach_func ( tid, False, fieldName,
985                     (Addr)msg->msg_control, msg->msg_controllen );
986   }
987
988   VG_(arena_free) ( VG_AR_CORE, fieldName );
989}
990
991static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
992{
993   struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
994
995   while (cm) {
996      if (cm->cmsg_level == VKI_SOL_SOCKET &&
997          cm->cmsg_type == VKI_SCM_RIGHTS ) {
998         Int *fds = (Int *) VKI_CMSG_DATA(cm);
999         Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
1000                         / sizeof(int);
1001         Int i;
1002
1003         for (i = 0; i < fdc; i++)
1004            if(VG_(clo_track_fds))
1005               // XXX: must we check the range on these fds with
1006               //      ML_(fd_allowed)()?
1007               ML_(record_fd_open_named)(tid, fds[i]);
1008      }
1009
1010      cm = VKI_CMSG_NXTHDR(msg, cm);
1011   }
1012}
1013
1014/* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
1015static
1016void pre_mem_read_sockaddr ( ThreadId tid,
1017                             const HChar *description,
1018                             struct vki_sockaddr *sa, UInt salen )
1019{
1020   HChar *outmsg;
1021   struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
1022   struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
1023   struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
1024#ifdef VKI_AF_BLUETOOTH
1025   struct vki_sockaddr_rc*  rc   = (struct vki_sockaddr_rc *)sa;
1026#endif
1027
1028   /* NULL/zero-length sockaddrs are legal */
1029   if ( sa == NULL || salen == 0 ) return;
1030
1031   outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
1032                                VG_(strlen)( description ) + 30 );
1033
1034   VG_(sprintf) ( outmsg, description, "sa_family" );
1035   PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
1036
1037   switch (sa->sa_family) {
1038
1039      case VKI_AF_UNIX:
1040         VG_(sprintf) ( outmsg, description, "sun_path" );
1041         PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
1042         // GrP fixme max of sun_len-2? what about nul char?
1043         break;
1044
1045      case VKI_AF_INET:
1046         VG_(sprintf) ( outmsg, description, "sin_port" );
1047         PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
1048         VG_(sprintf) ( outmsg, description, "sin_addr" );
1049         PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
1050         break;
1051
1052      case VKI_AF_INET6:
1053         VG_(sprintf) ( outmsg, description, "sin6_port" );
1054         PRE_MEM_READ( outmsg,
1055            (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
1056         VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
1057         PRE_MEM_READ( outmsg,
1058            (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
1059         VG_(sprintf) ( outmsg, description, "sin6_addr" );
1060         PRE_MEM_READ( outmsg,
1061            (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
1062         VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
1063         PRE_MEM_READ( outmsg,
1064            (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
1065         break;
1066
1067#ifdef VKI_AF_BLUETOOTH
1068      case VKI_AF_BLUETOOTH:
1069         VG_(sprintf) ( outmsg, description, "rc_bdaddr" );
1070         PRE_MEM_READ( outmsg, (Addr) &rc->rc_bdaddr, sizeof (rc->rc_bdaddr) );
1071         VG_(sprintf) ( outmsg, description, "rc_channel" );
1072         PRE_MEM_READ( outmsg, (Addr) &rc->rc_channel, sizeof (rc->rc_channel) );
1073         break;
1074#endif
1075
1076      default:
1077         VG_(sprintf) ( outmsg, description, "" );
1078         PRE_MEM_READ( outmsg, (Addr) sa, salen );
1079         break;
1080   }
1081
1082   VG_(arena_free) ( VG_AR_CORE, outmsg );
1083}
1084
1085/* Dereference a pointer to a UInt. */
1086static UInt deref_UInt ( ThreadId tid, Addr a, const HChar* s )
1087{
1088   UInt* a_p = (UInt*)a;
1089   PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
1090   if (a_p == NULL)
1091      return 0;
1092   else
1093      return *a_p;
1094}
1095
1096void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
1097                                  const HChar* buf_s, const HChar* buflen_s )
1098{
1099   if (VG_(tdict).track_pre_mem_write) {
1100      UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1101      if (buflen_in > 0) {
1102         VG_(tdict).track_pre_mem_write(
1103            Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1104      }
1105   }
1106}
1107
1108void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1109                                   Addr buf_p, Addr buflen_p, const HChar* s )
1110{
1111   if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1112      UInt buflen_out = deref_UInt( tid, buflen_p, s);
1113      if (buflen_out > 0 && buf_p != (Addr)NULL) {
1114         VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1115      }
1116   }
1117}
1118
1119/* ---------------------------------------------------------------------
1120   Data seg end, for brk()
1121   ------------------------------------------------------------------ */
1122
1123/*   +--------+------------+
1124     | anon   |    resvn   |
1125     +--------+------------+
1126
1127     ^     ^  ^
1128     |     |  boundary is page aligned
1129     |     VG_(brk_limit) -- no alignment constraint
1130     VG_(brk_base) -- page aligned -- does not move
1131
1132     Both the anon part and the reservation part are always at least
1133     one page.
1134*/
1135
1136/* Set the new data segment end to NEWBRK.  If this succeeds, return
1137   NEWBRK, else return the current data segment end. */
1138
1139static Addr do_brk ( Addr newbrk )
1140{
1141   NSegment const* aseg;
1142   NSegment const* rseg;
1143   Addr newbrkP;
1144   SizeT delta;
1145   Bool ok;
1146   Bool debug = False;
1147
1148   if (debug)
1149      VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
1150		  VG_(brk_base), VG_(brk_limit), newbrk);
1151
1152#  if 0
1153   if (0) show_segments("in_brk");
1154#  endif
1155
1156   if (newbrk < VG_(brk_base))
1157      /* Clearly impossible. */
1158      goto bad;
1159
1160   if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
1161      /* shrinking the data segment.  Be lazy and don't munmap the
1162         excess area. */
1163      NSegment const * seg = VG_(am_find_nsegment)(newbrk);
1164      if (seg && seg->hasT)
1165         VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
1166                                    "do_brk(shrink)" );
1167      /* Since we're being lazy and not unmapping pages, we have to
1168         zero out the area, so that if the area later comes back into
1169         circulation, it will be filled with zeroes, as if it really
1170         had been unmapped and later remapped.  Be a bit paranoid and
1171         try hard to ensure we're not going to segfault by doing the
1172         write - check both ends of the range are in the same segment
1173         and that segment is writable. */
1174      if (seg) {
1175         /* pre: newbrk < VG_(brk_limit)
1176              => newbrk <= VG_(brk_limit)-1 */
1177         NSegment const * seg2;
1178         vg_assert(newbrk < VG_(brk_limit));
1179         seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
1180         if (seg2 && seg == seg2 && seg->hasW)
1181            VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
1182      }
1183
1184      VG_(brk_limit) = newbrk;
1185      return newbrk;
1186   }
1187
1188   /* otherwise we're expanding the brk segment. */
1189   if (VG_(brk_limit) > VG_(brk_base))
1190      aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
1191   else
1192      aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
1193   rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
1194
1195   /* These should be assured by setup_client_dataseg in m_main. */
1196   vg_assert(aseg);
1197   vg_assert(rseg);
1198   vg_assert(aseg->kind == SkAnonC);
1199   vg_assert(rseg->kind == SkResvn);
1200   vg_assert(aseg->end+1 == rseg->start);
1201
1202   vg_assert(newbrk >= VG_(brk_base));
1203   if (newbrk <= rseg->start) {
1204      /* still fits within the anon segment. */
1205      VG_(brk_limit) = newbrk;
1206      return newbrk;
1207   }
1208
1209   if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
1210      /* request is too large -- the resvn would fall below 1 page,
1211         which isn't allowed. */
1212      goto bad;
1213   }
1214
1215   newbrkP = VG_PGROUNDUP(newbrk);
1216   vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
1217   delta = newbrkP - rseg->start;
1218   vg_assert(delta > 0);
1219   vg_assert(VG_IS_PAGE_ALIGNED(delta));
1220
1221   ok = VG_(am_extend_into_adjacent_reservation_client)( aseg, delta );
1222   if (!ok) goto bad;
1223
1224   VG_(brk_limit) = newbrk;
1225   return newbrk;
1226
1227  bad:
1228   return VG_(brk_limit);
1229}
1230
1231
1232/* ---------------------------------------------------------------------
1233   Vet file descriptors for sanity
1234   ------------------------------------------------------------------ */
1235/*
1236> - what does the "Bool soft" parameter mean?
1237
1238(Tom Hughes, 3 Oct 05):
1239
1240Whether or not to consider a file descriptor invalid if it is above
1241the current soft limit.
1242
1243Basically if we are testing whether a newly created file descriptor is
1244valid (in a post handler) then we set soft to true, and if we are
1245testing whether a file descriptor that is about to be used (in a pre
1246handler) is valid [viz, an already-existing fd] then we set it to false.
1247
1248The point is that if the (virtual) soft limit is lowered then any
1249existing descriptors can still be read/written/closed etc (so long as
1250they are below the valgrind reserved descriptors) but no new
1251descriptors can be created above the new soft limit.
1252
1253(jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
1254*/
1255
1256/* Return true if we're allowed to use or create this fd */
1257Bool ML_(fd_allowed)(Int fd, const HChar *syscallname, ThreadId tid,
1258                     Bool isNewFd)
1259{
1260   Bool allowed = True;
1261
1262   /* hard limits always apply */
1263   if (fd < 0 || fd >= VG_(fd_hard_limit))
1264      allowed = False;
1265
1266   /* hijacking the output fds is never allowed */
1267   if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1268      allowed = False;
1269
1270   /* if creating a new fd (rather than using an existing one), the
1271      soft limit must also be observed */
1272   if (isNewFd && fd >= VG_(fd_soft_limit))
1273      allowed = False;
1274
1275   /* this looks like it ought to be included, but causes problems: */
1276   /*
1277   if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1278      allowed = False;
1279   */
1280   /* The difficulty is as follows: consider a program P which expects
1281      to be able to mess with (redirect) its own stderr (fd 2).
1282      Usually to deal with P we would issue command line flags to send
1283      logging somewhere other than stderr, so as not to disrupt P.
1284      The problem is that -d unilaterally hijacks stderr with no
1285      consultation with P.  And so, if this check is enabled, P will
1286      work OK normally but fail if -d is issued.
1287
1288      Basically -d is a hack and you take your chances when using it.
1289      It's very useful for low level debugging -- particularly at
1290      startup -- and having its presence change the behaviour of the
1291      client is exactly what we don't want.  */
1292
1293   /* croak? */
1294   if ((!allowed) && VG_(showing_core_errors)() ) {
1295      VG_(message)(Vg_UserMsg,
1296         "Warning: invalid file descriptor %d in syscall %s()\n",
1297         fd, syscallname);
1298      if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1299	 VG_(message)(Vg_UserMsg,
1300            "   Use --log-fd=<number> to select an alternative log fd.\n");
1301      if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1302	 VG_(message)(Vg_UserMsg,
1303            "   Use --xml-fd=<number> to select an alternative XML "
1304            "output fd.\n");
1305      // DDD: consider always printing this stack trace, it's useful.
1306      // Also consider also making this a proper core error, ie.
1307      // suppressible and all that.
1308      if (VG_(clo_verbosity) > 1) {
1309         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1310      }
1311   }
1312
1313   return allowed;
1314}
1315
1316
1317/* ---------------------------------------------------------------------
1318   Deal with a bunch of socket-related syscalls
1319   ------------------------------------------------------------------ */
1320
1321/* ------ */
1322
1323void
1324ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1325                                  UWord arg0, UWord arg1,
1326                                  UWord arg2, UWord arg3 )
1327{
1328   /* int socketpair(int d, int type, int protocol, int sv[2]); */
1329   PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1330                  arg3, 2*sizeof(int) );
1331}
1332
1333SysRes
1334ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1335                                   SysRes res,
1336                                   UWord arg0, UWord arg1,
1337                                   UWord arg2, UWord arg3 )
1338{
1339   SysRes r = res;
1340   Int fd1 = ((Int*)arg3)[0];
1341   Int fd2 = ((Int*)arg3)[1];
1342   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1343   POST_MEM_WRITE( arg3, 2*sizeof(int) );
1344   if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1345       !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1346      VG_(close)(fd1);
1347      VG_(close)(fd2);
1348      r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1349   } else {
1350      POST_MEM_WRITE( arg3, 2*sizeof(int) );
1351      if (VG_(clo_track_fds)) {
1352         ML_(record_fd_open_nameless)(tid, fd1);
1353         ML_(record_fd_open_nameless)(tid, fd2);
1354      }
1355   }
1356   return r;
1357}
1358
1359/* ------ */
1360
1361SysRes
1362ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1363{
1364   SysRes r = res;
1365   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1366   if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1367      VG_(close)(sr_Res(res));
1368      r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1369   } else {
1370      if (VG_(clo_track_fds))
1371         ML_(record_fd_open_nameless)(tid, sr_Res(res));
1372   }
1373   return r;
1374}
1375
1376/* ------ */
1377
1378void
1379ML_(generic_PRE_sys_bind) ( ThreadId tid,
1380                            UWord arg0, UWord arg1, UWord arg2 )
1381{
1382   /* int bind(int sockfd, struct sockaddr *my_addr,
1383               int addrlen); */
1384   pre_mem_read_sockaddr(
1385      tid, "socketcall.bind(my_addr.%s)",
1386      (struct vki_sockaddr *) arg1, arg2
1387   );
1388}
1389
1390/* ------ */
1391
1392void
1393ML_(generic_PRE_sys_accept) ( ThreadId tid,
1394                              UWord arg0, UWord arg1, UWord arg2 )
1395{
1396   /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1397   Addr addr_p     = arg1;
1398   Addr addrlen_p  = arg2;
1399   if (addr_p != (Addr)NULL)
1400      ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1401                                   "socketcall.accept(addr)",
1402                                   "socketcall.accept(addrlen_in)" );
1403}
1404
1405SysRes
1406ML_(generic_POST_sys_accept) ( ThreadId tid,
1407                               SysRes res,
1408                               UWord arg0, UWord arg1, UWord arg2 )
1409{
1410   SysRes r = res;
1411   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1412   if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1413      VG_(close)(sr_Res(res));
1414      r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1415   } else {
1416      Addr addr_p     = arg1;
1417      Addr addrlen_p  = arg2;
1418      if (addr_p != (Addr)NULL)
1419         ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1420                                       "socketcall.accept(addrlen_out)" );
1421      if (VG_(clo_track_fds))
1422          ML_(record_fd_open_nameless)(tid, sr_Res(res));
1423   }
1424   return r;
1425}
1426
1427/* ------ */
1428
1429void
1430ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1431                              UWord arg0, UWord arg1, UWord arg2,
1432                              UWord arg3, UWord arg4, UWord arg5 )
1433{
1434   /* int sendto(int s, const void *msg, int len,
1435                 unsigned int flags,
1436                 const struct sockaddr *to, int tolen); */
1437   PRE_MEM_READ( "socketcall.sendto(msg)",
1438                 arg1, /* msg */
1439                 arg2  /* len */ );
1440   pre_mem_read_sockaddr(
1441      tid, "socketcall.sendto(to.%s)",
1442      (struct vki_sockaddr *) arg4, arg5
1443   );
1444}
1445
1446/* ------ */
1447
1448void
1449ML_(generic_PRE_sys_send) ( ThreadId tid,
1450                            UWord arg0, UWord arg1, UWord arg2 )
1451{
1452   /* int send(int s, const void *msg, size_t len, int flags); */
1453   PRE_MEM_READ( "socketcall.send(msg)",
1454                  arg1, /* msg */
1455                  arg2  /* len */ );
1456
1457}
1458
1459/* ------ */
1460
1461void
1462ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1463                                UWord arg0, UWord arg1, UWord arg2,
1464                                UWord arg3, UWord arg4, UWord arg5 )
1465{
1466   /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1467                   struct sockaddr *from, int *fromlen); */
1468   Addr buf_p      = arg1;
1469   Int  len        = arg2;
1470   Addr from_p     = arg4;
1471   Addr fromlen_p  = arg5;
1472   PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1473   if (from_p != (Addr)NULL)
1474      ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1475                                   "socketcall.recvfrom(from)",
1476                                   "socketcall.recvfrom(fromlen_in)" );
1477}
1478
1479void
1480ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1481                                 SysRes res,
1482                                 UWord arg0, UWord arg1, UWord arg2,
1483                                 UWord arg3, UWord arg4, UWord arg5 )
1484{
1485   Addr buf_p      = arg1;
1486   Int  len        = arg2;
1487   Addr from_p     = arg4;
1488   Addr fromlen_p  = arg5;
1489
1490   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1491   if (from_p != (Addr)NULL)
1492      ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1493                                    "socketcall.recvfrom(fromlen_out)" );
1494   POST_MEM_WRITE( buf_p, len );
1495}
1496
1497/* ------ */
1498
1499void
1500ML_(generic_PRE_sys_recv) ( ThreadId tid,
1501                            UWord arg0, UWord arg1, UWord arg2 )
1502{
1503   /* int recv(int s, void *buf, int len, unsigned int flags); */
1504   /* man 2 recv says:
1505      The  recv call is normally used only on a connected socket
1506      (see connect(2)) and is identical to recvfrom with a  NULL
1507      from parameter.
1508   */
1509   PRE_MEM_WRITE( "socketcall.recv(buf)",
1510                  arg1, /* buf */
1511                  arg2  /* len */ );
1512}
1513
1514void
1515ML_(generic_POST_sys_recv) ( ThreadId tid,
1516                             UWord res,
1517                             UWord arg0, UWord arg1, UWord arg2 )
1518{
1519   if (res >= 0 && arg1 != 0) {
1520      POST_MEM_WRITE( arg1, /* buf */
1521                      arg2  /* len */ );
1522   }
1523}
1524
1525/* ------ */
1526
1527void
1528ML_(generic_PRE_sys_connect) ( ThreadId tid,
1529                               UWord arg0, UWord arg1, UWord arg2 )
1530{
1531   /* int connect(int sockfd,
1532                  struct sockaddr *serv_addr, int addrlen ); */
1533   pre_mem_read_sockaddr( tid,
1534                          "socketcall.connect(serv_addr.%s)",
1535                          (struct vki_sockaddr *) arg1, arg2);
1536}
1537
1538/* ------ */
1539
1540void
1541ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1542                                  UWord arg0, UWord arg1, UWord arg2,
1543                                  UWord arg3, UWord arg4 )
1544{
1545   /* int setsockopt(int s, int level, int optname,
1546                     const void *optval, int optlen); */
1547   PRE_MEM_READ( "socketcall.setsockopt(optval)",
1548                 arg3, /* optval */
1549                 arg4  /* optlen */ );
1550}
1551
1552/* ------ */
1553
1554void
1555ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1556                                   UWord arg0, UWord arg1, UWord arg2 )
1557{
1558   /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1559   Addr name_p     = arg1;
1560   Addr namelen_p  = arg2;
1561   /* Nb: name_p cannot be NULL */
1562   ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1563                                "socketcall.getsockname(name)",
1564                                "socketcall.getsockname(namelen_in)" );
1565}
1566
1567void
1568ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1569                                    SysRes res,
1570                                    UWord arg0, UWord arg1, UWord arg2 )
1571{
1572   Addr name_p     = arg1;
1573   Addr namelen_p  = arg2;
1574   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1575   ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1576                                 "socketcall.getsockname(namelen_out)" );
1577}
1578
1579/* ------ */
1580
1581void
1582ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1583                                   UWord arg0, UWord arg1, UWord arg2 )
1584{
1585   /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1586   Addr name_p     = arg1;
1587   Addr namelen_p  = arg2;
1588   /* Nb: name_p cannot be NULL */
1589   ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1590                                "socketcall.getpeername(name)",
1591                                "socketcall.getpeername(namelen_in)" );
1592}
1593
1594void
1595ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1596                                    SysRes res,
1597                                    UWord arg0, UWord arg1, UWord arg2 )
1598{
1599   Addr name_p     = arg1;
1600   Addr namelen_p  = arg2;
1601   vg_assert(!sr_isError(res)); /* guaranteed by caller */
1602   ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1603                                 "socketcall.getpeername(namelen_out)" );
1604}
1605
1606/* ------ */
1607
1608void
1609ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, const HChar *name,
1610                               struct vki_msghdr *msg )
1611{
1612   msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg, False );
1613}
1614
1615/* ------ */
1616
1617void
1618ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, const HChar *name,
1619                               struct vki_msghdr *msg )
1620{
1621   msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg, True );
1622}
1623
1624void
1625ML_(generic_POST_sys_recvmsg) ( ThreadId tid, const HChar *name,
1626                                struct vki_msghdr *msg, UInt length )
1627{
1628   msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg, True );
1629   check_cmsg_for_fds( tid, msg );
1630}
1631
1632
1633/* ---------------------------------------------------------------------
1634   Deal with a bunch of IPC related syscalls
1635   ------------------------------------------------------------------ */
1636
1637/* ------ */
1638
1639void
1640ML_(generic_PRE_sys_semop) ( ThreadId tid,
1641                             UWord arg0, UWord arg1, UWord arg2 )
1642{
1643   /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1644   PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1645}
1646
1647/* ------ */
1648
1649void
1650ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1651                                  UWord arg0, UWord arg1,
1652                                  UWord arg2, UWord arg3 )
1653{
1654   /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1655                     struct timespec *timeout); */
1656   PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1657   if (arg3 != 0)
1658      PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
1659}
1660
1661/* ------ */
1662
1663static
1664UInt get_sem_count( Int semid )
1665{
1666   struct vki_semid_ds buf;
1667   union vki_semun arg;
1668   SysRes res;
1669
1670   /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
1671      (experimental) otherwise complains that the use in the return
1672      statement below is uninitialised. */
1673   buf.sem_nsems = 0;
1674
1675   arg.buf = &buf;
1676
1677#  ifdef __NR_semctl
1678   res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
1679#  else
1680   res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
1681                          VKI_IPC_STAT, (UWord)&arg);
1682#  endif
1683   if (sr_isError(res))
1684      return 0;
1685
1686   return buf.sem_nsems;
1687}
1688
1689void
1690ML_(generic_PRE_sys_semctl) ( ThreadId tid,
1691                              UWord arg0, UWord arg1,
1692                              UWord arg2, UWord arg3 )
1693{
1694   /* int semctl(int semid, int semnum, int cmd, ...); */
1695   union vki_semun arg = *(union vki_semun *)&arg3;
1696   UInt nsems;
1697   switch (arg2 /* cmd */) {
1698#if defined(VKI_IPC_INFO)
1699   case VKI_IPC_INFO:
1700   case VKI_SEM_INFO:
1701   case VKI_IPC_INFO|VKI_IPC_64:
1702   case VKI_SEM_INFO|VKI_IPC_64:
1703      PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
1704                     (Addr)arg.buf, sizeof(struct vki_seminfo) );
1705      break;
1706#endif
1707
1708   case VKI_IPC_STAT:
1709#if defined(VKI_SEM_STAT)
1710   case VKI_SEM_STAT:
1711#endif
1712      PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1713                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1714      break;
1715
1716#if defined(VKI_IPC_64)
1717   case VKI_IPC_STAT|VKI_IPC_64:
1718#if defined(VKI_SEM_STAT)
1719   case VKI_SEM_STAT|VKI_IPC_64:
1720#endif
1721      PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1722                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1723      break;
1724#endif
1725
1726   case VKI_IPC_SET:
1727      PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1728                    (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1729      break;
1730
1731#if defined(VKI_IPC_64)
1732   case VKI_IPC_SET|VKI_IPC_64:
1733      PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1734                    (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1735      break;
1736#endif
1737
1738   case VKI_GETALL:
1739#if defined(VKI_IPC_64)
1740   case VKI_GETALL|VKI_IPC_64:
1741#endif
1742      nsems = get_sem_count( arg0 );
1743      PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
1744                     (Addr)arg.array, sizeof(unsigned short) * nsems );
1745      break;
1746
1747   case VKI_SETALL:
1748#if defined(VKI_IPC_64)
1749   case VKI_SETALL|VKI_IPC_64:
1750#endif
1751      nsems = get_sem_count( arg0 );
1752      PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
1753                    (Addr)arg.array, sizeof(unsigned short) * nsems );
1754      break;
1755   }
1756}
1757
1758void
1759ML_(generic_POST_sys_semctl) ( ThreadId tid,
1760                               UWord res,
1761                               UWord arg0, UWord arg1,
1762                               UWord arg2, UWord arg3 )
1763{
1764   union vki_semun arg = *(union vki_semun *)&arg3;
1765   UInt nsems;
1766   switch (arg2 /* cmd */) {
1767#if defined(VKI_IPC_INFO)
1768   case VKI_IPC_INFO:
1769   case VKI_SEM_INFO:
1770   case VKI_IPC_INFO|VKI_IPC_64:
1771   case VKI_SEM_INFO|VKI_IPC_64:
1772      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1773      break;
1774#endif
1775
1776   case VKI_IPC_STAT:
1777#if defined(VKI_SEM_STAT)
1778   case VKI_SEM_STAT:
1779#endif
1780      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1781      break;
1782
1783#if defined(VKI_IPC_64)
1784   case VKI_IPC_STAT|VKI_IPC_64:
1785   case VKI_SEM_STAT|VKI_IPC_64:
1786      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1787      break;
1788#endif
1789
1790   case VKI_GETALL:
1791#if defined(VKI_IPC_64)
1792   case VKI_GETALL|VKI_IPC_64:
1793#endif
1794      nsems = get_sem_count( arg0 );
1795      POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
1796      break;
1797   }
1798}
1799
1800/* ------ */
1801
1802/* ------ */
1803
1804static
1805SizeT get_shm_size ( Int shmid )
1806{
1807#ifdef __NR_shmctl
1808#  ifdef VKI_IPC_64
1809   struct vki_shmid64_ds buf;
1810#    ifdef VGP_amd64_linux
1811     /* See bug 222545 comment 7 */
1812     SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1813                                     VKI_IPC_STAT, (UWord)&buf);
1814#    else
1815     SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1816                                     VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
1817#    endif
1818#  else /* !def VKI_IPC_64 */
1819   struct vki_shmid_ds buf;
1820   SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
1821#  endif /* def VKI_IPC_64 */
1822#else
1823   struct vki_shmid_ds buf;
1824   SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
1825                                 VKI_IPC_STAT, 0, (UWord)&buf);
1826#endif
1827   if (sr_isError(__res))
1828      return 0;
1829
1830   return (SizeT) buf.shm_segsz;
1831}
1832
1833UWord
1834ML_(generic_PRE_sys_shmat) ( ThreadId tid,
1835                             UWord arg0, UWord arg1, UWord arg2 )
1836{
1837   /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
1838   SizeT  segmentSize = get_shm_size ( arg0 );
1839   UWord tmp;
1840   Bool  ok;
1841   if (arg1 == 0) {
1842      /* arm-linux only: work around the fact that
1843         VG_(am_get_advisory_client_simple) produces something that is
1844         VKI_PAGE_SIZE aligned, whereas what we want is something
1845         VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
1846         increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
1847         then round the result up to the next VKI_SHMLBA boundary.
1848         See bug 222545 comment 15.  So far, arm-linux is the only
1849         platform where this is known to be necessary. */
1850      vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
1851      if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1852         segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
1853      }
1854      tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
1855      if (ok) {
1856         if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1857            arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
1858         } else {
1859            arg1 = tmp;
1860         }
1861      }
1862   }
1863   else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
1864      arg1 = 0;
1865   return arg1;
1866}
1867
1868void
1869ML_(generic_POST_sys_shmat) ( ThreadId tid,
1870                              UWord res,
1871                              UWord arg0, UWord arg1, UWord arg2 )
1872{
1873   SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
1874   if ( segmentSize > 0 ) {
1875      UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
1876      Bool d;
1877
1878      if (arg2 & VKI_SHM_RDONLY)
1879         prot &= ~VKI_PROT_WRITE;
1880      /* It isn't exactly correct to pass 0 for the fd and offset
1881         here.  The kernel seems to think the corresponding section
1882         does have dev/ino numbers:
1883
1884         04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
1885
1886         However there is no obvious way to find them.  In order to
1887         cope with the discrepancy, aspacem's sync checker omits the
1888         dev/ino correspondence check in cases where V does not know
1889         the dev/ino. */
1890      d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
1891
1892      /* we don't distinguish whether it's read-only or
1893       * read-write -- it doesn't matter really. */
1894      VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
1895                              0/*di_handle*/ );
1896      if (d)
1897         VG_(discard_translations)( (Addr64)res,
1898                                    (ULong)VG_PGROUNDUP(segmentSize),
1899                                    "ML_(generic_POST_sys_shmat)" );
1900   }
1901}
1902
1903/* ------ */
1904
1905Bool
1906ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
1907{
1908   /* int shmdt(const void *shmaddr); */
1909   return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
1910}
1911
1912void
1913ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
1914{
1915   NSegment const* s = VG_(am_find_nsegment)(arg0);
1916
1917   if (s != NULL) {
1918      Addr  s_start = s->start;
1919      SizeT s_len   = s->end+1 - s->start;
1920      Bool  d;
1921
1922      vg_assert(s->kind == SkShmC);
1923      vg_assert(s->start == arg0);
1924
1925      d = VG_(am_notify_munmap)(s_start, s_len);
1926      s = NULL; /* s is now invalid */
1927      VG_TRACK( die_mem_munmap, s_start, s_len );
1928      if (d)
1929         VG_(discard_translations)( (Addr64)s_start,
1930                                    (ULong)s_len,
1931                                    "ML_(generic_POST_sys_shmdt)" );
1932   }
1933}
1934/* ------ */
1935
1936void
1937ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
1938                              UWord arg0, UWord arg1, UWord arg2 )
1939{
1940   /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
1941   switch (arg1 /* cmd */) {
1942#if defined(VKI_IPC_INFO)
1943   case VKI_IPC_INFO:
1944      PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1945                     arg2, sizeof(struct vki_shminfo) );
1946      break;
1947#if defined(VKI_IPC_64)
1948   case VKI_IPC_INFO|VKI_IPC_64:
1949      PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1950                     arg2, sizeof(struct vki_shminfo64) );
1951      break;
1952#endif
1953#endif
1954
1955#if defined(VKI_SHM_INFO)
1956   case VKI_SHM_INFO:
1957#if defined(VKI_IPC_64)
1958   case VKI_SHM_INFO|VKI_IPC_64:
1959#endif
1960      PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
1961                     arg2, sizeof(struct vki_shm_info) );
1962      break;
1963#endif
1964
1965   case VKI_IPC_STAT:
1966#if defined(VKI_SHM_STAT)
1967   case VKI_SHM_STAT:
1968#endif
1969      PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
1970                     arg2, sizeof(struct vki_shmid_ds) );
1971      break;
1972
1973#if defined(VKI_IPC_64)
1974   case VKI_IPC_STAT|VKI_IPC_64:
1975   case VKI_SHM_STAT|VKI_IPC_64:
1976      PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
1977                     arg2, sizeof(struct vki_shmid64_ds) );
1978      break;
1979#endif
1980
1981   case VKI_IPC_SET:
1982      PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
1983                    arg2, sizeof(struct vki_shmid_ds) );
1984      break;
1985
1986#if defined(VKI_IPC_64)
1987   case VKI_IPC_SET|VKI_IPC_64:
1988      PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
1989                    arg2, sizeof(struct vki_shmid64_ds) );
1990      break;
1991#endif
1992   }
1993}
1994
1995void
1996ML_(generic_POST_sys_shmctl) ( ThreadId tid,
1997                               UWord res,
1998                               UWord arg0, UWord arg1, UWord arg2 )
1999{
2000   switch (arg1 /* cmd */) {
2001#if defined(VKI_IPC_INFO)
2002   case VKI_IPC_INFO:
2003      POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
2004      break;
2005   case VKI_IPC_INFO|VKI_IPC_64:
2006      POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
2007      break;
2008#endif
2009
2010#if defined(VKI_SHM_INFO)
2011   case VKI_SHM_INFO:
2012   case VKI_SHM_INFO|VKI_IPC_64:
2013      POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
2014      break;
2015#endif
2016
2017   case VKI_IPC_STAT:
2018#if defined(VKI_SHM_STAT)
2019   case VKI_SHM_STAT:
2020#endif
2021      POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
2022      break;
2023
2024#if defined(VKI_IPC_64)
2025   case VKI_IPC_STAT|VKI_IPC_64:
2026   case VKI_SHM_STAT|VKI_IPC_64:
2027      POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
2028      break;
2029#endif
2030
2031
2032   }
2033}
2034
2035/* ---------------------------------------------------------------------
2036   Generic handler for mmap
2037   ------------------------------------------------------------------ */
2038
2039/*
 * Although mmap is specified by POSIX and the arguments are generally
2041 * consistent across platforms the precise details of the low level
2042 * argument passing conventions differ. For example:
2043 *
2044 * - On x86-linux there is mmap (aka old_mmap) which takes the
2045 *   arguments in a memory block and the offset in bytes; and
2046 *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
2047 *   way and the offset in pages.
2048 *
2049 * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
2050 *   arguments in the normal way and the offset in bytes; and
2051 *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
2052 *   way and the offset in pages.
2053 *
2054 * - On amd64-linux everything is simple and there is just the one
2055 *   call, mmap (aka sys_mmap)  which takes the arguments in the
2056 *   normal way and the offset in bytes.
2057 *
2058 * - On s390x-linux there is mmap (aka old_mmap) which takes the
2059 *   arguments in a memory block and the offset in bytes. mmap2
2060 *   is also available (but not exported via unistd.h) with
2061 *   arguments in a memory block and the offset in pages.
2062 *
2063 * To cope with all this we provide a generic handler function here
2064 * and then each platform implements one or more system call handlers
2065 * which call this generic routine after extracting and normalising
2066 * the arguments.
2067 */
2068
2069SysRes
2070ML_(generic_PRE_sys_mmap) ( ThreadId tid,
2071                            UWord arg1, UWord arg2, UWord arg3,
2072                            UWord arg4, UWord arg5, Off64T arg6 )
2073{
2074   Addr       advised;
2075   SysRes     sres;
2076   MapRequest mreq;
2077   Bool       mreq_ok;
2078
2079#  if defined(VGO_darwin)
2080   // Nb: we can't use this on Darwin, it has races:
2081   // * needs to RETRY if advisory succeeds but map fails
2082   //   (could have been some other thread in a nonblocking call)
2083   // * needs to not use fixed-position mmap() on Darwin
2084   //   (mmap will cheerfully smash whatever's already there, which might
2085   //   be a new mapping from some other thread in a nonblocking call)
2086   VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
2087#  endif
2088
2089   if (arg2 == 0) {
2090      /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
2091         shall be established. */
2092      return VG_(mk_SysRes_Error)( VKI_EINVAL );
2093   }
2094
2095   if (!VG_IS_PAGE_ALIGNED(arg1)) {
2096      /* zap any misaligned addresses. */
2097      /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
2098         to fail.   Here, we catch them all. */
2099      return VG_(mk_SysRes_Error)( VKI_EINVAL );
2100   }
2101
2102   if (!VG_IS_PAGE_ALIGNED(arg6)) {
2103      /* zap any misaligned offsets. */
2104      /* SuSV3 says: The off argument is constrained to be aligned and
2105         sized according to the value returned by sysconf() when
2106         passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
2107      return VG_(mk_SysRes_Error)( VKI_EINVAL );
2108   }
2109
2110#  if defined(VKI_MAP_32BIT)
2111   /* We can't support MAP_32BIT (at least, not without significant
2112      complication), and it's royally unportable, so if the client
2113      asks for it, just fail it. */
2114   if (arg4 & VKI_MAP_32BIT) {
2115      return VG_(mk_SysRes_Error)( VKI_ENOMEM );
2116   }
2117#  endif
2118
2119   /* Figure out what kind of allocation constraints there are
2120      (fixed/hint/any), and ask aspacem what we should do. */
2121   mreq.start = arg1;
2122   mreq.len   = arg2;
2123   if (arg4 & VKI_MAP_FIXED) {
2124      mreq.rkind = MFixed;
2125   } else
2126   if (arg1 != 0) {
2127      mreq.rkind = MHint;
2128   } else {
2129      mreq.rkind = MAny;
2130   }
2131
2132   /* Enquire ... */
2133   advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2134   if (!mreq_ok) {
2135      /* Our request was bounced, so we'd better fail. */
2136      return VG_(mk_SysRes_Error)( VKI_EINVAL );
2137   }
2138
2139   /* Otherwise we're OK (so far).  Install aspacem's choice of
2140      address, and let the mmap go through.  */
2141   sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2142                                    arg4 | VKI_MAP_FIXED,
2143                                    arg5, arg6);
2144
2145   /* A refinement: it may be that the kernel refused aspacem's choice
2146      of address.  If we were originally asked for a hinted mapping,
2147      there is still a last chance: try again at any address.
2148      Hence: */
2149   if (mreq.rkind == MHint && sr_isError(sres)) {
2150      mreq.start = 0;
2151      mreq.len   = arg2;
2152      mreq.rkind = MAny;
2153      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2154      if (!mreq_ok) {
2155         /* Our request was bounced, so we'd better fail. */
2156         return VG_(mk_SysRes_Error)( VKI_EINVAL );
2157      }
2158      /* and try again with the kernel */
2159      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2160                                       arg4 | VKI_MAP_FIXED,
2161                                       arg5, arg6);
2162   }
2163
2164   if (!sr_isError(sres)) {
2165      ULong di_handle;
2166      /* Notify aspacem. */
2167      notify_core_of_mmap(
2168         (Addr)sr_Res(sres), /* addr kernel actually assigned */
2169         arg2, /* length */
2170         arg3, /* prot */
2171         arg4, /* the original flags value */
2172         arg5, /* fd */
2173         arg6  /* offset */
2174      );
2175      /* Load symbols? */
2176      di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
2177                                       False/*allow_SkFileV*/, (Int)arg5 );
2178      /* Notify the tool. */
2179      notify_tool_of_mmap(
2180         (Addr)sr_Res(sres), /* addr kernel actually assigned */
2181         arg2, /* length */
2182         arg3, /* prot */
2183         di_handle /* so the tool can refer to the read debuginfo later,
2184                      if it wants. */
2185      );
2186   }
2187
2188   /* Stay sane */
2189   if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
2190      vg_assert(sr_Res(sres) == arg1);
2191
2192   return sres;
2193}
2194
2195
2196/* ---------------------------------------------------------------------
2197   The Main Entertainment ... syscall wrappers
2198   ------------------------------------------------------------------ */
2199
2200/* Note: the PRE() and POST() wrappers are for the actual functions
2201   implementing the system calls in the OS kernel.  These mostly have
2202   names like sys_write();  a few have names like old_mmap().  See the
2203   comment for ML_(syscall_table)[] for important info about the __NR_foo
2204   constants and their relationship to the sys_foo() functions.
2205
2206   Some notes about names used for syscalls and args:
2207   - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2208     ambiguity.
2209
2210   - For error messages, we generally use a somewhat generic name
2211     for the syscall (eg. "write" rather than "sys_write").  This should be
2212     good enough for the average user to understand what is happening,
2213     without confusing them with names like "sys_write".
2214
2215   - Also, for error messages the arg names are mostly taken from the man
2216     pages (even though many of those man pages are really for glibc
2217     functions of the same name), rather than from the OS kernel source,
2218     for the same reason -- a user presented with a "bogus foo(bar)" arg
2219     will most likely look at the "foo" man page to see which is the "bar"
2220     arg.
2221
2222   Note that we use our own vki_* types.  The one exception is in
2223   PRE_REG_READn calls, where pointer types haven't been changed, because
2224   they don't need to be -- eg. for "foo*" to be used, the type foo need not
2225   be visible.
2226
2227   XXX: some of these are arch-specific, and should be factored out.
2228*/
2229
2230#define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
2231#define POST(name)     DEFN_POST_TEMPLATE(generic, name)
2232
2233// Macros to support 64-bit syscall args split into two 32 bit values
2234#if defined(VG_LITTLEENDIAN)
2235#define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2236#define MERGE64_FIRST(name) name##_low
2237#define MERGE64_SECOND(name) name##_high
2238#elif defined(VG_BIGENDIAN)
2239#define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2240#define MERGE64_FIRST(name) name##_high
2241#define MERGE64_SECOND(name) name##_low
2242#else
2243#error Unknown endianness
2244#endif
2245
2246PRE(sys_exit)
2247{
2248   ThreadState* tst;
2249   /* simple; just make this thread exit */
2250   PRINT("exit( %ld )", ARG1);
2251   PRE_REG_READ1(void, "exit", int, status);
2252   tst = VG_(get_ThreadState)(tid);
2253   /* Set the thread's status to be exiting, then claim that the
2254      syscall succeeded. */
2255   tst->exitreason = VgSrc_ExitThread;
2256   tst->os_state.exitcode = ARG1;
2257   SET_STATUS_Success(0);
2258}
2259
2260PRE(sys_ni_syscall)
2261{
2262   PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
2263      VG_SYSNUM_STRING(SYSNO));
2264   PRE_REG_READ0(long, "ni_syscall");
2265   SET_STATUS_Failure( VKI_ENOSYS );
2266}
2267
2268PRE(sys_iopl)
2269{
2270   PRINT("sys_iopl ( %ld )", ARG1);
2271   PRE_REG_READ1(long, "iopl", unsigned long, level);
2272}
2273
2274PRE(sys_fsync)
2275{
2276   *flags |= SfMayBlock;
2277   PRINT("sys_fsync ( %ld )", ARG1);
2278   PRE_REG_READ1(long, "fsync", unsigned int, fd);
2279}
2280
2281PRE(sys_fdatasync)
2282{
2283   *flags |= SfMayBlock;
2284   PRINT("sys_fdatasync ( %ld )", ARG1);
2285   PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
2286}
2287
2288PRE(sys_msync)
2289{
2290   *flags |= SfMayBlock;
2291   PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
2292   PRE_REG_READ3(long, "msync",
2293                 unsigned long, start, vki_size_t, length, int, flags);
2294   PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
2295}
2296
2297// Nb: getpmsg() and putpmsg() are special additional syscalls used in early
2298// versions of LiS (Linux Streams).  They are not part of the kernel.
// Therefore, we have to provide this type ourselves, rather than getting it
2300// from the kernel sources.
/* Buffer descriptor used by the getpmsg/putpmsg wrappers in this file
   for both their 'ctrl' and 'data' arguments. */
struct vki_pmsg_strbuf {
   int     maxlen;         /* no. of bytes in buffer */
   int     len;            /* no. of bytes returned */
   vki_caddr_t buf;        /* pointer to data */
};
2306PRE(sys_getpmsg)
2307{
2308   /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
2309   struct vki_pmsg_strbuf *ctrl;
2310   struct vki_pmsg_strbuf *data;
2311   *flags |= SfMayBlock;
2312   PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
2313   PRE_REG_READ5(int, "getpmsg",
2314                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2315                 int *, bandp, int *, flagsp);
2316   ctrl = (struct vki_pmsg_strbuf *)ARG2;
2317   data = (struct vki_pmsg_strbuf *)ARG3;
2318   if (ctrl && ctrl->maxlen > 0)
2319      PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
2320   if (data && data->maxlen > 0)
2321      PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
2322   if (ARG4)
2323      PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
2324   if (ARG5)
2325      PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
2326}
2327POST(sys_getpmsg)
2328{
2329   struct vki_pmsg_strbuf *ctrl;
2330   struct vki_pmsg_strbuf *data;
2331   vg_assert(SUCCESS);
2332   ctrl = (struct vki_pmsg_strbuf *)ARG2;
2333   data = (struct vki_pmsg_strbuf *)ARG3;
2334   if (RES == 0 && ctrl && ctrl->len > 0) {
2335      POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
2336   }
2337   if (RES == 0 && data && data->len > 0) {
2338      POST_MEM_WRITE( (Addr)data->buf, data->len);
2339   }
2340}
2341
2342PRE(sys_putpmsg)
2343{
2344   /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
2345   struct vki_pmsg_strbuf *ctrl;
2346   struct vki_pmsg_strbuf *data;
2347   *flags |= SfMayBlock;
2348   PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
2349   PRE_REG_READ5(int, "putpmsg",
2350                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2351                 int, band, int, flags);
2352   ctrl = (struct vki_pmsg_strbuf *)ARG2;
2353   data = (struct vki_pmsg_strbuf *)ARG3;
2354   if (ctrl && ctrl->len > 0)
2355      PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
2356   if (data && data->len > 0)
2357      PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
2358}
2359
2360PRE(sys_getitimer)
2361{
2362   struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2363   PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
2364   PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
2365
2366   PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
2367   PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
2368}
2369
2370POST(sys_getitimer)
2371{
2372   if (ARG2 != (Addr)NULL) {
2373      struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2374      POST_timeval_WRITE( &(value->it_interval) );
2375      POST_timeval_WRITE( &(value->it_value) );
2376   }
2377}
2378
2379PRE(sys_setitimer)
2380{
2381   PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
2382   PRE_REG_READ3(long, "setitimer",
2383                 int, which,
2384                 struct itimerval *, value, struct itimerval *, ovalue);
2385   if (ARG2 != (Addr)NULL) {
2386      struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2387      PRE_timeval_READ( "setitimer(&value->it_interval)",
2388                         &(value->it_interval));
2389      PRE_timeval_READ( "setitimer(&value->it_value)",
2390                         &(value->it_value));
2391   }
2392   if (ARG3 != (Addr)NULL) {
2393      struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2394      PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
2395                         &(ovalue->it_interval));
2396      PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
2397                         &(ovalue->it_value));
2398   }
2399}
2400
2401POST(sys_setitimer)
2402{
2403   if (ARG3 != (Addr)NULL) {
2404      struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2405      POST_timeval_WRITE( &(ovalue->it_interval) );
2406      POST_timeval_WRITE( &(ovalue->it_value) );
2407   }
2408}
2409
2410PRE(sys_chroot)
2411{
2412   PRINT("sys_chroot ( %#lx )", ARG1);
2413   PRE_REG_READ1(long, "chroot", const char *, path);
2414   PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
2415}
2416
2417PRE(sys_madvise)
2418{
2419   *flags |= SfMayBlock;
2420   PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
2421   PRE_REG_READ3(long, "madvise",
2422                 unsigned long, start, vki_size_t, length, int, advice);
2423}
2424
#if HAVE_MREMAP
/* mremap(old_addr, old_size, new_size, flags[, new_addr]).

   Nb: this differs from the glibc version described in the man
   pages, which lacks the fifth 'new_address' argument.  That fifth
   argument is only traced and described when MREMAP_FIXED is set in
   flags.

   The remap itself is carried out by do_mremap, and its SysRes
   becomes the status of the syscall. */
PRE(sys_mremap)
{
   if (!(ARG4 & VKI_MREMAP_FIXED)) {
      /* Four-argument form. */
      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
            ARG1, (ULong)ARG2, ARG3, ARG4);
      PRE_REG_READ4(unsigned long, "mremap",
                    unsigned long, old_addr,
                    unsigned long, old_size,
                    unsigned long, new_size,
                    unsigned long, flags);
   } else {
      /* Five-argument form: new_addr is meaningful. */
      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
            ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
      PRE_REG_READ5(unsigned long, "mremap",
                    unsigned long, old_addr,
                    unsigned long, old_size,
                    unsigned long, new_size,
                    unsigned long, flags,
                    unsigned long, new_addr);
   }

   /* Note the argument order do_mremap is given:
      old_addr, old_size, new_addr, new_size, flags. */
   SET_STATUS_from_SysRes(
      do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   );
}
#endif /* HAVE_MREMAP */
2449
2450PRE(sys_nice)
2451{
2452   PRINT("sys_nice ( %ld )", ARG1);
2453   PRE_REG_READ1(long, "nice", int, inc);
2454}
2455
2456PRE(sys_mlock)
2457{
2458   *flags |= SfMayBlock;
2459   PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
2460   PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
2461}
2462
2463PRE(sys_munlock)
2464{
2465   *flags |= SfMayBlock;
2466   PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
2467   PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
2468}
2469
2470PRE(sys_mlockall)
2471{
2472   *flags |= SfMayBlock;
2473   PRINT("sys_mlockall ( %lx )", ARG1);
2474   PRE_REG_READ1(long, "mlockall", int, flags);
2475}
2476
2477PRE(sys_setpriority)
2478{
2479   PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
2480   PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
2481}
2482
2483PRE(sys_getpriority)
2484{
2485   PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
2486   PRE_REG_READ2(long, "getpriority", int, which, int, who);
2487}
2488
2489PRE(sys_pwrite64)
2490{
2491   *flags |= SfMayBlock;
2492#if VG_WORDSIZE == 4
2493   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
2494         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
2495   PRE_REG_READ5(ssize_t, "pwrite64",
2496                 unsigned int, fd, const char *, buf, vki_size_t, count,
2497                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2498#elif VG_WORDSIZE == 8
2499   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
2500         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
2501   PRE_REG_READ4(ssize_t, "pwrite64",
2502                 unsigned int, fd, const char *, buf, vki_size_t, count,
2503                 Word, offset);
2504#else
2505#  error Unexpected word size
2506#endif
2507   PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
2508}
2509
2510PRE(sys_sync)
2511{
2512   *flags |= SfMayBlock;
2513   PRINT("sys_sync ( )");
2514   PRE_REG_READ0(long, "sync");
2515}
2516
2517PRE(sys_fstatfs)
2518{
2519   FUSE_COMPATIBLE_MAY_BLOCK();
2520   PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
2521   PRE_REG_READ2(long, "fstatfs",
2522                 unsigned int, fd, struct statfs *, buf);
2523   PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
2524}
2525
2526POST(sys_fstatfs)
2527{
2528   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
2529}
2530
2531PRE(sys_fstatfs64)
2532{
2533   FUSE_COMPATIBLE_MAY_BLOCK();
2534   PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
2535   PRE_REG_READ3(long, "fstatfs64",
2536                 unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
2537   PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
2538}
2539POST(sys_fstatfs64)
2540{
2541   POST_MEM_WRITE( ARG3, ARG2 );
2542}
2543
2544PRE(sys_getsid)
2545{
2546   PRINT("sys_getsid ( %ld )", ARG1);
2547   PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
2548}
2549
2550PRE(sys_pread64)
2551{
2552   *flags |= SfMayBlock;
2553#if VG_WORDSIZE == 4
2554   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
2555         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
2556   PRE_REG_READ5(ssize_t, "pread64",
2557                 unsigned int, fd, char *, buf, vki_size_t, count,
2558                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2559#elif VG_WORDSIZE == 8
2560   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
2561         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
2562   PRE_REG_READ4(ssize_t, "pread64",
2563                 unsigned int, fd, char *, buf, vki_size_t, count,
2564                 Word, offset);
2565#else
2566#  error Unexpected word size
2567#endif
2568   PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
2569}
2570POST(sys_pread64)
2571{
2572   vg_assert(SUCCESS);
2573   if (RES > 0) {
2574      POST_MEM_WRITE( ARG2, RES );
2575   }
2576}
2577
2578PRE(sys_mknod)
2579{
2580   FUSE_COMPATIBLE_MAY_BLOCK();
2581   PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
2582   PRE_REG_READ3(long, "mknod",
2583                 const char *, pathname, int, mode, unsigned, dev);
2584   PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
2585}
2586
2587PRE(sys_flock)
2588{
2589   *flags |= SfMayBlock;
2590   PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
2591   PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
2592}
2593
2594// Pre_read a char** argument.
2595static void pre_argv_envp(Addr a, ThreadId tid, const HChar* s1, const HChar* s2)
2596{
2597   while (True) {
2598      Addr a_deref;
2599      Addr* a_p = (Addr*)a;
2600      PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2601      a_deref = *a_p;
2602      if (0 == a_deref)
2603         break;
2604      PRE_MEM_RASCIIZ( s2, a_deref );
2605      a += sizeof(char*);
2606   }
2607}
2608
2609static Bool i_am_the_only_thread ( void )
2610{
2611   Int c = VG_(count_living_threads)();
2612   vg_assert(c >= 1); /* stay sane */
2613   return c == 1;
2614}
2615
2616/* Wait until all other threads disappear. */
2617void VG_(reap_threads)(ThreadId self)
2618{
2619   while (!i_am_the_only_thread()) {
2620      /* Let other thread(s) run */
2621      VG_(vg_yield)();
2622      VG_(poll_signals)(self);
2623   }
2624   vg_assert(i_am_the_only_thread());
2625}
2626
2627// XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2628// but it seems to work nonetheless...
2629PRE(sys_execve)
2630{
2631   HChar*       path = NULL;       /* path to executable */
2632   HChar**      envp = NULL;
2633   HChar**      argv = NULL;
2634   HChar**      arg2copy;
2635   HChar*       launcher_basename = NULL;
2636   ThreadState* tst;
2637   Int          i, j, tot_args;
2638   SysRes       res;
2639   Bool         setuid_allowed, trace_this_child;
2640
2641   PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
2642   PRE_REG_READ3(vki_off_t, "execve",
2643                 char *, filename, char **, argv, char **, envp);
2644   PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
2645   if (ARG2 != 0)
2646      pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
2647   if (ARG3 != 0)
2648      pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
2649
2650   vg_assert(VG_(is_valid_tid)(tid));
2651   tst = VG_(get_ThreadState)(tid);
2652
2653   /* Erk.  If the exec fails, then the following will have made a
2654      mess of things which makes it hard for us to continue.  The
2655      right thing to do is piece everything together again in
2656      POST(execve), but that's close to impossible.  Instead, we make
2657      an effort to check that the execve will work before actually
2658      doing it. */
2659
2660   /* Check that the name at least begins in client-accessible storage. */
2661   if (ARG1 == 0 /* obviously bogus */
2662       || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
2663      SET_STATUS_Failure( VKI_EFAULT );
2664      return;
2665   }
2666
2667   // debug-only printing
2668   if (0) {
2669      VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
2670      if (ARG2) {
2671         VG_(printf)("ARG2 = ");
2672         Int q;
2673         HChar** vec = (HChar**)ARG2;
2674         for (q = 0; vec[q]; q++)
2675            VG_(printf)("%p(%s) ", vec[q], vec[q]);
2676         VG_(printf)("\n");
2677      } else {
2678         VG_(printf)("ARG2 = null\n");
2679      }
2680   }
2681
2682   // Decide whether or not we want to follow along
2683   { // Make 'child_argv' be a pointer to the child's arg vector
2684     // (skipping the exe name)
2685     HChar** child_argv = (HChar**)ARG2;
2686     if (child_argv && child_argv[0] == NULL)
2687        child_argv = NULL;
2688     trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
2689   }
2690
2691   // Do the important checks:  it is a file, is executable, permissions are
2692   // ok, etc.  We allow setuid executables to run only in the case when
2693   // we are not simulating them, that is, they to be run natively.
2694   setuid_allowed = trace_this_child  ? False  : True;
2695   res = VG_(pre_exec_check)((const HChar *)ARG1, NULL, setuid_allowed);
2696   if (sr_isError(res)) {
2697      SET_STATUS_Failure( sr_Err(res) );
2698      return;
2699   }
2700
2701   /* If we're tracing the child, and the launcher name looks bogus
2702      (possibly because launcher.c couldn't figure it out, see
2703      comments therein) then we have no option but to fail. */
2704   if (trace_this_child
2705       && (VG_(name_of_launcher) == NULL
2706           || VG_(name_of_launcher)[0] != '/')) {
2707      SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
2708      return;
2709   }
2710
2711   /* After this point, we can't recover if the execve fails. */
2712   VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)ARG1);
2713
2714
2715   // Terminate gdbserver if it is active.
2716   if (VG_(clo_vgdb)  != Vg_VgdbNo) {
2717      // If the child will not be traced, we need to terminate gdbserver
2718      // to cleanup the gdbserver resources (e.g. the FIFO files).
2719      // If child will be traced, we also terminate gdbserver: the new
2720      // Valgrind will start a fresh gdbserver after exec.
2721      VG_(gdbserver) (0);
2722   }
2723
2724   /* Resistance is futile.  Nuke all other threads.  POSIX mandates
2725      this. (Really, nuke them all, since the new process will make
2726      its own new thread.) */
2727   VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
2728   VG_(reap_threads)(tid);
2729
2730   // Set up the child's exe path.
2731   //
2732   if (trace_this_child) {
2733
2734      // We want to exec the launcher.  Get its pre-remembered path.
2735      path = VG_(name_of_launcher);
2736      // VG_(name_of_launcher) should have been acquired by m_main at
2737      // startup.
2738      vg_assert(path);
2739
2740      launcher_basename = VG_(strrchr)(path, '/');
2741      if (launcher_basename == NULL || launcher_basename[1] == 0) {
2742         launcher_basename = path;  // hmm, tres dubious
2743      } else {
2744         launcher_basename++;
2745      }
2746
2747   } else {
2748      path = (HChar*)ARG1;
2749   }
2750
2751   // Set up the child's environment.
2752   //
2753   // Remove the valgrind-specific stuff from the environment so the
2754   // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
2755   // This is done unconditionally, since if we are tracing the child,
2756   // the child valgrind will set up the appropriate client environment.
2757   // Nb: we make a copy of the environment before trying to mangle it
2758   // as it might be in read-only memory (this was bug #101881).
2759   //
2760   // Then, if tracing the child, set VALGRIND_LIB for it.
2761   //
2762   if (ARG3 == 0) {
2763      envp = NULL;
2764   } else {
2765      envp = VG_(env_clone)( (HChar**)ARG3 );
2766      if (envp == NULL) goto hosed;
2767      VG_(env_remove_valgrind_env_stuff)( envp );
2768   }
2769
2770   if (trace_this_child) {
2771      // Set VALGRIND_LIB in ARG3 (the environment)
2772      VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
2773   }
2774
2775   // Set up the child's args.  If not tracing it, they are
2776   // simply ARG2.  Otherwise, they are
2777   //
2778   // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
2779   //
2780   // except that the first VG_(args_for_valgrind_noexecpass) args
2781   // are omitted.
2782   //
2783   if (!trace_this_child) {
2784      argv = (HChar**)ARG2;
2785   } else {
2786      vg_assert( VG_(args_for_valgrind) );
2787      vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
2788      vg_assert( VG_(args_for_valgrind_noexecpass)
2789                   <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
2790      /* how many args in total will there be? */
2791      // launcher basename
2792      tot_args = 1;
2793      // V's args
2794      tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
2795      tot_args -= VG_(args_for_valgrind_noexecpass);
2796      // name of client exe
2797      tot_args++;
2798      // args for client exe, skipping [0]
2799      arg2copy = (HChar**)ARG2;
2800      if (arg2copy && arg2copy[0]) {
2801         for (i = 1; arg2copy[i]; i++)
2802            tot_args++;
2803      }
2804      // allocate
2805      argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
2806                          (tot_args+1) * sizeof(HChar*) );
2807      if (argv == 0) goto hosed;
2808      // copy
2809      j = 0;
2810      argv[j++] = launcher_basename;
2811      for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
2812         if (i < VG_(args_for_valgrind_noexecpass))
2813            continue;
2814         argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
2815      }
2816      argv[j++] = (HChar*)ARG1;
2817      if (arg2copy && arg2copy[0])
2818         for (i = 1; arg2copy[i]; i++)
2819            argv[j++] = arg2copy[i];
2820      argv[j++] = NULL;
2821      // check
2822      vg_assert(j == tot_args+1);
2823   }
2824
2825   /* restore the DATA rlimit for the child */
2826   VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
2827
2828   /*
2829      Set the signal state up for exec.
2830
2831      We need to set the real signal state to make sure the exec'd
2832      process gets SIG_IGN properly.
2833
2834      Also set our real sigmask to match the client's sigmask so that
2835      the exec'd child will get the right mask.  First we need to
2836      clear out any pending signals so they they don't get delivered,
2837      which would confuse things.
2838
2839      XXX This is a bug - the signals should remain pending, and be
2840      delivered to the new process after exec.  There's also a
2841      race-condition, since if someone delivers us a signal between
2842      the sigprocmask and the execve, we'll still get the signal. Oh
2843      well.
2844   */
2845   {
2846      vki_sigset_t allsigs;
2847      vki_siginfo_t info;
2848
2849      /* What this loop does: it queries SCSS (the signal state that
2850         the client _thinks_ the kernel is in) by calling
2851         VG_(do_sys_sigaction), and modifies the real kernel signal
2852         state accordingly. */
2853      for (i = 1; i < VG_(max_signal); i++) {
2854         vki_sigaction_fromK_t sa_f;
2855         vki_sigaction_toK_t   sa_t;
2856         VG_(do_sys_sigaction)(i, NULL, &sa_f);
2857         VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
2858         if (sa_t.ksa_handler == VKI_SIG_IGN)
2859            VG_(sigaction)(i, &sa_t, NULL);
2860         else {
2861            sa_t.ksa_handler = VKI_SIG_DFL;
2862            VG_(sigaction)(i, &sa_t, NULL);
2863         }
2864      }
2865
2866      VG_(sigfillset)(&allsigs);
2867      while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
2868         ;
2869
2870      VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
2871   }
2872
2873   if (0) {
2874      HChar **cpp;
2875      VG_(printf)("exec: %s\n", path);
2876      for (cpp = argv; cpp && *cpp; cpp++)
2877         VG_(printf)("argv: %s\n", *cpp);
2878      if (0)
2879         for (cpp = envp; cpp && *cpp; cpp++)
2880            VG_(printf)("env: %s\n", *cpp);
2881   }
2882
2883   SET_STATUS_from_SysRes(
2884      VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
2885   );
2886
2887   /* If we got here, then the execve failed.  We've already made way
2888      too much of a mess to continue, so we have to abort. */
2889  hosed:
2890   vg_assert(FAILURE);
2891   VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
2892                ARG1, (char*)ARG1, ARG2, ARG3, ERR);
2893   VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
2894                            "execve() failing, so I'm dying.\n");
2895   VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
2896                            "or work out how to recover.\n");
2897   VG_(exit)(101);
2898}
2899
2900PRE(sys_access)
2901{
2902   PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
2903   PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
2904   PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
2905}
2906
2907PRE(sys_alarm)
2908{
2909   PRINT("sys_alarm ( %ld )", ARG1);
2910   PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
2911}
2912
2913PRE(sys_brk)
2914{
2915   Addr brk_limit = VG_(brk_limit);
2916   Addr brk_new;
2917
2918   /* libc   says: int   brk(void *end_data_segment);
2919      kernel says: void* brk(void* end_data_segment);  (more or less)
2920
2921      libc returns 0 on success, and -1 (and sets errno) on failure.
2922      Nb: if you ask to shrink the dataseg end below what it
2923      currently is, that always succeeds, even if the dataseg end
2924      doesn't actually change (eg. brk(0)).  Unless it seg faults.
2925
2926      Kernel returns the new dataseg end.  If the brk() failed, this
2927      will be unchanged from the old one.  That's why calling (kernel)
2928      brk(0) gives the current dataseg end (libc brk() just returns
2929      zero in that case).
2930
2931      Both will seg fault if you shrink it back into a text segment.
2932   */
2933   PRINT("sys_brk ( %#lx )", ARG1);
2934   PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
2935
2936   brk_new = do_brk(ARG1);
2937   SET_STATUS_Success( brk_new );
2938
2939   if (brk_new == ARG1) {
2940      /* brk() succeeded */
2941      if (brk_new < brk_limit) {
2942         /* successfully shrunk the data segment. */
2943         VG_TRACK( die_mem_brk, (Addr)ARG1,
2944		   brk_limit-ARG1 );
2945      } else
2946      if (brk_new > brk_limit) {
2947         /* successfully grew the data segment */
2948         VG_TRACK( new_mem_brk, brk_limit,
2949                   ARG1-brk_limit, tid );
2950      }
2951   } else {
2952      /* brk() failed */
2953      vg_assert(brk_limit == brk_new);
2954   }
2955}
2956
2957PRE(sys_chdir)
2958{
2959   FUSE_COMPATIBLE_MAY_BLOCK();
2960   PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
2961   PRE_REG_READ1(long, "chdir", const char *, path);
2962   PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
2963}
2964
2965PRE(sys_chmod)
2966{
2967   FUSE_COMPATIBLE_MAY_BLOCK();
2968   PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
2969   PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
2970   PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
2971}
2972
2973PRE(sys_chown)
2974{
2975   FUSE_COMPATIBLE_MAY_BLOCK();
2976   PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
2977   PRE_REG_READ3(long, "chown",
2978                 const char *, path, vki_uid_t, owner, vki_gid_t, group);
2979   PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
2980}
2981
2982PRE(sys_lchown)
2983{
2984   FUSE_COMPATIBLE_MAY_BLOCK();
2985   PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
2986   PRE_REG_READ3(long, "lchown",
2987                 const char *, path, vki_uid_t, owner, vki_gid_t, group);
2988   PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
2989}
2990
2991PRE(sys_close)
2992{
2993   FUSE_COMPATIBLE_MAY_BLOCK();
2994   PRINT("sys_close ( %ld )", ARG1);
2995   PRE_REG_READ1(long, "close", unsigned int, fd);
2996
2997   /* Detect and negate attempts by the client to close Valgrind's log fd */
2998   if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
2999        /* If doing -d style logging (which is to fd=2), don't
3000           allow that to be closed either. */
3001        || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
3002      SET_STATUS_Failure( VKI_EBADF );
3003}
3004
3005POST(sys_close)
3006{
3007   if (VG_(clo_track_fds)) record_fd_close(ARG1);
3008}
3009
3010PRE(sys_dup)
3011{
3012   PRINT("sys_dup ( %ld )", ARG1);
3013   PRE_REG_READ1(long, "dup", unsigned int, oldfd);
3014}
3015
3016POST(sys_dup)
3017{
3018   vg_assert(SUCCESS);
3019   if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
3020      VG_(close)(RES);
3021      SET_STATUS_Failure( VKI_EMFILE );
3022   } else {
3023      if (VG_(clo_track_fds))
3024         ML_(record_fd_open_named)(tid, RES);
3025   }
3026}
3027
3028PRE(sys_dup2)
3029{
3030   PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
3031   PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
3032   if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
3033      SET_STATUS_Failure( VKI_EBADF );
3034}
3035
3036POST(sys_dup2)
3037{
3038   vg_assert(SUCCESS);
3039   if (VG_(clo_track_fds))
3040      ML_(record_fd_open_named)(tid, RES);
3041}
3042
3043PRE(sys_fchdir)
3044{
3045   FUSE_COMPATIBLE_MAY_BLOCK();
3046   PRINT("sys_fchdir ( %ld )", ARG1);
3047   PRE_REG_READ1(long, "fchdir", unsigned int, fd);
3048}
3049
3050PRE(sys_fchown)
3051{
3052   FUSE_COMPATIBLE_MAY_BLOCK();
3053   PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
3054   PRE_REG_READ3(long, "fchown",
3055                 unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
3056}
3057
3058PRE(sys_fchmod)
3059{
3060   FUSE_COMPATIBLE_MAY_BLOCK();
3061   PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
3062   PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
3063}
3064
3065PRE(sys_newfstat)
3066{
3067   FUSE_COMPATIBLE_MAY_BLOCK();
3068   PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
3069   PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
3070   PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
3071}
3072
3073POST(sys_newfstat)
3074{
3075   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3076}
3077
3078static vki_sigset_t fork_saved_mask;
3079
3080// In Linux, the sys_fork() function varies across architectures, but we
3081// ignore the various args it gets, and so it looks arch-neutral.  Hmm.
3082PRE(sys_fork)
3083{
3084   Bool is_child;
3085   Int child_pid;
3086   vki_sigset_t mask;
3087
3088   PRINT("sys_fork ( )");
3089   PRE_REG_READ0(long, "fork");
3090
3091   /* Block all signals during fork, so that we can fix things up in
3092      the child without being interrupted. */
3093   VG_(sigfillset)(&mask);
3094   VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
3095
3096   SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
3097
3098   if (!SUCCESS) return;
3099
3100#if defined(VGO_linux)
3101   // RES is 0 for child, non-0 (the child's PID) for parent.
3102   is_child = ( RES == 0 ? True : False );
3103   child_pid = ( is_child ? -1 : RES );
3104#elif defined(VGO_darwin)
3105   // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
3106   is_child = RESHI;
3107   child_pid = RES;
3108#else
3109#  error Unknown OS
3110#endif
3111
3112   VG_(do_atfork_pre)(tid);
3113
3114   if (is_child) {
3115      VG_(do_atfork_child)(tid);
3116
3117      /* restore signal mask */
3118      VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3119
3120      /* If --child-silent-after-fork=yes was specified, set the
3121         output file descriptors to 'impossible' values.  This is
3122         noticed by send_bytes_to_logging_sink in m_libcprint.c, which
3123         duly stops writing any further output. */
3124      if (VG_(clo_child_silent_after_fork)) {
3125         if (!VG_(log_output_sink).is_socket)
3126            VG_(log_output_sink).fd = -1;
3127         if (!VG_(xml_output_sink).is_socket)
3128            VG_(xml_output_sink).fd = -1;
3129      }
3130
3131   } else {
3132      VG_(do_atfork_parent)(tid);
3133
3134      PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
3135
3136      /* restore signal mask */
3137      VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3138   }
3139}
3140
3141PRE(sys_ftruncate)
3142{
3143   *flags |= SfMayBlock;
3144   PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
3145   PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
3146}
3147
3148PRE(sys_truncate)
3149{
3150   *flags |= SfMayBlock;
3151   PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3152   PRE_REG_READ2(long, "truncate",
3153                 const char *, path, unsigned long, length);
3154   PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
3155}
3156
3157PRE(sys_ftruncate64)
3158{
3159   *flags |= SfMayBlock;
3160#if VG_WORDSIZE == 4
3161   PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
3162   PRE_REG_READ3(long, "ftruncate64",
3163                 unsigned int, fd,
3164                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3165#else
3166   PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
3167   PRE_REG_READ2(long, "ftruncate64",
3168                 unsigned int,fd, UWord,length);
3169#endif
3170}
3171
3172PRE(sys_truncate64)
3173{
3174   *flags |= SfMayBlock;
3175#if VG_WORDSIZE == 4
3176   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
3177   PRE_REG_READ3(long, "truncate64",
3178                 const char *, path,
3179                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3180#else
3181   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
3182   PRE_REG_READ2(long, "truncate64",
3183                 const char *,path, UWord,length);
3184#endif
3185   PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
3186}
3187
3188PRE(sys_getdents)
3189{
3190   *flags |= SfMayBlock;
3191   PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
3192   PRE_REG_READ3(long, "getdents",
3193                 unsigned int, fd, struct linux_dirent *, dirp,
3194                 unsigned int, count);
3195   PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
3196}
3197
3198POST(sys_getdents)
3199{
3200   vg_assert(SUCCESS);
3201   if (RES > 0)
3202      POST_MEM_WRITE( ARG2, RES );
3203}
3204
3205PRE(sys_getdents64)
3206{
3207   *flags |= SfMayBlock;
3208   PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
3209   PRE_REG_READ3(long, "getdents64",
3210                 unsigned int, fd, struct linux_dirent64 *, dirp,
3211                 unsigned int, count);
3212   PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
3213}
3214
3215POST(sys_getdents64)
3216{
3217   vg_assert(SUCCESS);
3218   if (RES > 0)
3219      POST_MEM_WRITE( ARG2, RES );
3220}
3221
3222PRE(sys_getgroups)
3223{
3224   PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
3225   PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
3226   if (ARG1 > 0)
3227      PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3228}
3229
3230POST(sys_getgroups)
3231{
3232   vg_assert(SUCCESS);
3233   if (ARG1 > 0 && RES > 0)
3234      POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
3235}
3236
3237PRE(sys_getcwd)
3238{
3239   // Comment from linux/fs/dcache.c:
3240   //   NOTE! The user-level library version returns a character pointer.
3241   //   The kernel system call just returns the length of the buffer filled
3242   //   (which includes the ending '\0' character), or a negative error
3243   //   value.
3244   // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
3245   PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
3246   PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
3247   PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
3248}
3249
3250POST(sys_getcwd)
3251{
3252   vg_assert(SUCCESS);
3253   if (RES != (Addr)NULL)
3254      POST_MEM_WRITE( ARG1, RES );
3255}
3256
3257PRE(sys_geteuid)
3258{
3259   PRINT("sys_geteuid ( )");
3260   PRE_REG_READ0(long, "geteuid");
3261}
3262
3263PRE(sys_getegid)
3264{
3265   PRINT("sys_getegid ( )");
3266   PRE_REG_READ0(long, "getegid");
3267}
3268
3269PRE(sys_getgid)
3270{
3271   PRINT("sys_getgid ( )");
3272   PRE_REG_READ0(long, "getgid");
3273}
3274
3275PRE(sys_getpid)
3276{
3277   PRINT("sys_getpid ()");
3278   PRE_REG_READ0(long, "getpid");
3279}
3280
3281PRE(sys_getpgid)
3282{
3283   PRINT("sys_getpgid ( %ld )", ARG1);
3284   PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
3285}
3286
3287PRE(sys_getpgrp)
3288{
3289   PRINT("sys_getpgrp ()");
3290   PRE_REG_READ0(long, "getpgrp");
3291}
3292
3293PRE(sys_getppid)
3294{
3295   PRINT("sys_getppid ()");
3296   PRE_REG_READ0(long, "getppid");
3297}
3298
3299static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
3300{
3301   POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
3302
3303#ifdef _RLIMIT_POSIX_FLAG
3304   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
3305   // Unset it here to make the switch case below work correctly.
3306   a1 &= ~_RLIMIT_POSIX_FLAG;
3307#endif
3308
3309   switch (a1) {
3310   case VKI_RLIMIT_NOFILE:
3311      ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
3312      ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
3313      break;
3314
3315   case VKI_RLIMIT_DATA:
3316      *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
3317      break;
3318
3319   case VKI_RLIMIT_STACK:
3320      *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
3321      break;
3322   }
3323}
3324
3325PRE(sys_old_getrlimit)
3326{
3327   PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
3328   PRE_REG_READ2(long, "old_getrlimit",
3329                 unsigned int, resource, struct rlimit *, rlim);
3330   PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3331}
3332
3333POST(sys_old_getrlimit)
3334{
3335   common_post_getrlimit(tid, ARG1, ARG2);
3336}
3337
3338PRE(sys_getrlimit)
3339{
3340   PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
3341   PRE_REG_READ2(long, "getrlimit",
3342                 unsigned int, resource, struct rlimit *, rlim);
3343   PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3344}
3345
3346POST(sys_getrlimit)
3347{
3348   common_post_getrlimit(tid, ARG1, ARG2);
3349}
3350
3351PRE(sys_getrusage)
3352{
3353   PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
3354   PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
3355   PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
3356}
3357
3358POST(sys_getrusage)
3359{
3360   vg_assert(SUCCESS);
3361   if (RES == 0)
3362      POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
3363}
3364
3365PRE(sys_gettimeofday)
3366{
3367   PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
3368   PRE_REG_READ2(long, "gettimeofday",
3369                 struct timeval *, tv, struct timezone *, tz);
3370   // GrP fixme does darwin write to *tz anymore?
3371   if (ARG1 != 0)
3372      PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
3373   if (ARG2 != 0)
3374      PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3375}
3376
3377POST(sys_gettimeofday)
3378{
3379   vg_assert(SUCCESS);
3380   if (RES == 0) {
3381      if (ARG1 != 0)
3382         POST_timeval_WRITE( ARG1 );
3383      if (ARG2 != 0)
3384	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
3385   }
3386}
3387
3388PRE(sys_settimeofday)
3389{
3390   PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
3391   PRE_REG_READ2(long, "settimeofday",
3392                 struct timeval *, tv, struct timezone *, tz);
3393   if (ARG1 != 0)
3394      PRE_timeval_READ( "settimeofday(tv)", ARG1 );
3395   if (ARG2 != 0) {
3396      PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3397      /* maybe should warn if tz->tz_dsttime is non-zero? */
3398   }
3399}
3400
3401PRE(sys_getuid)
3402{
3403   PRINT("sys_getuid ( )");
3404   PRE_REG_READ0(long, "getuid");
3405}
3406
3407void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
3408{
3409   /* We don't have any specific information on it, so
3410      try to do something reasonable based on direction and
3411      size bits.  The encoding scheme is described in
3412      /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3413
3414      According to Simon Hausmann, _IOC_READ means the kernel
3415      writes a value to the ioctl value passed from the user
3416      space and the other way around with _IOC_WRITE. */
3417
3418   UInt dir  = _VKI_IOC_DIR(request);
3419   UInt size = _VKI_IOC_SIZE(request);
3420   if (VG_(strstr)(VG_(clo_sim_hints), "lax-ioctls") != NULL) {
3421      /*
3422       * Be very lax about ioctl handling; the only
3423       * assumption is that the size is correct. Doesn't
3424       * require the full buffer to be initialized when
3425       * writing.  Without this, using some device
3426       * drivers with a large number of strange ioctl
3427       * commands becomes very tiresome.
3428       */
3429   } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
3430      //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3431      //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3432      static Int moans = 3;
3433      if (moans > 0 && !VG_(clo_xml)) {
3434         moans--;
3435         VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
3436                   " with no size/direction hints\n", request);
3437         VG_(umsg)("   This could cause spurious value errors to appear.\n");
3438         VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
3439                   "guidance on writing a proper wrapper.\n" );
3440      }
3441   } else {
3442      //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3443      //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3444      if ((dir & _VKI_IOC_WRITE) && size > 0)
3445         PRE_MEM_READ( "ioctl(generic)", arg, size);
3446      if ((dir & _VKI_IOC_READ) && size > 0)
3447         PRE_MEM_WRITE( "ioctl(generic)", arg, size);
3448   }
3449}
3450
3451void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3452{
3453   /* We don't have any specific information on it, so
3454      try to do something reasonable based on direction and
3455      size bits.  The encoding scheme is described in
3456      /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3457
3458      According to Simon Hausmann, _IOC_READ means the kernel
3459      writes a value to the ioctl value passed from the user
3460      space and the other way around with _IOC_WRITE. */
3461
3462   UInt dir  = _VKI_IOC_DIR(request);
3463   UInt size = _VKI_IOC_SIZE(request);
3464   if (size > 0 && (dir & _VKI_IOC_READ)
3465       && res == 0
3466       && arg != (Addr)NULL)
3467   {
3468      POST_MEM_WRITE(arg, size);
3469   }
3470}
3471
3472/*
3473   If we're sending a SIGKILL to one of our own threads, then simulate
3474   it rather than really sending the signal, so that the target thread
3475   gets a chance to clean up.  Returns True if we did the killing (or
3476   no killing is necessary), and False if the caller should use the
3477   normal kill syscall.
3478
3479   "pid" is any pid argument which can be passed to kill; group kills
3480   (< -1, 0), and owner kills (-1) are ignored, on the grounds that
3481   they'll most likely hit all the threads and we won't need to worry
3482   about cleanup.  In truth, we can't fully emulate these multicast
3483   kills.
3484
3485   "tgid" is a thread group id.  If it is not -1, then the target
3486   thread must be in that thread group.
3487 */
3488Bool ML_(do_sigkill)(Int pid, Int tgid)
3489{
3490   ThreadState *tst;
3491   ThreadId tid;
3492
3493   if (pid <= 0)
3494      return False;
3495
3496   tid = VG_(lwpid_to_vgtid)(pid);
3497   if (tid == VG_INVALID_THREADID)
3498      return False;		/* none of our threads */
3499
3500   tst = VG_(get_ThreadState)(tid);
3501   if (tst == NULL || tst->status == VgTs_Empty)
3502      return False;		/* hm, shouldn't happen */
3503
3504   if (tgid != -1 && tst->os_state.threadgroup != tgid)
3505      return False;		/* not the right thread group */
3506
3507   /* Check to see that the target isn't already exiting. */
3508   if (!VG_(is_exiting)(tid)) {
3509      if (VG_(clo_trace_signals))
3510	 VG_(message)(Vg_DebugMsg,
3511                      "Thread %d being killed with SIGKILL\n",
3512                      tst->tid);
3513
3514      tst->exitreason = VgSrc_FatalSig;
3515      tst->os_state.fatalsig = VKI_SIGKILL;
3516
3517      if (!VG_(is_running_thread)(tid))
3518	 VG_(get_thread_out_of_syscall)(tid);
3519   }
3520
3521   return True;
3522}
3523
3524PRE(sys_kill)
3525{
3526   PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
3527   PRE_REG_READ2(long, "kill", int, pid, int, sig);
3528   if (!ML_(client_signal_OK)(ARG2)) {
3529      SET_STATUS_Failure( VKI_EINVAL );
3530      return;
3531   }
3532
3533   /* If we're sending SIGKILL, check to see if the target is one of
3534      our threads and handle it specially. */
3535   if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
3536      SET_STATUS_Success(0);
3537   else
3538      /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
3539         affecting how posix-compliant the call is.  I guess it is
3540         harmless to pass the 3rd arg on other platforms; hence pass
3541         it on all. */
3542      SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
3543
3544   if (VG_(clo_trace_signals))
3545      VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
3546		   ARG2, ARG1);
3547
3548   /* This kill might have given us a pending signal.  Ask for a check once
3549      the syscall is done. */
3550   *flags |= SfPollAfter;
3551}
3552
3553PRE(sys_link)
3554{
3555   *flags |= SfMayBlock;
3556   PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3557   PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
3558   PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
3559   PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
3560}
3561
3562PRE(sys_newlstat)
3563{
3564   PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
3565   PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
3566   PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
3567   PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
3568}
3569
3570POST(sys_newlstat)
3571{
3572   vg_assert(SUCCESS);
3573   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3574}
3575
3576PRE(sys_mkdir)
3577{
3578   *flags |= SfMayBlock;
3579   PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3580   PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
3581   PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
3582}
3583
3584PRE(sys_mprotect)
3585{
3586   PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
3587   PRE_REG_READ3(long, "mprotect",
3588                 unsigned long, addr, vki_size_t, len, unsigned long, prot);
3589
3590   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
3591      SET_STATUS_Failure( VKI_ENOMEM );
3592   }
3593#if defined(VKI_PROT_GROWSDOWN)
3594   else
3595   if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
3596      /* Deal with mprotects on growable stack areas.
3597
3598         The critical files to understand all this are mm/mprotect.c
3599         in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
3600         glibc.
3601
3602         The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
3603         round the start/end address of mprotect to the start/end of
3604         the underlying vma and glibc uses that as an easy way to
3605         change the protection of the stack by calling mprotect on the
3606         last page of the stack with PROT_GROWSDOWN set.
3607
3608         The sanity check provided by the kernel is that the vma must
3609         have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
3610      UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
3611      NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
3612      NSegment const *rseg;
3613
3614      vg_assert(aseg);
3615
3616      if (grows == VKI_PROT_GROWSDOWN) {
3617         rseg = VG_(am_next_nsegment)( aseg, False/*backwards*/ );
3618         if (rseg &&
3619             rseg->kind == SkResvn &&
3620             rseg->smode == SmUpper &&
3621             rseg->end+1 == aseg->start) {
3622            Addr end = ARG1 + ARG2;
3623            ARG1 = aseg->start;
3624            ARG2 = end - aseg->start;
3625            ARG3 &= ~VKI_PROT_GROWSDOWN;
3626         } else {
3627            SET_STATUS_Failure( VKI_EINVAL );
3628         }
3629      } else if (grows == VKI_PROT_GROWSUP) {
3630         rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
3631         if (rseg &&
3632             rseg->kind == SkResvn &&
3633             rseg->smode == SmLower &&
3634             aseg->end+1 == rseg->start) {
3635            ARG2 = aseg->end - ARG1 + 1;
3636            ARG3 &= ~VKI_PROT_GROWSUP;
3637         } else {
3638            SET_STATUS_Failure( VKI_EINVAL );
3639         }
3640      } else {
3641         /* both GROWSUP and GROWSDOWN */
3642         SET_STATUS_Failure( VKI_EINVAL );
3643      }
3644   }
3645#endif   // defined(VKI_PROT_GROWSDOWN)
3646}
3647
3648POST(sys_mprotect)
3649{
3650   Addr a    = ARG1;
3651   SizeT len = ARG2;
3652   Int  prot = ARG3;
3653
3654   ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
3655}
3656
3657PRE(sys_munmap)
3658{
3659   if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
3660   PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
3661   PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
3662
3663   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
3664      SET_STATUS_Failure( VKI_EINVAL );
3665}
3666
3667POST(sys_munmap)
3668{
3669   Addr  a   = ARG1;
3670   SizeT len = ARG2;
3671
3672   ML_(notify_core_and_tool_of_munmap)( (Addr64)a, (ULong)len );
3673}
3674
3675PRE(sys_mincore)
3676{
3677   PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
3678   PRE_REG_READ3(long, "mincore",
3679                 unsigned long, start, vki_size_t, length,
3680                 unsigned char *, vec);
3681   PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3682}
3683POST(sys_mincore)
3684{
3685   POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3686}
3687
3688PRE(sys_nanosleep)
3689{
3690   *flags |= SfMayBlock|SfPostOnFail;
3691   PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
3692   PRE_REG_READ2(long, "nanosleep",
3693                 struct timespec *, req, struct timespec *, rem);
3694   PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
3695   if (ARG2 != 0)
3696      PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
3697}
3698
3699POST(sys_nanosleep)
3700{
3701   vg_assert(SUCCESS || FAILURE);
3702   if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
3703      POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
3704}
3705
3706PRE(sys_open)
3707{
3708   if (ARG2 & VKI_O_CREAT) {
3709      // 3-arg version
3710      PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
3711      PRE_REG_READ3(long, "open",
3712                    const char *, filename, int, flags, int, mode);
3713   } else {
3714      // 2-arg version
3715      PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
3716      PRE_REG_READ2(long, "open",
3717                    const char *, filename, int, flags);
3718   }
3719   PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
3720
3721#if defined(VGO_linux)
3722   /* Handle the case where the open is of /proc/self/cmdline or
3723      /proc/<pid>/cmdline, and just give it a copy of the fd for the
3724      fake file we cooked up at startup (in m_main).  Also, seek the
3725      cloned fd back to the start. */
3726   {
3727      HChar  name[30];
3728      HChar* arg1s = (HChar*) ARG1;
3729      SysRes sres;
3730
3731      VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
3732      if (ML_(safe_to_deref)( arg1s, 1 ) &&
3733          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
3734         )
3735      {
3736         sres = VG_(dup)( VG_(cl_cmdline_fd) );
3737         SET_STATUS_from_SysRes( sres );
3738         if (!sr_isError(sres)) {
3739            OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
3740            if (off < 0)
3741               SET_STATUS_Failure( VKI_EMFILE );
3742         }
3743         return;
3744      }
3745   }
3746
3747   /* Handle the case where the open is of /proc/self/auxv or
3748      /proc/<pid>/auxv, and just give it a copy of the fd for the
3749      fake file we cooked up at startup (in m_main).  Also, seek the
3750      cloned fd back to the start. */
3751   {
3752      HChar  name[30];
3753      HChar* arg1s = (HChar*) ARG1;
3754      SysRes sres;
3755
3756      VG_(sprintf)(name, "/proc/%d/auxv", VG_(getpid)());
3757      if (ML_(safe_to_deref)( arg1s, 1 ) &&
3758          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/auxv"))
3759         )
3760      {
3761         sres = VG_(dup)( VG_(cl_auxv_fd) );
3762         SET_STATUS_from_SysRes( sres );
3763         if (!sr_isError(sres)) {
3764            OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
3765            if (off < 0)
3766               SET_STATUS_Failure( VKI_EMFILE );
3767         }
3768         return;
3769      }
3770   }
3771#endif // defined(VGO_linux)
3772
3773   /* Otherwise handle normally */
3774   *flags |= SfMayBlock;
3775}
3776
3777POST(sys_open)
3778{
3779   vg_assert(SUCCESS);
3780   if (!ML_(fd_allowed)(RES, "open", tid, True)) {
3781      VG_(close)(RES);
3782      SET_STATUS_Failure( VKI_EMFILE );
3783   } else {
3784      if (VG_(clo_track_fds))
3785         ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
3786   }
3787}
3788
3789PRE(sys_read)
3790{
3791   *flags |= SfMayBlock;
3792   PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3793   PRE_REG_READ3(ssize_t, "read",
3794                 unsigned int, fd, char *, buf, vki_size_t, count);
3795
3796   if (!ML_(fd_allowed)(ARG1, "read", tid, False))
3797      SET_STATUS_Failure( VKI_EBADF );
3798   else
3799      PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
3800}
3801
3802POST(sys_read)
3803{
3804   vg_assert(SUCCESS);
3805   POST_MEM_WRITE( ARG2, RES );
3806}
3807
3808PRE(sys_write)
3809{
3810   Bool ok;
3811   *flags |= SfMayBlock;
3812   PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3813   PRE_REG_READ3(ssize_t, "write",
3814                 unsigned int, fd, const char *, buf, vki_size_t, count);
3815   /* check to see if it is allowed.  If not, try for an exemption from
3816      --sim-hints=enable-outer (used for self hosting). */
3817   ok = ML_(fd_allowed)(ARG1, "write", tid, False);
3818   if (!ok && ARG1 == 2/*stderr*/
3819           && VG_(strstr)(VG_(clo_sim_hints),"enable-outer"))
3820      ok = True;
3821   if (!ok)
3822      SET_STATUS_Failure( VKI_EBADF );
3823   else
3824      PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
3825}
3826
3827PRE(sys_creat)
3828{
3829   *flags |= SfMayBlock;
3830   PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3831   PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
3832   PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
3833}
3834
3835POST(sys_creat)
3836{
3837   vg_assert(SUCCESS);
3838   if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
3839      VG_(close)(RES);
3840      SET_STATUS_Failure( VKI_EMFILE );
3841   } else {
3842      if (VG_(clo_track_fds))
3843         ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
3844   }
3845}
3846
3847PRE(sys_poll)
3848{
3849   /* struct pollfd {
3850        int fd;           -- file descriptor
3851        short events;     -- requested events
3852        short revents;    -- returned events
3853      };
3854      int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
3855   */
3856   UInt i;
3857   struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3858   *flags |= SfMayBlock;
3859   PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
3860   PRE_REG_READ3(long, "poll",
3861                 struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
3862
3863   for (i = 0; i < ARG2; i++) {
3864      PRE_MEM_READ( "poll(ufds.fd)",
3865                    (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
3866      PRE_MEM_READ( "poll(ufds.events)",
3867                    (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
3868      PRE_MEM_WRITE( "poll(ufds.revents)",
3869                     (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3870   }
3871}
3872
3873POST(sys_poll)
3874{
3875   if (RES >= 0) {
3876      UInt i;
3877      struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3878      for (i = 0; i < ARG2; i++)
3879	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3880   }
3881}
3882
3883PRE(sys_readlink)
3884{
3885   FUSE_COMPATIBLE_MAY_BLOCK();
3886   Word saved = SYSNO;
3887
3888   PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
3889   PRE_REG_READ3(long, "readlink",
3890                 const char *, path, char *, buf, int, bufsiz);
3891   PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
3892   PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
3893
3894   {
3895#if defined(VGO_linux)
3896      /*
3897       * Handle the case where readlink is looking at /proc/self/exe or
3898       * /proc/<pid>/exe.
3899       */
3900      HChar name[25];
3901      HChar* arg1s = (HChar*) ARG1;
3902      VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
3903      if (ML_(safe_to_deref)(arg1s, 1) &&
3904          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
3905         )
3906      {
3907         VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
3908         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
3909                                                         ARG2, ARG3));
3910      } else
3911#endif // defined(VGO_linux)
3912      {
3913         /* Normal case */
3914         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
3915      }
3916   }
3917
3918   if (SUCCESS && RES > 0)
3919      POST_MEM_WRITE( ARG2, RES );
3920}
3921
3922PRE(sys_readv)
3923{
3924   Int i;
3925   struct vki_iovec * vec;
3926   *flags |= SfMayBlock;
3927   PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
3928   PRE_REG_READ3(ssize_t, "readv",
3929                 unsigned long, fd, const struct iovec *, vector,
3930                 unsigned long, count);
3931   if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
3932      SET_STATUS_Failure( VKI_EBADF );
3933   } else {
3934      PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
3935
3936      if (ARG2 != 0) {
3937         /* ToDo: don't do any of the following if the vector is invalid */
3938         vec = (struct vki_iovec *)ARG2;
3939         for (i = 0; i < (Int)ARG3; i++)
3940            PRE_MEM_WRITE( "readv(vector[...])",
3941                           (Addr)vec[i].iov_base, vec[i].iov_len );
3942      }
3943   }
3944}
3945
3946POST(sys_readv)
3947{
3948   vg_assert(SUCCESS);
3949   if (RES > 0) {
3950      Int i;
3951      struct vki_iovec * vec = (struct vki_iovec *)ARG2;
3952      Int remains = RES;
3953
3954      /* RES holds the number of bytes read. */
3955      for (i = 0; i < (Int)ARG3; i++) {
3956	 Int nReadThisBuf = vec[i].iov_len;
3957	 if (nReadThisBuf > remains) nReadThisBuf = remains;
3958	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
3959	 remains -= nReadThisBuf;
3960	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
3961      }
3962   }
3963}
3964
3965PRE(sys_rename)
3966{
3967   FUSE_COMPATIBLE_MAY_BLOCK();
3968   PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3969   PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
3970   PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
3971   PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
3972}
3973
3974PRE(sys_rmdir)
3975{
3976   *flags |= SfMayBlock;
3977   PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
3978   PRE_REG_READ1(long, "rmdir", const char *, pathname);
3979   PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
3980}
3981
3982PRE(sys_select)
3983{
3984   *flags |= SfMayBlock;
3985   PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
3986   PRE_REG_READ5(long, "select",
3987                 int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
3988                 vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
3989   // XXX: this possibly understates how much memory is read.
3990   if (ARG2 != 0)
3991      PRE_MEM_READ( "select(readfds)",
3992		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
3993   if (ARG3 != 0)
3994      PRE_MEM_READ( "select(writefds)",
3995		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
3996   if (ARG4 != 0)
3997      PRE_MEM_READ( "select(exceptfds)",
3998		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
3999   if (ARG5 != 0)
4000      PRE_timeval_READ( "select(timeout)", ARG5 );
4001}
4002
4003PRE(sys_setgid)
4004{
4005   PRINT("sys_setgid ( %ld )", ARG1);
4006   PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
4007}
4008
4009PRE(sys_setsid)
4010{
4011   PRINT("sys_setsid ( )");
4012   PRE_REG_READ0(long, "setsid");
4013}
4014
4015PRE(sys_setgroups)
4016{
4017   PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
4018   PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
4019   if (ARG1 > 0)
4020      PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
4021}
4022
4023PRE(sys_setpgid)
4024{
4025   PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
4026   PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
4027}
4028
4029PRE(sys_setregid)
4030{
4031   PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
4032   PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
4033}
4034
4035PRE(sys_setreuid)
4036{
4037   PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
4038   PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
4039}
4040
4041PRE(sys_setrlimit)
4042{
4043   UWord arg1 = ARG1;
4044   PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
4045   PRE_REG_READ2(long, "setrlimit",
4046                 unsigned int, resource, struct rlimit *, rlim);
4047   PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
4048
4049#ifdef _RLIMIT_POSIX_FLAG
4050   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
4051   // Unset it here to make the if statements below work correctly.
4052   arg1 &= ~_RLIMIT_POSIX_FLAG;
4053#endif
4054
4055   if (ARG2 &&
4056       ((struct vki_rlimit *)ARG2)->rlim_cur > ((struct vki_rlimit *)ARG2)->rlim_max) {
4057      SET_STATUS_Failure( VKI_EINVAL );
4058   }
4059   else if (arg1 == VKI_RLIMIT_NOFILE) {
4060      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
4061          ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
4062         SET_STATUS_Failure( VKI_EPERM );
4063      }
4064      else {
4065         VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
4066         SET_STATUS_Success( 0 );
4067      }
4068   }
4069   else if (arg1 == VKI_RLIMIT_DATA) {
4070      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
4071          ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
4072         SET_STATUS_Failure( VKI_EPERM );
4073      }
4074      else {
4075         VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
4076         SET_STATUS_Success( 0 );
4077      }
4078   }
4079   else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
4080      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
4081          ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
4082         SET_STATUS_Failure( VKI_EPERM );
4083      }
4084      else {
4085         VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
4086         VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
4087         SET_STATUS_Success( 0 );
4088      }
4089   }
4090}
4091
4092PRE(sys_setuid)
4093{
4094   PRINT("sys_setuid ( %ld )", ARG1);
4095   PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
4096}
4097
4098PRE(sys_newstat)
4099{
4100   PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4101   PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
4102   PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
4103   PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
4104}
4105
4106POST(sys_newstat)
4107{
4108   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
4109}
4110
4111PRE(sys_statfs)
4112{
4113   PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
4114   PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
4115   PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
4116   PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
4117}
4118POST(sys_statfs)
4119{
4120   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
4121}
4122
4123PRE(sys_statfs64)
4124{
4125   PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
4126   PRE_REG_READ3(long, "statfs64",
4127                 const char *, path, vki_size_t, size, struct statfs64 *, buf);
4128   PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
4129   PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
4130}
4131POST(sys_statfs64)
4132{
4133   POST_MEM_WRITE( ARG3, ARG2 );
4134}
4135
4136PRE(sys_symlink)
4137{
4138   *flags |= SfMayBlock;
4139   PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
4140   PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
4141   PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
4142   PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
4143}
4144
4145PRE(sys_time)
4146{
4147   /* time_t time(time_t *t); */
4148   PRINT("sys_time ( %#lx )",ARG1);
4149   PRE_REG_READ1(long, "time", int *, t);
4150   if (ARG1 != 0) {
4151      PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
4152   }
4153}
4154
4155POST(sys_time)
4156{
4157   if (ARG1 != 0) {
4158      POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
4159   }
4160}
4161
4162PRE(sys_times)
4163{
4164   PRINT("sys_times ( %#lx )", ARG1);
4165   PRE_REG_READ1(long, "times", struct tms *, buf);
4166   if (ARG1 != 0) {
4167      PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
4168   }
4169}
4170
4171POST(sys_times)
4172{
4173   if (ARG1 != 0) {
4174      POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
4175   }
4176}
4177
4178PRE(sys_umask)
4179{
4180   PRINT("sys_umask ( %ld )", ARG1);
4181   PRE_REG_READ1(long, "umask", int, mask);
4182}
4183
4184PRE(sys_unlink)
4185{
4186   *flags |= SfMayBlock;
4187   PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
4188   PRE_REG_READ1(long, "unlink", const char *, pathname);
4189   PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
4190}
4191
4192PRE(sys_newuname)
4193{
4194   PRINT("sys_newuname ( %#lx )", ARG1);
4195   PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
4196   PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
4197}
4198
4199POST(sys_newuname)
4200{
4201   if (ARG1 != 0) {
4202      POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
4203   }
4204}
4205
4206PRE(sys_waitpid)
4207{
4208   *flags |= SfMayBlock;
4209   PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
4210   PRE_REG_READ3(long, "waitpid",
4211                 vki_pid_t, pid, unsigned int *, status, int, options);
4212
4213   if (ARG2 != (Addr)NULL)
4214      PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
4215}
4216
4217POST(sys_waitpid)
4218{
4219   if (ARG2 != (Addr)NULL)
4220      POST_MEM_WRITE( ARG2, sizeof(int) );
4221}
4222
4223PRE(sys_wait4)
4224{
4225   *flags |= SfMayBlock;
4226   PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);
4227
4228   PRE_REG_READ4(long, "wait4",
4229                 vki_pid_t, pid, unsigned int *, status, int, options,
4230                 struct rusage *, rusage);
4231   if (ARG2 != (Addr)NULL)
4232      PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
4233   if (ARG4 != (Addr)NULL)
4234      PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
4235}
4236
4237POST(sys_wait4)
4238{
4239   if (ARG2 != (Addr)NULL)
4240      POST_MEM_WRITE( ARG2, sizeof(int) );
4241   if (ARG4 != (Addr)NULL)
4242      POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
4243}
4244
4245PRE(sys_writev)
4246{
4247   Int i;
4248   struct vki_iovec * vec;
4249   *flags |= SfMayBlock;
4250   PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
4251   PRE_REG_READ3(ssize_t, "writev",
4252                 unsigned long, fd, const struct iovec *, vector,
4253                 unsigned long, count);
4254   if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
4255      SET_STATUS_Failure( VKI_EBADF );
4256   } else {
4257      PRE_MEM_READ( "writev(vector)",
4258		     ARG2, ARG3 * sizeof(struct vki_iovec) );
4259      if (ARG2 != 0) {
4260         /* ToDo: don't do any of the following if the vector is invalid */
4261         vec = (struct vki_iovec *)ARG2;
4262         for (i = 0; i < (Int)ARG3; i++)
4263            PRE_MEM_READ( "writev(vector[...])",
4264                           (Addr)vec[i].iov_base, vec[i].iov_len );
4265      }
4266   }
4267}
4268
4269PRE(sys_utimes)
4270{
4271   FUSE_COMPATIBLE_MAY_BLOCK();
4272   PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4273   PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
4274   PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
4275   if (ARG2 != 0) {
4276      PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
4277      PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
4278   }
4279}
4280
4281PRE(sys_acct)
4282{
4283   PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
4284   PRE_REG_READ1(long, "acct", const char *, filename);
4285   PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
4286}
4287
4288PRE(sys_pause)
4289{
4290   *flags |= SfMayBlock;
4291   PRINT("sys_pause ( )");
4292   PRE_REG_READ0(long, "pause");
4293}
4294
4295PRE(sys_sigaltstack)
4296{
4297   PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
4298   PRE_REG_READ2(int, "sigaltstack",
4299                 const vki_stack_t *, ss, vki_stack_t *, oss);
4300   if (ARG1 != 0) {
4301      const vki_stack_t *ss = (vki_stack_t *)ARG1;
4302      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
4303      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
4304      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
4305   }
4306   if (ARG2 != 0) {
4307      PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
4308   }
4309
4310   SET_STATUS_from_SysRes(
4311      VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
4312                              (vki_stack_t*)ARG2)
4313   );
4314}
4315POST(sys_sigaltstack)
4316{
4317   vg_assert(SUCCESS);
4318   if (RES == 0 && ARG2 != 0)
4319      POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
4320}
4321
4322PRE(sys_sethostname)
4323{
4324   PRINT("sys_sethostname ( %#lx, %ld )", ARG1,ARG2);
4325   PRE_REG_READ2(long, "sethostname", char *, name, int, len);
4326   PRE_MEM_READ( "sethostname(name)", ARG1, ARG2 );
4327}
4328
4329#undef PRE
4330#undef POST
4331
4332#endif // defined(VGO_linux) || defined(VGO_darwin)
4333
4334/*--------------------------------------------------------------------*/
4335/*--- end                                                          ---*/
4336/*--------------------------------------------------------------------*/
4337