1
2/*--------------------------------------------------------------------*/
3/*--- Thread scheduling.                               scheduler.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2000-2012 Julian Seward
11      jseward@acm.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31/*
32   Overview
33
34   Valgrind tries to emulate the kernel's threading as closely as
35   possible.  The client does all threading via the normal syscalls
36   (on Linux: clone, etc).  Valgrind emulates this by creating exactly
37   the same process structure as would be created without Valgrind.
38   There are no extra threads.
39
40   The main difference is that Valgrind only allows one client thread
41   to run at once.  This is controlled with the CPU Big Lock,
42   "the_BigLock".  Any time a thread wants to run client code or
43   manipulate any shared state (which is anything other than its own
44   ThreadState entry), it must hold the_BigLock.
45
46   When a thread is about to block in a blocking syscall, it releases
47   the_BigLock, and re-takes it when it becomes runnable again (either
48   because the syscall finished, or we took a signal).
49
50   VG_(scheduler) therefore runs in each thread.  It returns only when
51   the thread is exiting, either because it exited itself, or it was
52   told to exit by another thread.
53
54   This file is almost entirely OS-independent.  The details of how
55   the OS handles threading and signalling are abstracted away and
56   implemented elsewhere.  [Some of the functions have worked their
57   way back for the moment, until we do an OS port in earnest...]
58*/
59
60
61#include "pub_core_basics.h"
62#include "pub_core_debuglog.h"
63#include "pub_core_vki.h"
64#include "pub_core_vkiscnums.h"    // __NR_sched_yield
65#include "pub_core_libcsetjmp.h"   // to keep _threadstate.h happy
66#include "pub_core_threadstate.h"
67#include "pub_core_aspacemgr.h"
68#include "pub_core_clreq.h"         // for VG_USERREQ__*
69#include "pub_core_dispatch.h"
70#include "pub_core_errormgr.h"      // For VG_(get_n_errs_found)()
71#include "pub_core_gdbserver.h"     // for VG_(gdbserver) and VG_(gdbserver_activity)
72#include "pub_core_libcbase.h"
73#include "pub_core_libcassert.h"
74#include "pub_core_libcprint.h"
75#include "pub_core_libcproc.h"
76#include "pub_core_libcsignal.h"
77#if defined(VGO_darwin)
78#include "pub_core_mach.h"
79#endif
80#include "pub_core_machine.h"
81#include "pub_core_mallocfree.h"
82#include "pub_core_options.h"
83#include "pub_core_replacemalloc.h"
84#include "pub_core_signals.h"
85#include "pub_core_stacks.h"
86#include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
87#include "pub_core_syscall.h"
88#include "pub_core_syswrap.h"
89#include "pub_core_tooliface.h"
90#include "pub_core_translate.h"     // For VG_(translate)()
91#include "pub_core_transtab.h"
92#include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
93#include "priv_sched-lock.h"
94#include "pub_core_scheduler.h"     // self
95#include "pub_core_redir.h"
96
97
98/* ---------------------------------------------------------------------
99   Types and globals for the scheduler.
100   ------------------------------------------------------------------ */
101
102/* ThreadId and ThreadState are defined elsewhere*/
103
104/* Defines the thread-scheduling timeslice, in terms of the number of
105   basic blocks we attempt to run each thread for.  Smaller values
106   give finer interleaving but much increased scheduling overheads. */
107#define SCHEDULING_QUANTUM   100000
108
109/* If False, a fault is Valgrind-internal (ie, a bug) */
110Bool VG_(in_generated_code) = False;
111
112/* 64-bit counter for the number of basic blocks done. */
113static ULong bbs_done = 0;
114
115/* Counter to see if vgdb activity is to be verified.
116   When nr of bbs done reaches vgdb_next_poll, scheduler will
117   poll for gdbserver activity. VG_(force_vgdb_poll) and
118   VG_(disable_vgdb_poll) allows the valgrind core (e.g. m_gdbserver)
119   to control when the next poll will be done. */
120static ULong vgdb_next_poll;
121
122/* Forwards */
123static void do_client_request ( ThreadId tid );
124static void scheduler_sanity ( ThreadId tid );
125static void mostly_clear_thread_record ( ThreadId tid );
126
127/* Stats. */
128static ULong n_scheduling_events_MINOR = 0;
129static ULong n_scheduling_events_MAJOR = 0;
130
131/* Stats: number of XIndirs, and number that missed in the fast
132   cache. */
133static ULong stats__n_xindirs = 0;
134static ULong stats__n_xindir_misses = 0;
135
136/* And 32-bit temp bins for the above, so that 32-bit platforms don't
137   have to do 64 bit incs on the hot path through
138   VG_(cp_disp_xindir). */
139/*global*/ UInt VG_(stats__n_xindirs_32) = 0;
140/*global*/ UInt VG_(stats__n_xindir_misses_32) = 0;
141
142/* Sanity checking counts. */
143static UInt sanity_fast_count = 0;
144static UInt sanity_slow_count = 0;
145
146void VG_(print_scheduler_stats)(void)
147{
148   VG_(message)(Vg_DebugMsg,
149      "scheduler: %'llu event checks.\n", bbs_done );
150   VG_(message)(Vg_DebugMsg,
151                "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
152                stats__n_xindirs, stats__n_xindir_misses,
153                stats__n_xindirs / (stats__n_xindir_misses
154                                    ? stats__n_xindir_misses : 1));
155   VG_(message)(Vg_DebugMsg,
156      "scheduler: %'llu/%'llu major/minor sched events.\n",
157      n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
158   VG_(message)(Vg_DebugMsg,
159                "   sanity: %d cheap, %d expensive checks.\n",
160                sanity_fast_count, sanity_slow_count );
161}
162
163/*
164 * Mutual exclusion object used to serialize threads.
165 */
166static struct sched_lock *the_BigLock;
167
168
169/* ---------------------------------------------------------------------
170   Helper functions for the scheduler.
171   ------------------------------------------------------------------ */
172
173static
174void print_sched_event ( ThreadId tid, Char* what )
175{
176   VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s\n", tid, what );
177}
178
179/* For showing SB counts, if the user asks to see them. */
180#define SHOW_SBCOUNT_EVERY (20ULL * 1000 * 1000)
181static ULong bbs_done_lastcheck = 0;
182
183static
184void maybe_show_sb_counts ( void )
185{
186   Long delta = bbs_done - bbs_done_lastcheck;
187   vg_assert(delta >= 0);
188   if (UNLIKELY(delta >= SHOW_SBCOUNT_EVERY)) {
189      VG_(umsg)("%'lld superblocks executed\n", bbs_done);
190      bbs_done_lastcheck = bbs_done;
191   }
192}
193
194static
195HChar* name_of_sched_event ( UInt event )
196{
197   switch (event) {
198      case VEX_TRC_JMP_TINVAL:         return "TINVAL";
199      case VEX_TRC_JMP_NOREDIR:        return "NOREDIR";
200      case VEX_TRC_JMP_SIGTRAP:        return "SIGTRAP";
201      case VEX_TRC_JMP_SIGSEGV:        return "SIGSEGV";
202      case VEX_TRC_JMP_SIGBUS:         return "SIGBUS";
203      case VEX_TRC_JMP_EMWARN:         return "EMWARN";
204      case VEX_TRC_JMP_EMFAIL:         return "EMFAIL";
205      case VEX_TRC_JMP_CLIENTREQ:      return "CLIENTREQ";
206      case VEX_TRC_JMP_YIELD:          return "YIELD";
207      case VEX_TRC_JMP_NODECODE:       return "NODECODE";
208      case VEX_TRC_JMP_MAPFAIL:        return "MAPFAIL";
209      case VEX_TRC_JMP_SYS_SYSCALL:    return "SYSCALL";
210      case VEX_TRC_JMP_SYS_INT32:      return "INT32";
211      case VEX_TRC_JMP_SYS_INT128:     return "INT128";
212      case VEX_TRC_JMP_SYS_INT129:     return "INT129";
213      case VEX_TRC_JMP_SYS_INT130:     return "INT130";
214      case VEX_TRC_JMP_SYS_SYSENTER:   return "SYSENTER";
215      case VEX_TRC_JMP_BORING:         return "VEX_BORING";
216
217      case VG_TRC_BORING:              return "VG_BORING";
218      case VG_TRC_INNER_FASTMISS:      return "FASTMISS";
219      case VG_TRC_INNER_COUNTERZERO:   return "COUNTERZERO";
220      case VG_TRC_FAULT_SIGNAL:        return "FAULTSIGNAL";
221      case VG_TRC_INVARIANT_FAILED:    return "INVFAILED";
222      case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
223      case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
224      default:                         return "??UNKNOWN??";
225  }
226}
227
228/* Allocate a completely empty ThreadState record. */
229ThreadId VG_(alloc_ThreadState) ( void )
230{
231   Int i;
232   for (i = 1; i < VG_N_THREADS; i++) {
233      if (VG_(threads)[i].status == VgTs_Empty) {
234	 VG_(threads)[i].status = VgTs_Init;
235	 VG_(threads)[i].exitreason = VgSrc_None;
236         return i;
237      }
238   }
239   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
240   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
241   VG_(core_panic)("VG_N_THREADS is too low");
242   /*NOTREACHED*/
243}
244
245/*
246   Mark a thread as Runnable.  This will block until the_BigLock is
247   available, so that we get exclusive access to all the shared
248   structures and the CPU.  Up until we get the_BigLock, we must not
249   touch any shared state.
250
251   When this returns, we'll actually be running.
252 */
253void VG_(acquire_BigLock)(ThreadId tid, HChar* who)
254{
255   ThreadState *tst;
256
257#if 0
258   if (VG_(clo_trace_sched)) {
259      HChar buf[100];
260      vg_assert(VG_(strlen)(who) <= 100-50);
261      VG_(sprintf)(buf, "waiting for lock (%s)", who);
262      print_sched_event(tid, buf);
263   }
264#endif
265
266   /* First, acquire the_BigLock.  We can't do anything else safely
267      prior to this point.  Even doing debug printing prior to this
268      point is, technically, wrong. */
269   VG_(acquire_BigLock_LL)(NULL);
270
271   tst = VG_(get_ThreadState)(tid);
272
273   vg_assert(tst->status != VgTs_Runnable);
274
275   tst->status = VgTs_Runnable;
276
277   if (VG_(running_tid) != VG_INVALID_THREADID)
278      VG_(printf)("tid %d found %d running\n", tid, VG_(running_tid));
279   vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
280   VG_(running_tid) = tid;
281
282   { Addr gsp = VG_(get_SP)(tid);
283     VG_(unknown_SP_update)(gsp, gsp, 0/*unknown origin*/);
284   }
285
286   if (VG_(clo_trace_sched)) {
287      HChar buf[150];
288      vg_assert(VG_(strlen)(who) <= 150-50);
289      VG_(sprintf)(buf, " acquired lock (%s)", who);
290      print_sched_event(tid, buf);
291   }
292}
293
294/*
295   Set a thread into a sleeping state, and give up exclusive access to
296   the CPU.  On return, the thread must be prepared to block until it
297   is ready to run again (generally this means blocking in a syscall,
298   but it may mean that we remain in a Runnable state and we're just
299   yielding the CPU to another thread).
300 */
301void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate, HChar* who)
302{
303   ThreadState *tst = VG_(get_ThreadState)(tid);
304
305   vg_assert(tst->status == VgTs_Runnable);
306
307   vg_assert(sleepstate == VgTs_WaitSys ||
308	     sleepstate == VgTs_Yielding);
309
310   tst->status = sleepstate;
311
312   vg_assert(VG_(running_tid) == tid);
313   VG_(running_tid) = VG_INVALID_THREADID;
314
315   if (VG_(clo_trace_sched)) {
316      Char buf[200];
317      vg_assert(VG_(strlen)(who) <= 200-100);
318      VG_(sprintf)(buf, "releasing lock (%s) -> %s",
319                        who, VG_(name_of_ThreadStatus)(sleepstate));
320      print_sched_event(tid, buf);
321   }
322
323   /* Release the_BigLock; this will reschedule any runnable
324      thread. */
325   VG_(release_BigLock_LL)(NULL);
326}
327
328static void init_BigLock(void)
329{
330   vg_assert(!the_BigLock);
331   the_BigLock = ML_(create_sched_lock)();
332}
333
334static void deinit_BigLock(void)
335{
336   ML_(destroy_sched_lock)(the_BigLock);
337   the_BigLock = NULL;
338}
339
340/* See pub_core_scheduler.h for description */
341void VG_(acquire_BigLock_LL) ( HChar* who )
342{
343   ML_(acquire_sched_lock)(the_BigLock);
344}
345
346/* See pub_core_scheduler.h for description */
347void VG_(release_BigLock_LL) ( HChar* who )
348{
349   ML_(release_sched_lock)(the_BigLock);
350}
351
352Bool VG_(owns_BigLock_LL) ( ThreadId tid )
353{
354   return (ML_(get_sched_lock_owner)(the_BigLock)
355           == VG_(threads)[tid].os_state.lwpid);
356}
357
358
359/* Clear out the ThreadState and release the semaphore. Leaves the
360   ThreadState in VgTs_Zombie state, so that it doesn't get
361   reallocated until the caller is really ready. */
362void VG_(exit_thread)(ThreadId tid)
363{
364   vg_assert(VG_(is_valid_tid)(tid));
365   vg_assert(VG_(is_running_thread)(tid));
366   vg_assert(VG_(is_exiting)(tid));
367
368   mostly_clear_thread_record(tid);
369   VG_(running_tid) = VG_INVALID_THREADID;
370
371   /* There should still be a valid exitreason for this thread */
372   vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
373
374   if (VG_(clo_trace_sched))
375      print_sched_event(tid, "release lock in VG_(exit_thread)");
376
377   VG_(release_BigLock_LL)(NULL);
378}
379
380/* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
381   out of the syscall and onto doing the next thing, whatever that is.
382   If it isn't blocked in a syscall, has no effect on the thread. */
383void VG_(get_thread_out_of_syscall)(ThreadId tid)
384{
385   vg_assert(VG_(is_valid_tid)(tid));
386   vg_assert(!VG_(is_running_thread)(tid));
387
388   if (VG_(threads)[tid].status == VgTs_WaitSys) {
389      if (VG_(clo_trace_signals)) {
390	 VG_(message)(Vg_DebugMsg,
391                      "get_thread_out_of_syscall zaps tid %d lwp %d\n",
392		      tid, VG_(threads)[tid].os_state.lwpid);
393      }
394#     if defined(VGO_darwin)
395      {
396         // GrP fixme use mach primitives on darwin?
397         // GrP fixme thread_abort_safely?
398         // GrP fixme race for thread with WaitSys set but not in syscall yet?
399         extern kern_return_t thread_abort(mach_port_t);
400         thread_abort(VG_(threads)[tid].os_state.lwpid);
401      }
402#     else
403      {
404         __attribute__((unused))
405         Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
406         /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
407            I'm really not sure.  Here's a race scenario which argues
408            that we shoudn't; but equally I'm not sure the scenario is
409            even possible, because of constraints caused by the question
410            of who holds the BigLock when.
411
412            Target thread tid does sys_read on a socket and blocks.  This
413            function gets called, and we observe correctly that tid's
414            status is WaitSys but then for whatever reason this function
415            goes very slowly for a while.  Then data arrives from
416            wherever, tid's sys_read returns, tid exits.  Then we do
417            tkill on tid, but tid no longer exists; tkill returns an
418            error code and the assert fails. */
419         /* vg_assert(r == 0); */
420      }
421#     endif
422   }
423}
424
425/*
426   Yield the CPU for a short time to let some other thread run.
427 */
428void VG_(vg_yield)(void)
429{
430   ThreadId tid = VG_(running_tid);
431
432   vg_assert(tid != VG_INVALID_THREADID);
433   vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
434
435   VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
436
437   /*
438      Tell the kernel we're yielding.
439    */
440   VG_(do_syscall0)(__NR_sched_yield);
441
442   VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
443}
444
445
446/* Set the standard set of blocked signals, used whenever we're not
447   running a client syscall. */
448static void block_signals(void)
449{
450   vki_sigset_t mask;
451
452   VG_(sigfillset)(&mask);
453
454   /* Don't block these because they're synchronous */
455   VG_(sigdelset)(&mask, VKI_SIGSEGV);
456   VG_(sigdelset)(&mask, VKI_SIGBUS);
457   VG_(sigdelset)(&mask, VKI_SIGFPE);
458   VG_(sigdelset)(&mask, VKI_SIGILL);
459   VG_(sigdelset)(&mask, VKI_SIGTRAP);
460
461   /* Can't block these anyway */
462   VG_(sigdelset)(&mask, VKI_SIGSTOP);
463   VG_(sigdelset)(&mask, VKI_SIGKILL);
464
465   VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
466}
467
468static void os_state_clear(ThreadState *tst)
469{
470   tst->os_state.lwpid       = 0;
471   tst->os_state.threadgroup = 0;
472#  if defined(VGO_linux)
473   /* no other fields to clear */
474#  elif defined(VGO_darwin)
475   tst->os_state.post_mach_trap_fn = NULL;
476   tst->os_state.pthread           = 0;
477   tst->os_state.func_arg          = 0;
478   VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
479   VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
480   tst->os_state.wq_jmpbuf_valid   = False;
481   tst->os_state.remote_port       = 0;
482   tst->os_state.msgh_id           = 0;
483   VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
484#  else
485#    error "Unknown OS"
486#  endif
487}
488
489static void os_state_init(ThreadState *tst)
490{
491   tst->os_state.valgrind_stack_base    = 0;
492   tst->os_state.valgrind_stack_init_SP = 0;
493   os_state_clear(tst);
494}
495
496static
497void mostly_clear_thread_record ( ThreadId tid )
498{
499   vki_sigset_t savedmask;
500
501   vg_assert(tid >= 0 && tid < VG_N_THREADS);
502   VG_(cleanup_thread)(&VG_(threads)[tid].arch);
503   VG_(threads)[tid].tid = tid;
504
505   /* Leave the thread in Zombie, so that it doesn't get reallocated
506      until the caller is finally done with the thread stack. */
507   VG_(threads)[tid].status               = VgTs_Zombie;
508
509   VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
510   VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
511
512   os_state_clear(&VG_(threads)[tid]);
513
514   /* start with no altstack */
515   VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
516   VG_(threads)[tid].altstack.ss_size = 0;
517   VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
518
519   VG_(clear_out_queued_signals)(tid, &savedmask);
520
521   VG_(threads)[tid].sched_jmpbuf_valid = False;
522}
523
524/*
525   Called in the child after fork.  If the parent has multiple
526   threads, then we've inherited a VG_(threads) array describing them,
527   but only the thread which called fork() is actually alive in the
528   child.  This functions needs to clean up all those other thread
529   structures.
530
531   Whichever tid in the parent which called fork() becomes the
532   master_tid in the child.  That's because the only living slot in
533   VG_(threads) in the child after fork is VG_(threads)[tid], and it
534   would be too hard to try to re-number the thread and relocate the
535   thread state down to VG_(threads)[1].
536
537   This function also needs to reinitialize the_BigLock, since
538   otherwise we may end up sharing its state with the parent, which
539   would be deeply confusing.
540*/
541static void sched_fork_cleanup(ThreadId me)
542{
543   ThreadId tid;
544   vg_assert(VG_(running_tid) == me);
545
546#  if defined(VGO_darwin)
547   // GrP fixme hack reset Mach ports
548   VG_(mach_init)();
549#  endif
550
551   VG_(threads)[me].os_state.lwpid = VG_(gettid)();
552   VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
553
554   /* clear out all the unused thread slots */
555   for (tid = 1; tid < VG_N_THREADS; tid++) {
556      if (tid != me) {
557         mostly_clear_thread_record(tid);
558	 VG_(threads)[tid].status = VgTs_Empty;
559         VG_(clear_syscallInfo)(tid);
560      }
561   }
562
563   /* re-init and take the sema */
564   deinit_BigLock();
565   init_BigLock();
566   VG_(acquire_BigLock_LL)(NULL);
567}
568
569
570/* First phase of initialisation of the scheduler.  Initialise the
571   bigLock, zeroise the VG_(threads) structure and decide on the
572   ThreadId of the root thread.
573*/
574ThreadId VG_(scheduler_init_phase1) ( void )
575{
576   Int i;
577   ThreadId tid_main;
578
579   VG_(debugLog)(1,"sched","sched_init_phase1\n");
580
581   if (VG_(clo_fair_sched) != disable_fair_sched
582       && !ML_(set_sched_lock_impl)(sched_lock_ticket)
583       && VG_(clo_fair_sched) == enable_fair_sched)
584   {
585      VG_(printf)("Error: fair scheduling is not supported on this system.\n");
586      VG_(exit)(1);
587   }
588
589   if (VG_(clo_verbosity) > 1) {
590      VG_(message)(Vg_DebugMsg,
591                   "Scheduler: using %s scheduler lock implementation.\n",
592                   ML_(get_sched_lock_name)());
593   }
594
595   init_BigLock();
596
597   for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
598      /* Paranoia .. completely zero it out. */
599      VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
600
601      VG_(threads)[i].sig_queue = NULL;
602
603      os_state_init(&VG_(threads)[i]);
604      mostly_clear_thread_record(i);
605
606      VG_(threads)[i].status                    = VgTs_Empty;
607      VG_(threads)[i].client_stack_szB          = 0;
608      VG_(threads)[i].client_stack_highest_word = (Addr)NULL;
609      VG_(threads)[i].err_disablement_level     = 0;
610   }
611
612   tid_main = VG_(alloc_ThreadState)();
613
614   /* Bleh.  Unfortunately there are various places in the system that
615      assume that the main thread has a ThreadId of 1.
616      - Helgrind (possibly)
617      - stack overflow message in default_action() in m_signals.c
618      - definitely a lot more places
619   */
620   vg_assert(tid_main == 1);
621
622   return tid_main;
623}
624
625
626/* Second phase of initialisation of the scheduler.  Given the root
627   ThreadId computed by first phase of initialisation, fill in stack
628   details and acquire bigLock.  Initialise the scheduler.  This is
629   called at startup.  The caller subsequently initialises the guest
630   state components of this main thread.
631*/
632void VG_(scheduler_init_phase2) ( ThreadId tid_main,
633                                  Addr     clstack_end,
634                                  SizeT    clstack_size )
635{
636   VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%d, "
637                   "cls_end=0x%lx, cls_sz=%ld\n",
638                   tid_main, clstack_end, clstack_size);
639
640   vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
641   vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
642
643   VG_(threads)[tid_main].client_stack_highest_word
644      = clstack_end + 1 - sizeof(UWord);
645   VG_(threads)[tid_main].client_stack_szB
646      = clstack_size;
647
648   VG_(atfork)(NULL, NULL, sched_fork_cleanup);
649}
650
651
652/* ---------------------------------------------------------------------
653   Helpers for running translations.
654   ------------------------------------------------------------------ */
655
656/* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
657   mask state, but does need to pass "val" through.  jumped must be a
658   volatile UWord. */
659#define SCHEDSETJMP(tid, jumped, stmt)					\
660   do {									\
661      ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);	\
662									\
663      (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
664      if ((jumped) == ((UWord)0)) {                                     \
665	 vg_assert(!_qq_tst->sched_jmpbuf_valid);			\
666	 _qq_tst->sched_jmpbuf_valid = True;				\
667	 stmt;								\
668      }	else if (VG_(clo_trace_sched))					\
669	 VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%ld\n",       \
670                     __LINE__, tid, jumped);                            \
671      vg_assert(_qq_tst->sched_jmpbuf_valid);				\
672      _qq_tst->sched_jmpbuf_valid = False;				\
673   } while(0)
674
675
676/* Do various guest state alignment checks prior to running a thread.
677   Specifically, check that what we have matches Vex's guest state
678   layout requirements.  See libvex.h for details, but in short the
679   requirements are: There must be no holes in between the primary
680   guest state, its two copies, and the spill area.  In short, all 4
681   areas must have a 16-aligned size and be 16-aligned, and placed
682   back-to-back. */
683static void do_pre_run_checks ( ThreadState* tst )
684{
685   Addr a_vex     = (Addr) & tst->arch.vex;
686   Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
687   Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
688   Addr a_spill   = (Addr) & tst->arch.vex_spill;
689   UInt sz_vex    = (UInt) sizeof tst->arch.vex;
690   UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
691   UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
692   UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;
693
694   if (0)
695   VG_(printf)("gst %p %d, sh1 %p %d, "
696               "sh2 %p %d, spill %p %d\n",
697               (void*)a_vex, sz_vex,
698               (void*)a_vexsh1, sz_vexsh1,
699               (void*)a_vexsh2, sz_vexsh2,
700               (void*)a_spill, sz_spill );
701
702   vg_assert(VG_IS_16_ALIGNED(sz_vex));
703   vg_assert(VG_IS_16_ALIGNED(sz_vexsh1));
704   vg_assert(VG_IS_16_ALIGNED(sz_vexsh2));
705   vg_assert(VG_IS_16_ALIGNED(sz_spill));
706
707   vg_assert(VG_IS_16_ALIGNED(a_vex));
708   vg_assert(VG_IS_16_ALIGNED(a_vexsh1));
709   vg_assert(VG_IS_16_ALIGNED(a_vexsh2));
710   vg_assert(VG_IS_16_ALIGNED(a_spill));
711
712   /* Check that the guest state and its two shadows have the same
713      size, and that there are no holes in between.  The latter is
714      important because Memcheck assumes that it can reliably access
715      the shadows by indexing off a pointer to the start of the
716      primary guest state area. */
717   vg_assert(sz_vex == sz_vexsh1);
718   vg_assert(sz_vex == sz_vexsh2);
719   vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
720   vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
721   /* Also check there's no hole between the second shadow area and
722      the spill area. */
723   vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
724   vg_assert(a_vex + 3 * sz_vex == a_spill);
725
726#  if defined(VGA_x86)
727   /* x86 XMM regs must form an array, ie, have no holes in
728      between. */
729   vg_assert(
730      (offsetof(VexGuestX86State,guest_XMM7)
731       - offsetof(VexGuestX86State,guest_XMM0))
732      == (8/*#regs*/-1) * 16/*bytes per reg*/
733   );
734   vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
735   vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
736   vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
737   vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
738   vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
739#  endif
740
741#  if defined(VGA_amd64)
742   /* amd64 YMM regs must form an array, ie, have no holes in
743      between. */
744   vg_assert(
745      (offsetof(VexGuestAMD64State,guest_YMM16)
746       - offsetof(VexGuestAMD64State,guest_YMM0))
747      == (17/*#regs*/-1) * 32/*bytes per reg*/
748   );
749   vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
750   vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
751   vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
752   vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
753   vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
754#  endif
755
756#  if defined(VGA_ppc32) || defined(VGA_ppc64)
757   /* ppc guest_state vector regs must be 16 byte aligned for
758      loads/stores.  This is important! */
759   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
760   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
761   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
762   /* be extra paranoid .. */
763   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
764   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
765   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
766#  endif
767
768#  if defined(VGA_arm)
769   /* arm guest_state VFP regs must be 8 byte aligned for
770      loads/stores.  Let's use 16 just to be on the safe side. */
771   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
772   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
773   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
774   /* be extra paranoid .. */
775   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
776   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
777   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
778#  endif
779
780#  if defined(VGA_s390x)
781   /* no special requirements */
782#  endif
783
784#  if defined(VGA_mips32)
785  /* no special requirements */
786#  endif
787}
788
789// NO_VGDB_POLL value ensures vgdb is not polled, while
790// VGDB_POLL_ASAP ensures that the next scheduler call
791// will cause a poll.
792#define NO_VGDB_POLL    0xffffffffffffffffULL
793#define VGDB_POLL_ASAP  0x0ULL
794
795void VG_(disable_vgdb_poll) (void )
796{
797   vgdb_next_poll = NO_VGDB_POLL;
798}
799void VG_(force_vgdb_poll) ( void )
800{
801   vgdb_next_poll = VGDB_POLL_ASAP;
802}
803
804/* Run the thread tid for a while, and return a VG_TRC_* value
805   indicating why VG_(disp_run_translations) stopped, and possibly an
806   auxiliary word.  Also, only allow the thread to run for at most
807   *dispatchCtrP events.  If (as is the normal case) use_alt_host_addr
808   is False, we are running ordinary redir'd translations, and we
809   should therefore start by looking up the guest next IP in TT.  If
810   it is True then we ignore the guest next IP and just run from
811   alt_host_addr, which presumably points at host code for a no-redir
812   translation.
813
814   Return results are placed in two_words.  two_words[0] is set to the
815   TRC.  In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
816   the address to patch is placed in two_words[1].
817*/
818static
819void run_thread_for_a_while ( /*OUT*/HWord* two_words,
820                              /*MOD*/Int*   dispatchCtrP,
821                              ThreadId      tid,
822                              HWord         alt_host_addr,
823                              Bool          use_alt_host_addr )
824{
825   volatile HWord        jumped         = 0;
826   volatile ThreadState* tst            = NULL; /* stop gcc complaining */
827   volatile Int          done_this_time = 0;
828   volatile HWord        host_code_addr = 0;
829
830   /* Paranoia */
831   vg_assert(VG_(is_valid_tid)(tid));
832   vg_assert(VG_(is_running_thread)(tid));
833   vg_assert(!VG_(is_exiting)(tid));
834   vg_assert(*dispatchCtrP > 0);
835
836   tst = VG_(get_ThreadState)(tid);
837   do_pre_run_checks( (ThreadState*)tst );
838   /* end Paranoia */
839
840   /* Futz with the XIndir stats counters. */
841   vg_assert(VG_(stats__n_xindirs_32) == 0);
842   vg_assert(VG_(stats__n_xindir_misses_32) == 0);
843
844   /* Clear return area. */
845   two_words[0] = two_words[1] = 0;
846
847   /* Figure out where we're starting from. */
848   if (use_alt_host_addr) {
849      /* unusual case -- no-redir translation */
850      host_code_addr = alt_host_addr;
851   } else {
852      /* normal case -- redir translation */
853      UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
854      if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
855         host_code_addr = VG_(tt_fast)[cno].host;
856      else {
857         AddrH res   = 0;
858         /* not found in VG_(tt_fast). Searching here the transtab
859            improves the performance compared to returning directly
860            to the scheduler. */
861         Bool  found = VG_(search_transtab)(&res, NULL, NULL,
862                                            (Addr)tst->arch.vex.VG_INSTR_PTR,
863                                            True/*upd cache*/
864                                            );
865         if (LIKELY(found)) {
866            host_code_addr = res;
867         } else {
868            /* At this point, we know that we intended to start at a
869               normal redir translation, but it was not found.  In
870               which case we can return now claiming it's not
871               findable. */
872            two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
873            return;
874         }
875      }
876   }
877   /* We have either a no-redir or a redir translation. */
878   vg_assert(host_code_addr != 0); /* implausible */
879
880   /* there should be no undealt-with signals */
881   //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
882
883   /* Set up event counter stuff for the run. */
884   tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
885   tst->arch.vex.host_EvC_FAILADDR
886      = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
887
888   if (0) {
889      vki_sigset_t m;
890      Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
891      vg_assert(err == 0);
892      VG_(printf)("tid %d: entering code with unblocked signals: ", tid);
893      for (i = 1; i <= _VKI_NSIG; i++)
894         if (!VG_(sigismember)(&m, i))
895            VG_(printf)("%d ", i);
896      VG_(printf)("\n");
897   }
898
899   /* Set up return-value area. */
900
901   // Tell the tool this thread is about to run client code
902   VG_TRACK( start_client_code, tid, bbs_done );
903
904   vg_assert(VG_(in_generated_code) == False);
905   VG_(in_generated_code) = True;
906
907   SCHEDSETJMP(
908      tid,
909      jumped,
910      VG_(disp_run_translations)(
911         two_words,
912         (void*)&tst->arch.vex,
913         host_code_addr
914      )
915   );
916
917   vg_assert(VG_(in_generated_code) == True);
918   VG_(in_generated_code) = False;
919
920   if (jumped != (HWord)0) {
921      /* We get here if the client took a fault that caused our signal
922         handler to longjmp. */
923      vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
924      two_words[0] = VG_TRC_FAULT_SIGNAL;
925      two_words[1] = 0;
926      block_signals();
927   }
928
929   /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
930      and zero out the 32-bit ones in preparation for the next run of
931      generated code. */
932   stats__n_xindirs += (ULong)VG_(stats__n_xindirs_32);
933   VG_(stats__n_xindirs_32) = 0;
934   stats__n_xindir_misses += (ULong)VG_(stats__n_xindir_misses_32);
935   VG_(stats__n_xindir_misses_32) = 0;
936
937   /* Inspect the event counter. */
938   vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
939   vg_assert(tst->arch.vex.host_EvC_FAILADDR
940             == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
941
942   done_this_time = *dispatchCtrP - ((Int)tst->arch.vex.host_EvC_COUNTER + 1);
943
944   vg_assert(done_this_time >= 0);
945   bbs_done += (ULong)done_this_time;
946
947   *dispatchCtrP -= done_this_time;
948   vg_assert(*dispatchCtrP >= 0);
949
950   // Tell the tool this thread has stopped running client code
951   VG_TRACK( stop_client_code, tid, bbs_done );
952
953   if (bbs_done >= vgdb_next_poll) {
954      if (VG_(clo_vgdb_poll))
955         vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
956      else
957         /* value was changed due to gdbserver invocation via ptrace */
958         vgdb_next_poll = NO_VGDB_POLL;
959      if (VG_(gdbserver_activity) (tid))
960         VG_(gdbserver) (tid);
961   }
962
963   /* TRC value and possible auxiliary patch-address word are already
964      in two_words[0] and [1] respectively, as a result of the call to
965      VG_(run_innerloop). */
966   /* Stay sane .. */
967   if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
968       || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
969      vg_assert(two_words[1] != 0); /* we have a legit patch addr */
970   } else {
971      vg_assert(two_words[1] == 0); /* nobody messed with it */
972   }
973}
974
975
976/* ---------------------------------------------------------------------
977   The scheduler proper.
978   ------------------------------------------------------------------ */
979
980static void handle_tt_miss ( ThreadId tid )
981{
982   Bool found;
983   Addr ip = VG_(get_IP)(tid);
984
985   /* Trivial event.  Miss in the fast-cache.  Do a full
986      lookup for it. */
987   found = VG_(search_transtab)( NULL, NULL, NULL,
988                                 ip, True/*upd_fast_cache*/ );
989   if (UNLIKELY(!found)) {
990      /* Not found; we need to request a translation. */
991      if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
992                          bbs_done, True/*allow redirection*/ )) {
993         found = VG_(search_transtab)( NULL, NULL, NULL,
994                                       ip, True );
995         vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
996
997      } else {
998	 // If VG_(translate)() fails, it's because it had to throw a
999	 // signal because the client jumped to a bad address.  That
1000	 // means that either a signal has been set up for delivery,
1001	 // or the thread has been marked for termination.  Either
1002	 // way, we just need to go back into the scheduler loop.
1003      }
1004   }
1005}
1006
1007static
1008void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
1009{
1010   Bool found          = False;
1011   Addr ip             = VG_(get_IP)(tid);
1012   UInt to_sNo         = (UInt)-1;
1013   UInt to_tteNo       = (UInt)-1;
1014
1015   found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1016                                 ip, False/*dont_upd_fast_cache*/ );
1017   if (!found) {
1018      /* Not found; we need to request a translation. */
1019      if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1020                          bbs_done, True/*allow redirection*/ )) {
1021         found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1022                                       ip, False );
1023         vg_assert2(found, "handle_chain_me: missing tt_fast entry");
1024      } else {
1025	 // If VG_(translate)() fails, it's because it had to throw a
1026	 // signal because the client jumped to a bad address.  That
1027	 // means that either a signal has been set up for delivery,
1028	 // or the thread has been marked for termination.  Either
1029	 // way, we just need to go back into the scheduler loop.
1030        return;
1031      }
1032   }
1033   vg_assert(found);
1034   vg_assert(to_sNo != -1);
1035   vg_assert(to_tteNo != -1);
1036
1037   /* So, finally we know where to patch through to.  Do the patching
1038      and update the various admin tables that allow it to be undone
1039      in the case that the destination block gets deleted. */
1040   VG_(tt_tc_do_chaining)( place_to_chain,
1041                           to_sNo, to_tteNo, toFastEP );
1042}
1043
1044static void handle_syscall(ThreadId tid, UInt trc)
1045{
1046   ThreadState * volatile tst = VG_(get_ThreadState)(tid);
1047   volatile UWord jumped;
1048
1049   /* Syscall may or may not block; either way, it will be
1050      complete by the time this call returns, and we'll be
1051      runnable again.  We could take a signal while the
1052      syscall runs. */
1053
1054   if (VG_(clo_sanity_level >= 3))
1055      VG_(am_do_sync_check)("(BEFORE SYSCALL)",__FILE__,__LINE__);
1056
1057   SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
1058
1059   if (VG_(clo_sanity_level >= 3))
1060      VG_(am_do_sync_check)("(AFTER SYSCALL)",__FILE__,__LINE__);
1061
1062   if (!VG_(is_running_thread)(tid))
1063      VG_(printf)("tid %d not running; VG_(running_tid)=%d, tid %d status %d\n",
1064		  tid, VG_(running_tid), tid, tst->status);
1065   vg_assert(VG_(is_running_thread)(tid));
1066
1067   if (jumped != (UWord)0) {
1068      block_signals();
1069      VG_(poll_signals)(tid);
1070   }
1071}
1072
1073/* tid just requested a jump to the noredir version of its current
1074   program counter.  So make up that translation if needed, run it,
1075   and return the resulting thread return code in two_words[]. */
1076static
1077void handle_noredir_jump ( /*OUT*/HWord* two_words,
1078                           /*MOD*/Int*   dispatchCtrP,
1079                           ThreadId tid )
1080{
1081   /* Clear return area. */
1082   two_words[0] = two_words[1] = 0;
1083
1084   AddrH hcode = 0;
1085   Addr  ip    = VG_(get_IP)(tid);
1086
1087   Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
1088   if (!found) {
1089      /* Not found; we need to request a translation. */
1090      if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
1091                          False/*NO REDIRECTION*/ )) {
1092
1093         found = VG_(search_unredir_transtab)( &hcode, ip );
1094         vg_assert2(found, "unredir translation missing after creation?!");
1095      } else {
1096	 // If VG_(translate)() fails, it's because it had to throw a
1097	 // signal because the client jumped to a bad address.  That
1098	 // means that either a signal has been set up for delivery,
1099	 // or the thread has been marked for termination.  Either
1100	 // way, we just need to go back into the scheduler loop.
1101         two_words[0] = VG_TRC_BORING;
1102         return;
1103      }
1104
1105   }
1106
1107   vg_assert(found);
1108   vg_assert(hcode != 0);
1109
1110   /* Otherwise run it and return the resulting VG_TRC_* value. */
1111   vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
1112   run_thread_for_a_while( two_words, dispatchCtrP, tid,
1113                           hcode, True/*use hcode*/ );
1114}
1115
1116
1117/*
1118   Run a thread until it wants to exit.
1119
1120   We assume that the caller has already called VG_(acquire_BigLock) for
1121   us, so we own the VCPU.  Also, all signals are blocked.
1122 */
1123VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
1124{
1125   /* Holds the remaining size of this thread's "timeslice". */
1126   Int dispatch_ctr = 0;
1127
1128   ThreadState *tst = VG_(get_ThreadState)(tid);
1129   static Bool vgdb_startup_action_done = False;
1130
1131   if (VG_(clo_trace_sched))
1132      print_sched_event(tid, "entering VG_(scheduler)");
1133
1134   /* Do vgdb initialization (but once). Only the first (main) task
1135      starting up will do the below.
1136      Initialize gdbserver earlier than at the first
1137      thread VG_(scheduler) is causing problems:
1138      * at the end of VG_(scheduler_init_phase2) :
1139        The main thread is in VgTs_Init state, but in a not yet
1140        consistent state => the thread cannot be reported to gdb
1141        (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
1142        back the guest registers to gdb).
1143      * at end of valgrind_main, just
1144        before VG_(main_thread_wrapper_NORETURN)(1) :
1145        The main thread is still in VgTs_Init state but in a
1146        more advanced state. However, the thread state is not yet
1147        completely initialized : a.o., the os_state is not yet fully
1148        set => the thread is then not properly reported to gdb,
1149        which is then confused (causing e.g. a duplicate thread be
1150        shown, without thread id).
1151      * it would be possible to initialize gdbserver "lower" in the
1152        call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
1153        these are platform dependent and the place at which
1154        the thread state is completely initialized is not
1155        specific anymore to the main thread (so a similar "do it only
1156        once" would be needed).
1157
1158        => a "once only" initialization here is the best compromise. */
1159   if (!vgdb_startup_action_done) {
1160      vg_assert(tid == 1); // it must be the main thread.
1161      vgdb_startup_action_done = True;
1162      if (VG_(clo_vgdb) != Vg_VgdbNo) {
1163         /* If we have to poll, ensures we do an initial poll at first
1164            scheduler call. Otherwise, ensure no poll (unless interrupted
1165            by ptrace). */
1166         if (VG_(clo_vgdb_poll))
1167            VG_(force_vgdb_poll) ();
1168         else
1169            VG_(disable_vgdb_poll) ();
1170
1171         vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
1172         /* As we are initializing, VG_(dyn_vgdb_error) can't have been
1173            changed yet. */
1174
1175         VG_(gdbserver_prerun_action) (1);
1176      } else {
1177         VG_(disable_vgdb_poll) ();
1178      }
1179   }
1180
1181   /* set the proper running signal mask */
1182   block_signals();
1183
1184   vg_assert(VG_(is_running_thread)(tid));
1185
1186   dispatch_ctr = SCHEDULING_QUANTUM;
1187
1188   while (!VG_(is_exiting)(tid)) {
1189
1190      vg_assert(dispatch_ctr >= 0);
1191      if (dispatch_ctr == 0) {
1192
1193	 /* Our slice is done, so yield the CPU to another thread.  On
1194            Linux, this doesn't sleep between sleeping and running,
1195            since that would take too much time. */
1196
1197	 /* 4 July 06: it seems that a zero-length nsleep is needed to
1198            cause async thread cancellation (canceller.c) to terminate
1199            in finite time; else it is in some kind of race/starvation
1200            situation and completion is arbitrarily delayed (although
1201            this is not a deadlock).
1202
1203            Unfortunately these sleeps cause MPI jobs not to terminate
1204            sometimes (some kind of livelock).  So sleeping once
1205            every N opportunities appears to work. */
1206
1207	 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
1208            sys_yield also helps the problem, whilst not crashing apps. */
1209
1210	 VG_(release_BigLock)(tid, VgTs_Yielding,
1211                                   "VG_(scheduler):timeslice");
1212	 /* ------------ now we don't have The Lock ------------ */
1213
1214	 VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
1215	 /* ------------ now we do have The Lock ------------ */
1216
1217	 /* OK, do some relatively expensive housekeeping stuff */
1218	 scheduler_sanity(tid);
1219	 VG_(sanity_check_general)(False);
1220
1221	 /* Look for any pending signals for this thread, and set them up
1222	    for delivery */
1223	 VG_(poll_signals)(tid);
1224
1225	 if (VG_(is_exiting)(tid))
1226	    break;		/* poll_signals picked up a fatal signal */
1227
1228	 /* For stats purposes only. */
1229	 n_scheduling_events_MAJOR++;
1230
1231	 /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
1232	    that it decrements the counter before testing it for zero, so
1233	    that if tst->dispatch_ctr is set to N you get at most N-1
1234	    iterations.  Also this means that tst->dispatch_ctr must
1235	    exceed zero before entering the innerloop.  Also also, the
1236	    decrement is done before the bb is actually run, so you
1237	    always get at least one decrement even if nothing happens. */
1238         // FIXME is this right?
1239         dispatch_ctr = SCHEDULING_QUANTUM;
1240
1241	 /* paranoia ... */
1242	 vg_assert(tst->tid == tid);
1243	 vg_assert(tst->os_state.lwpid == VG_(gettid)());
1244      }
1245
1246      /* For stats purposes only. */
1247      n_scheduling_events_MINOR++;
1248
1249      if (0)
1250         VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs\n",
1251                                   tid, dispatch_ctr - 1 );
1252
1253      HWord trc[2]; /* "two_words" */
1254      run_thread_for_a_while( &trc[0],
1255                              &dispatch_ctr,
1256                              tid, 0/*ignored*/, False );
1257
1258      if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
1259	 HChar buf[50];
1260	 VG_(sprintf)(buf, "TRC: %s", name_of_sched_event(trc[0]));
1261	 print_sched_event(tid, buf);
1262      }
1263
1264      if (trc[0] == VEX_TRC_JMP_NOREDIR) {
1265         /* If we got a request to run a no-redir version of
1266            something, do so now -- handle_noredir_jump just (creates
1267            and) runs that one translation.  The flip side is that the
1268            noredir translation can't itself return another noredir
1269            request -- that would be nonsensical.  It can, however,
1270            return VG_TRC_BORING, which just means keep going as
1271            normal. */
1272         /* Note that the fact that we need to continue with a
1273            no-redir jump is not recorded anywhere else in this
1274            thread's state.  So we *must* execute the block right now
1275            -- we can't fail to execute it and later resume with it,
1276            because by then we'll have forgotten the fact that it
1277            should be run as no-redir, but will get run as a normal
1278            potentially-redir'd, hence screwing up.  This really ought
1279            to be cleaned up, by noting in the guest state that the
1280            next block to be executed should be no-redir.  Then we can
1281            suspend and resume at any point, which isn't the case at
1282            the moment. */
1283         handle_noredir_jump( &trc[0],
1284                              &dispatch_ctr,
1285                              tid );
1286         vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
1287
1288         /* This can't be allowed to happen, since it means the block
1289            didn't execute, and we have no way to resume-as-noredir
1290            after we get more timeslice.  But I don't think it ever
1291            can, since handle_noredir_jump will assert if the counter
1292            is zero on entry. */
1293         vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
1294
1295         /* A no-redir translation can't return with a chain-me
1296            request, since chaining in the no-redir cache is too
1297            complex. */
1298         vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
1299                   && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
1300      }
1301
1302      switch (trc[0]) {
1303      case VEX_TRC_JMP_BORING:
1304         /* assisted dispatch, no event.  Used by no-redir
1305            translations to force return to the scheduler. */
1306      case VG_TRC_BORING:
1307         /* no special event, just keep going. */
1308         break;
1309
1310      case VG_TRC_INNER_FASTMISS:
1311	 vg_assert(dispatch_ctr > 0);
1312	 handle_tt_miss(tid);
1313	 break;
1314
1315      case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
1316         if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
1317         handle_chain_me(tid, (void*)trc[1], False);
1318         break;
1319      }
1320
1321      case VG_TRC_CHAIN_ME_TO_FAST_EP: {
1322         if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
1323         handle_chain_me(tid, (void*)trc[1], True);
1324         break;
1325      }
1326
1327      case VEX_TRC_JMP_CLIENTREQ:
1328	 do_client_request(tid);
1329	 break;
1330
1331      case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
1332      case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
1333      case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
1334      case VEX_TRC_JMP_SYS_SYSCALL: /* amd64-linux, ppc32-linux, amd64-darwin */
1335	 handle_syscall(tid, trc[0]);
1336	 if (VG_(clo_sanity_level) > 2)
1337	    VG_(sanity_check_general)(True); /* sanity-check every syscall */
1338	 break;
1339
1340      case VEX_TRC_JMP_YIELD:
1341	 /* Explicit yield, because this thread is in a spin-lock
1342	    or something.  Only let the thread run for a short while
1343            longer.  Because swapping to another thread is expensive,
1344            we're prepared to let this thread eat a little more CPU
1345            before swapping to another.  That means that short term
1346            spins waiting for hardware to poke memory won't cause a
1347            thread swap. */
1348	 if (dispatch_ctr > 2000)
1349            dispatch_ctr = 2000;
1350	 break;
1351
1352      case VG_TRC_INNER_COUNTERZERO:
1353	 /* Timeslice is out.  Let a new thread be scheduled. */
1354	 vg_assert(dispatch_ctr == 0);
1355	 break;
1356
1357      case VG_TRC_FAULT_SIGNAL:
1358	 /* Everything should be set up (either we're exiting, or
1359	    about to start in a signal handler). */
1360	 break;
1361
1362      case VEX_TRC_JMP_MAPFAIL:
1363         /* Failure of arch-specific address translation (x86/amd64
1364            segment override use) */
1365         /* jrs 2005 03 11: is this correct? */
1366         VG_(synth_fault)(tid);
1367         break;
1368
1369      case VEX_TRC_JMP_EMWARN: {
1370         static Int  counts[EmWarn_NUMBER];
1371         static Bool counts_initted = False;
1372         VexEmWarn ew;
1373         HChar*    what;
1374         Bool      show;
1375         Int       q;
1376         if (!counts_initted) {
1377            counts_initted = True;
1378            for (q = 0; q < EmWarn_NUMBER; q++)
1379               counts[q] = 0;
1380         }
1381         ew   = (VexEmWarn)VG_(threads)[tid].arch.vex.guest_EMWARN;
1382         what = (ew < 0 || ew >= EmWarn_NUMBER)
1383                   ? "unknown (?!)"
1384                   : LibVEX_EmWarn_string(ew);
1385         show = (ew < 0 || ew >= EmWarn_NUMBER)
1386                   ? True
1387                   : counts[ew]++ < 3;
1388         if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
1389            VG_(message)( Vg_UserMsg,
1390                          "Emulation warning: unsupported action:\n");
1391            VG_(message)( Vg_UserMsg, "  %s\n", what);
1392            VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1393         }
1394         break;
1395      }
1396
1397      case VEX_TRC_JMP_EMFAIL: {
1398         VexEmWarn ew;
1399         HChar*    what;
1400         ew   = (VexEmWarn)VG_(threads)[tid].arch.vex.guest_EMWARN;
1401         what = (ew < 0 || ew >= EmWarn_NUMBER)
1402                   ? "unknown (?!)"
1403                   : LibVEX_EmWarn_string(ew);
1404         VG_(message)( Vg_UserMsg,
1405                       "Emulation fatal error -- Valgrind cannot continue:\n");
1406         VG_(message)( Vg_UserMsg, "  %s\n", what);
1407         VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1408         VG_(message)(Vg_UserMsg, "\n");
1409         VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
1410         VG_(message)(Vg_UserMsg, "\n");
1411         VG_(exit)(1);
1412         break;
1413      }
1414
1415      case VEX_TRC_JMP_SIGTRAP:
1416         VG_(synth_sigtrap)(tid);
1417         break;
1418
1419      case VEX_TRC_JMP_SIGSEGV:
1420         VG_(synth_fault)(tid);
1421         break;
1422
1423      case VEX_TRC_JMP_SIGBUS:
1424         VG_(synth_sigbus)(tid);
1425         break;
1426
1427      case VEX_TRC_JMP_NODECODE: {
1428         Addr addr = VG_(get_IP)(tid);
1429
1430         VG_(umsg)(
1431            "valgrind: Unrecognised instruction at address %#lx.\n", addr);
1432         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1433#define M(a) VG_(umsg)(a "\n");
1434   M("Your program just tried to execute an instruction that Valgrind" );
1435   M("did not recognise.  There are two possible reasons for this."    );
1436   M("1. Your program has a bug and erroneously jumped to a non-code"  );
1437   M("   location.  If you are running Memcheck and you just saw a"    );
1438   M("   warning about a bad jump, it's probably your program's fault.");
1439   M("2. The instruction is legitimate but Valgrind doesn't handle it,");
1440   M("   i.e. it's Valgrind's fault.  If you think this is the case or");
1441   M("   you are not sure, please let us know and we'll try to fix it.");
1442   M("Either way, Valgrind will now raise a SIGILL signal which will"  );
1443   M("probably kill your program."                                     );
1444#undef M
1445
1446#if defined(VGA_s390x)
1447         /* Now that the complaint is out we need to adjust the guest_IA. The
1448            reason is that -- after raising the exception -- execution will
1449            continue with the insn that follows the invalid insn. As the first
1450            2 bits of the invalid insn determine its length in the usual way,
1451            we can compute the address of the next insn here and adjust the
1452            guest_IA accordingly. This adjustment is essential and tested by
1453            none/tests/s390x/op_exception.c (which would loop forever
1454            otherwise) */
1455         UChar byte = ((UChar *)addr)[0];
1456         UInt  insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
1457         Addr  next_insn_addr = addr + insn_length;
1458
1459         VG_(set_IP)(tid, next_insn_addr);
1460#endif
1461         VG_(synth_sigill)(tid, addr);
1462         break;
1463      }
1464      case VEX_TRC_JMP_TINVAL:
1465         VG_(discard_translations)(
1466            (Addr64)VG_(threads)[tid].arch.vex.guest_TISTART,
1467            VG_(threads)[tid].arch.vex.guest_TILEN,
1468            "scheduler(VEX_TRC_JMP_TINVAL)"
1469         );
1470         if (0)
1471            VG_(printf)("dump translations done.\n");
1472         break;
1473
1474      case VG_TRC_INVARIANT_FAILED:
1475         /* This typically happens if, after running generated code,
1476            it is detected that host CPU settings (eg, FPU/Vector
1477            control words) are not as they should be.  Vex's code
1478            generation specifies the state such control words should
1479            be in on entry to Vex-generated code, and they should be
1480            unchanged on exit from it.  Failure of this assertion
1481            usually means a bug in Vex's code generation. */
1482         //{ UInt xx;
1483         //  __asm__ __volatile__ (
1484         //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
1485         //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
1486         //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
1487         //}
1488         vg_assert2(0, "VG_(scheduler), phase 3: "
1489                       "run_innerloop detected host "
1490                       "state invariant failure", trc);
1491
1492      case VEX_TRC_JMP_SYS_SYSENTER:
1493         /* Do whatever simulation is appropriate for an x86 sysenter
1494            instruction.  Note that it is critical to set this thread's
1495            guest_EIP to point at the code to execute after the
1496            sysenter, since Vex-generated code will not have set it --
1497            vex does not know what it should be.  Vex sets the next
1498            address to zero, so if you don't set guest_EIP, the thread
1499            will jump to zero afterwards and probably die as a result. */
1500#        if defined(VGP_x86_linux)
1501         vg_assert2(0, "VG_(scheduler), phase 3: "
1502                       "sysenter_x86 on x86-linux is not supported");
1503#        elif defined(VGP_x86_darwin)
1504         /* return address in client edx */
1505         VG_(threads)[tid].arch.vex.guest_EIP
1506            = VG_(threads)[tid].arch.vex.guest_EDX;
1507         handle_syscall(tid, trc[0]);
1508#        else
1509         vg_assert2(0, "VG_(scheduler), phase 3: "
1510                       "sysenter_x86 on non-x86 platform?!?!");
1511#        endif
1512         break;
1513
1514      default:
1515	 vg_assert2(0, "VG_(scheduler), phase 3: "
1516                       "unexpected thread return code (%u)", trc[0]);
1517	 /* NOTREACHED */
1518	 break;
1519
1520      } /* switch (trc) */
1521
1522      if (0)
1523         maybe_show_sb_counts();
1524   }
1525
1526   if (VG_(clo_trace_sched))
1527      print_sched_event(tid, "exiting VG_(scheduler)");
1528
1529   vg_assert(VG_(is_exiting)(tid));
1530
1531   return tst->exitreason;
1532}
1533
1534
1535/*
1536   This causes all threads to forceably exit.  They aren't actually
1537   dead by the time this returns; you need to call
1538   VG_(reap_threads)() to wait for them.
1539 */
1540void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
1541{
1542   ThreadId tid;
1543
1544   vg_assert(VG_(is_running_thread)(me));
1545
1546   for (tid = 1; tid < VG_N_THREADS; tid++) {
1547      if (tid == me
1548          || VG_(threads)[tid].status == VgTs_Empty)
1549         continue;
1550      if (0)
1551         VG_(printf)(
1552            "VG_(nuke_all_threads_except): nuking tid %d\n", tid);
1553
1554      VG_(threads)[tid].exitreason = src;
1555      if (src == VgSrc_FatalSig)
1556         VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
1557      VG_(get_thread_out_of_syscall)(tid);
1558   }
1559}
1560
1561
1562/* ---------------------------------------------------------------------
1563   Specifying shadow register values
1564   ------------------------------------------------------------------ */
1565
1566#if defined(VGA_x86)
1567#  define VG_CLREQ_ARGS       guest_EAX
1568#  define VG_CLREQ_RET        guest_EDX
1569#elif defined(VGA_amd64)
1570#  define VG_CLREQ_ARGS       guest_RAX
1571#  define VG_CLREQ_RET        guest_RDX
1572#elif defined(VGA_ppc32) || defined(VGA_ppc64)
1573#  define VG_CLREQ_ARGS       guest_GPR4
1574#  define VG_CLREQ_RET        guest_GPR3
1575#elif defined(VGA_arm)
1576#  define VG_CLREQ_ARGS       guest_R4
1577#  define VG_CLREQ_RET        guest_R3
1578#elif defined (VGA_s390x)
1579#  define VG_CLREQ_ARGS       guest_r2
1580#  define VG_CLREQ_RET        guest_r3
1581#elif defined(VGA_mips32)
1582#  define VG_CLREQ_ARGS       guest_r12
1583#  define VG_CLREQ_RET        guest_r11
1584#else
1585#  error Unknown arch
1586#endif
1587
1588#define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
1589#define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
1590#define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
1591
1592// These macros write a value to a client's thread register, and tell the
1593// tool that it's happened (if necessary).
1594
1595#define SET_CLREQ_RETVAL(zztid, zzval) \
1596   do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1597        VG_TRACK( post_reg_write, \
1598                  Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
1599   } while (0)
1600
1601#define SET_CLCALL_RETVAL(zztid, zzval, f) \
1602   do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1603        VG_TRACK( post_reg_write_clientcall_return, \
1604                  zztid, O_CLREQ_RET, sizeof(UWord), f); \
1605   } while (0)
1606
1607
1608/* ---------------------------------------------------------------------
1609   Handle client requests.
1610   ------------------------------------------------------------------ */
1611
1612// OS-specific(?) client requests
1613static Bool os_client_request(ThreadId tid, UWord *args)
1614{
1615   Bool handled = True;
1616
1617   vg_assert(VG_(is_running_thread)(tid));
1618
1619   switch(args[0]) {
1620   case VG_USERREQ__LIBC_FREERES_DONE:
1621      /* This is equivalent to an exit() syscall, but we don't set the
1622	 exitcode (since it might already be set) */
1623      if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
1624         VG_(message)(Vg_DebugMsg,
1625                      "__libc_freeres() done; really quitting!\n");
1626      VG_(threads)[tid].exitreason = VgSrc_ExitThread;
1627      break;
1628
1629   default:
1630      handled = False;
1631      break;
1632   }
1633
1634   return handled;
1635}
1636
1637
1638/* Do a client request for the thread tid.  After the request, tid may
1639   or may not still be runnable; if not, the scheduler will have to
1640   choose a new thread to run.
1641*/
1642static
1643void do_client_request ( ThreadId tid )
1644{
1645   UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
1646   UWord req_no = arg[0];
1647
1648   if (0)
1649      VG_(printf)("req no = 0x%llx, arg = %p\n", (ULong)req_no, arg);
1650   switch (req_no) {
1651
1652      case VG_USERREQ__CLIENT_CALL0: {
1653         UWord (*f)(ThreadId) = (void*)arg[1];
1654	 if (f == NULL)
1655	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
1656	 else
1657	    SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
1658         break;
1659      }
1660      case VG_USERREQ__CLIENT_CALL1: {
1661         UWord (*f)(ThreadId, UWord) = (void*)arg[1];
1662	 if (f == NULL)
1663	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
1664	 else
1665	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
1666         break;
1667      }
1668      case VG_USERREQ__CLIENT_CALL2: {
1669         UWord (*f)(ThreadId, UWord, UWord) = (void*)arg[1];
1670	 if (f == NULL)
1671	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
1672	 else
1673	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
1674         break;
1675      }
1676      case VG_USERREQ__CLIENT_CALL3: {
1677         UWord (*f)(ThreadId, UWord, UWord, UWord) = (void*)arg[1];
1678	 if (f == NULL)
1679	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
1680	 else
1681	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
1682         break;
1683      }
1684
1685      // Nb: this looks like a circular definition, because it kind of is.
1686      // See comment in valgrind.h to understand what's going on.
1687      case VG_USERREQ__RUNNING_ON_VALGRIND:
1688         SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
1689         break;
1690
1691      case VG_USERREQ__PRINTF: {
1692         /* JRS 2010-Jan-28: this is DEPRECATED; use the
1693            _VALIST_BY_REF version instead */
1694         if (sizeof(va_list) != sizeof(UWord))
1695            goto va_list_casting_error_NORETURN;
1696         union {
1697            va_list vargs;
1698            unsigned long uw;
1699         } u;
1700         u.uw = (unsigned long)arg[2];
1701         Int count =
1702            VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], u.vargs );
1703         VG_(message_flush)();
1704         SET_CLREQ_RETVAL( tid, count );
1705         break;
1706      }
1707
1708      case VG_USERREQ__PRINTF_BACKTRACE: {
1709         /* JRS 2010-Jan-28: this is DEPRECATED; use the
1710            _VALIST_BY_REF version instead */
1711         if (sizeof(va_list) != sizeof(UWord))
1712            goto va_list_casting_error_NORETURN;
1713         union {
1714            va_list vargs;
1715            unsigned long uw;
1716         } u;
1717         u.uw = (unsigned long)arg[2];
1718         Int count =
1719            VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], u.vargs );
1720         VG_(message_flush)();
1721         VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1722         SET_CLREQ_RETVAL( tid, count );
1723         break;
1724      }
1725
1726      case VG_USERREQ__PRINTF_VALIST_BY_REF: {
1727         va_list* vargsp = (va_list*)arg[2];
1728         Int count =
1729            VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], *vargsp );
1730         VG_(message_flush)();
1731         SET_CLREQ_RETVAL( tid, count );
1732         break;
1733      }
1734
1735      case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
1736         va_list* vargsp = (va_list*)arg[2];
1737         Int count =
1738            VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], *vargsp );
1739         VG_(message_flush)();
1740         VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1741         SET_CLREQ_RETVAL( tid, count );
1742         break;
1743      }
1744
1745      case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
1746         va_list* vargsp = (va_list*)arg[2];
1747         Int count =
1748            VG_(vmessage)( Vg_DebugMsg, (char *)arg[1], *vargsp );
1749         VG_(message_flush)();
1750         SET_CLREQ_RETVAL( tid, count );
1751         break;
1752      }
1753
1754      case VG_USERREQ__ADD_IFUNC_TARGET: {
1755         VG_(redir_add_ifunc_target)( arg[1], arg[2] );
1756         SET_CLREQ_RETVAL( tid, 0);
1757         break; }
1758
1759      case VG_USERREQ__STACK_REGISTER: {
1760         UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
1761         SET_CLREQ_RETVAL( tid, sid );
1762         break; }
1763
1764      case VG_USERREQ__STACK_DEREGISTER: {
1765         VG_(deregister_stack)(arg[1]);
1766         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1767         break; }
1768
1769      case VG_USERREQ__STACK_CHANGE: {
1770         VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
1771         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1772         break; }
1773
1774      case VG_USERREQ__GET_MALLOCFUNCS: {
1775	 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
1776
1777	 info->tl_malloc               = VG_(tdict).tool_malloc;
1778	 info->tl_calloc               = VG_(tdict).tool_calloc;
1779	 info->tl_realloc              = VG_(tdict).tool_realloc;
1780	 info->tl_memalign             = VG_(tdict).tool_memalign;
1781	 info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
1782	 info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
1783	 info->tl_free                 = VG_(tdict).tool_free;
1784	 info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
1785	 info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
1786         info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;
1787
1788	 info->mallinfo                = VG_(mallinfo);
1789	 info->clo_trace_malloc        = VG_(clo_trace_malloc);
1790
1791         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1792
1793	 break;
1794      }
1795
1796      /* Requests from the client program */
1797
1798      case VG_USERREQ__DISCARD_TRANSLATIONS:
1799         if (VG_(clo_verbosity) > 2)
1800            VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
1801                         " addr %p,  len %lu\n",
1802                         (void*)arg[1], arg[2] );
1803
1804         VG_(discard_translations)(
1805            arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
1806         );
1807
1808         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1809	 break;
1810
1811      case VG_USERREQ__COUNT_ERRORS:
1812         SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
1813         break;
1814
1815      case VG_USERREQ__LOAD_PDB_DEBUGINFO:
1816         VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
1817         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1818         break;
1819
1820      case VG_USERREQ__MAP_IP_TO_SRCLOC: {
1821         Addr   ip    = arg[1];
1822         UChar* buf64 = (UChar*)arg[2];
1823
1824         VG_(memset)(buf64, 0, 64);
1825         UInt linenum = 0;
1826         Bool ok = VG_(get_filename_linenum)(
1827                      ip, &buf64[0], 50, NULL, 0, NULL, &linenum
1828                   );
1829         if (ok) {
1830            /* Find the terminating zero in the first 50 bytes. */
1831            UInt i;
1832            for (i = 0; i < 50; i++) {
1833               if (buf64[i] == 0)
1834                  break;
1835            }
1836            /* We must find a zero somewhere in 0 .. 49.  Else
1837               VG_(get_filename_linenum) is not properly zero
1838               terminating. */
1839            vg_assert(i < 50);
1840            VG_(sprintf)(&buf64[i], ":%u", linenum);
1841         } else {
1842            buf64[0] = 0;
1843         }
1844
1845         SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1846         break;
1847      }
1848
1849      case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
1850         Word delta = arg[1];
1851         vg_assert(delta == 1 || delta == -1);
1852         ThreadState* tst = VG_(get_ThreadState)(tid);
1853         vg_assert(tst);
1854         if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
1855            tst->err_disablement_level++;
1856         }
1857         else
1858         if (delta == -1 && tst->err_disablement_level > 0) {
1859            tst->err_disablement_level--;
1860         }
1861         SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1862         break;
1863      }
1864
1865      case VG_USERREQ__MALLOCLIKE_BLOCK:
1866      case VG_USERREQ__RESIZEINPLACE_BLOCK:
1867      case VG_USERREQ__FREELIKE_BLOCK:
1868         // Ignore them if the addr is NULL;  otherwise pass onto the tool.
1869         if (!arg[1]) {
1870            SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1871            break;
1872         } else {
1873            goto my_default;
1874         }
1875
1876      default:
1877       my_default:
1878	 if (os_client_request(tid, arg)) {
1879	    // do nothing, os_client_request() handled it
1880         } else if (VG_(needs).client_requests) {
1881	    UWord ret;
1882
1883            if (VG_(clo_verbosity) > 2)
1884               VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
1885                           arg[0], (void*)arg[1], arg[2] );
1886
1887	    if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
1888	       SET_CLREQ_RETVAL(tid, ret);
1889         } else {
1890	    static Bool whined = False;
1891
1892	    if (!whined && VG_(clo_verbosity) > 2) {
1893               // Allow for requests in core, but defined by tools, which
1894               // have 0 and 0 in their two high bytes.
1895               Char c1 = (arg[0] >> 24) & 0xff;
1896               Char c2 = (arg[0] >> 16) & 0xff;
1897               if (c1 == 0) c1 = '_';
1898               if (c2 == 0) c2 = '_';
1899	       VG_(message)(Vg_UserMsg, "Warning:\n"
1900                   "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
1901		   "  VG_(needs).client_requests should be set?\n",
1902			    arg[0], c1, c2, arg[0] & 0xffff);
1903	       whined = True;
1904	    }
1905         }
1906         break;
1907   }
1908   return;
1909
1910   /*NOTREACHED*/
1911  va_list_casting_error_NORETURN:
1912   VG_(umsg)(
1913      "Valgrind: fatal error - cannot continue: use of the deprecated\n"
1914      "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
1915      "on a platform where they cannot be supported.  Please use the\n"
1916      "equivalent _VALIST_BY_REF versions instead.\n"
1917      "\n"
1918      "This is a binary-incompatible change in Valgrind's client request\n"
1919      "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
1920      "are expected to almost never see this message.  The only case in\n"
1921      "which you might see this message is if your code uses the macros\n"
1922      "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
1923      "to recompile such code, using the header files from this version of\n"
1924      "Valgrind, and not any previous version.\n"
1925      "\n"
1926      "If you see this mesage in any other circumstances, it is probably\n"
1927      "a bug in Valgrind.  In this case, please file a bug report at\n"
1928      "\n"
1929      "   http://www.valgrind.org/support/bug_reports.html\n"
1930      "\n"
1931      "Will now abort.\n"
1932   );
1933   vg_assert(0);
1934}
1935
1936
1937/* ---------------------------------------------------------------------
1938   Sanity checking (permanently engaged)
1939   ------------------------------------------------------------------ */
1940
1941/* Internal consistency checks on the sched structures. */
1942static
1943void scheduler_sanity ( ThreadId tid )
1944{
1945   Bool bad = False;
1946   static UInt lasttime = 0;
1947   UInt now;
1948   Int lwpid = VG_(gettid)();
1949
1950   if (!VG_(is_running_thread)(tid)) {
1951      VG_(message)(Vg_DebugMsg,
1952		   "Thread %d is supposed to be running, "
1953                   "but doesn't own the_BigLock (owned by %d)\n",
1954		   tid, VG_(running_tid));
1955      bad = True;
1956   }
1957
1958   if (lwpid != VG_(threads)[tid].os_state.lwpid) {
1959      VG_(message)(Vg_DebugMsg,
1960                   "Thread %d supposed to be in LWP %d, but we're actually %d\n",
1961                   tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
1962      bad = True;
1963   }
1964
1965   if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
1966      VG_(message)(Vg_DebugMsg,
1967                   "Thread (LWPID) %d doesn't own the_BigLock\n",
1968                   tid);
1969      bad = True;
1970   }
1971
1972   /* Periodically show the state of all threads, for debugging
1973      purposes. */
1974   now = VG_(read_millisecond_timer)();
1975   if (0 && (!bad) && (lasttime + 4000/*ms*/ <= now)) {
1976      lasttime = now;
1977      VG_(printf)("\n------------ Sched State at %d ms ------------\n",
1978                  (Int)now);
1979      VG_(show_sched_status)();
1980   }
1981
1982   /* core_panic also shows the sched status, which is why we don't
1983      show it above if bad==True. */
1984   if (bad)
1985      VG_(core_panic)("scheduler_sanity: failed");
1986}
1987
1988void VG_(sanity_check_general) ( Bool force_expensive )
1989{
1990   ThreadId tid;
1991
1992   static UInt next_slow_check_at = 1;
1993   static UInt slow_check_interval = 25;
1994
1995   if (VG_(clo_sanity_level) < 1) return;
1996
1997   /* --- First do all the tests that we can do quickly. ---*/
1998
1999   sanity_fast_count++;
2000
2001   /* Check stuff pertaining to the memory check system. */
2002
2003   /* Check that nobody has spuriously claimed that the first or
2004      last 16 pages of memory have become accessible [...] */
2005   if (VG_(needs).sanity_checks) {
2006      vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
2007   }
2008
2009   /* --- Now some more expensive checks. ---*/
2010
2011   /* Once every now and again, check some more expensive stuff.
2012      Gradually increase the interval between such checks so as not to
2013      burden long-running programs too much. */
2014   if ( force_expensive
2015        || VG_(clo_sanity_level) > 1
2016        || (VG_(clo_sanity_level) == 1
2017            && sanity_fast_count == next_slow_check_at)) {
2018
2019      if (0) VG_(printf)("SLOW at %d\n", sanity_fast_count-1);
2020
2021      next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
2022      slow_check_interval++;
2023      sanity_slow_count++;
2024
2025      if (VG_(needs).sanity_checks) {
2026          vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
2027      }
2028
2029      /* Look for stack overruns.  Visit all threads. */
2030      for (tid = 1; tid < VG_N_THREADS; tid++) {
2031	 SizeT    remains;
2032         VgStack* stack;
2033
2034	 if (VG_(threads)[tid].status == VgTs_Empty ||
2035	     VG_(threads)[tid].status == VgTs_Zombie)
2036	    continue;
2037
2038         stack
2039            = (VgStack*)
2040              VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
2041         SizeT limit
2042            = 4096; // Let's say.  Checking more causes lots of L2 misses.
2043	 remains
2044            = VG_(am_get_VgStack_unused_szB)(stack, limit);
2045	 if (remains < limit)
2046	    VG_(message)(Vg_DebugMsg,
2047                         "WARNING: Thread %d is within %ld bytes "
2048                         "of running out of stack!\n",
2049		         tid, remains);
2050      }
2051   }
2052
2053   if (VG_(clo_sanity_level) > 1) {
2054      /* Check sanity of the low-level memory manager.  Note that bugs
2055         in the client's code can cause this to fail, so we don't do
2056         this check unless specially asked for.  And because it's
2057         potentially very expensive. */
2058      VG_(sanity_check_malloc_all)();
2059   }
2060}
2061
2062/*--------------------------------------------------------------------*/
2063/*--- end                                                          ---*/
2064/*--------------------------------------------------------------------*/
2065