vgdb-invoker-ptrace.c revision 582d58245637ab05272d89fb94b12fd0f18fa0f8
1/*--------------------------------------------------------------------*/
2/*--- Implementation of vgdb invoker subsystem via ptrace() calls. ---*/
3/*--------------------------------------------------------------------*/
4
5/*
6   This file is part of Valgrind, a dynamic binary instrumentation
7   framework.
8
9   Copyright (C) 2011-2013 Philippe Waroquiers
10
11   This program is free software; you can redistribute it and/or
12   modify it under the terms of the GNU General Public License as
13   published by the Free Software Foundation; either version 2 of the
14   License, or (at your option) any later version.
15
16   This program is distributed in the hope that it will be useful, but
17   WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19   General Public License for more details.
20
21   You should have received a copy of the GNU General Public License
22   along with this program; if not, write to the Free Software
23   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24   02111-1307, USA.
25
26   The GNU General Public License is contained in the file COPYING.
27*/
28
29#include "config.h"
30
31#include "vgdb.h"
32#include "pub_core_threadstate.h"
33
34#include <alloca.h>
35#include <assert.h>
36#include <errno.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40#include <sys/ptrace.h>
41#include <sys/time.h>
42#include <sys/user.h>
43#include <sys/wait.h>
44
/* PTRACE_GETREGSET configuration.

   HAVE_PTRACE_GETREGSET: the system headers define the request
   (TBD: a configure test might be better). Whether the request really
   works is checked at run time, see has_working_ptrace_getregset.

   USE_PTRACE_GETREGSET: vgdb effectively uses the request.
   A bi-arch build using PTRACE_GET/SETREGSET needs some conversion
   code for register structures, so vgdb normally prefers
   PTRACE_GETREGS or PTRACE_PEEKUSER. The only platform on which
   PTRACE_GETREGSET must be used is arm64; the resulting vgdb cannot
   work in a bi-arch setup. */
#if defined(PTRACE_GETREGSET)
#  define HAVE_PTRACE_GETREGSET
#  if defined(VGA_arm64)
#    define USE_PTRACE_GETREGSET
#  endif
#endif
61
62#include <sys/uio.h>
63#include <elf.h>
64
65#include <sys/procfs.h>
66
/* RHEL 5 uses glibc 2.3.4 which does not define PTRACE_GETSIGINFO:
   on s390x, supply the request number when the headers lack it. */
#if defined(VGA_s390x) && !defined(PTRACE_GETSIGINFO)
#  define PTRACE_GETSIGINFO 0x4202
#endif
73
74#if VEX_HOST_WORDSIZE == 8
75typedef Addr64 CORE_ADDR;
76#elif VEX_HOST_WORDSIZE == 4
77typedef Addr32 CORE_ADDR;
78#else
79# error "unexpected wordsize"
80#endif
81
82#if VEX_HOST_WORDSIZE == 8
83typedef Addr64 PTRACE_XFER_TYPE;
84typedef void* PTRACE_ARG3_TYPE;
85#elif VEX_HOST_WORDSIZE == 4
86typedef Addr32 PTRACE_XFER_TYPE;
87typedef void* PTRACE_ARG3_TYPE;
88#else
89# error "unexpected wordsize"
90#endif
91
92// if > 0, pid for which registers have to be restored.
93// if == 0, means we have not yet called setregs (or have already
94// restored the registers).
95static int pid_of_save_regs = 0;
96/* True if we have continued pid_of_save_regs after PTRACE_ATTACH. */
97static Bool pid_of_save_regs_continued = False;
98// When setregs has been called to change the registers of pid_of_save_regs,
99// vgdb cannot transmit the signals intercepted during ptrace.
100// So, we queue them, and will deliver them when detaching.
101// See function waitstopped for more info.
102static int signal_queue_sz = 0;
103static siginfo_t *signal_queue;
104
105/* True when loss of connection indicating that the Valgrind
106   process is dying. */
107static Bool dying = False;
108
109/* ptrace_(read|write)_memory are modified extracts of linux-low.c
110   from gdb 6.6. Copyrighted FSF */
111/* Copy LEN bytes from valgrind memory starting at MEMADDR
112   to vgdb memory starting at MYADDR.  */
113static
114int ptrace_read_memory (pid_t inferior_pid, CORE_ADDR memaddr,
115                        void *myaddr, size_t len)
116{
117   register int i;
118   /* Round starting address down to longword boundary.  */
119   register CORE_ADDR addr = memaddr & -(CORE_ADDR) sizeof (PTRACE_XFER_TYPE);
120   /* Round ending address up; get number of longwords that makes.  */
121   register int count
122      = (((memaddr + len) - addr) + sizeof (PTRACE_XFER_TYPE) - 1)
123      / sizeof (PTRACE_XFER_TYPE);
124   /* Allocate buffer of that many longwords.  */
125   register PTRACE_XFER_TYPE *buffer
126      = (PTRACE_XFER_TYPE *) alloca (count * sizeof (PTRACE_XFER_TYPE));
127
128   /* Read all the longwords */
129   for (i = 0; i < count; i++, addr += sizeof (PTRACE_XFER_TYPE)) {
130      errno = 0;
131      buffer[i] = ptrace (PTRACE_PEEKTEXT, inferior_pid,
132                          (PTRACE_ARG3_TYPE) addr, 0);
133      if (errno)
134         return errno;
135   }
136
137   /* Copy appropriate bytes out of the buffer.  */
138   memcpy (myaddr,
139           (char *) buffer + (memaddr & (sizeof (PTRACE_XFER_TYPE) - 1)), len);
140
141   return 0;
142}
143
144/* Copy LEN bytes of data from vgdb memory at MYADDR
145   to valgrind memory at MEMADDR.
146   On failure (cannot write the valgrind memory)
147   returns the value of errno.  */
148__attribute__((unused)) /* not used on all platforms */
149static
150int ptrace_write_memory (pid_t inferior_pid, CORE_ADDR memaddr,
151                         const void *myaddr, size_t len)
152{
153   register int i;
154   /* Round starting address down to longword boundary.  */
155   register CORE_ADDR addr = memaddr & -(CORE_ADDR) sizeof (PTRACE_XFER_TYPE);
156   /* Round ending address up; get number of longwords that makes.  */
157   register int count
158      = (((memaddr + len) - addr) + sizeof (PTRACE_XFER_TYPE) - 1)
159      / sizeof (PTRACE_XFER_TYPE);
160   /* Allocate buffer of that many longwords.  */
161   register PTRACE_XFER_TYPE *buffer
162      = (PTRACE_XFER_TYPE *) alloca (count * sizeof (PTRACE_XFER_TYPE));
163
164   if (debuglevel >= 1) {
165      DEBUG (1, "Writing ");
166      for (i = 0; i < len; i++)
167         PDEBUG (1, "%02x", ((const unsigned char*)myaddr)[i]);
168      PDEBUG(1, " to %p\n", (void *) memaddr);
169   }
170
171   /* Fill start and end extra bytes of buffer with existing memory data.  */
172
173   buffer[0] = ptrace (PTRACE_PEEKTEXT, inferior_pid,
174                       (PTRACE_ARG3_TYPE) addr, 0);
175
176   if (count > 1) {
177      buffer[count - 1]
178         = ptrace (PTRACE_PEEKTEXT, inferior_pid,
179                   (PTRACE_ARG3_TYPE) (addr + (count - 1)
180                                       * sizeof (PTRACE_XFER_TYPE)),
181                   0);
182   }
183
184   /* Copy data to be written over corresponding part of buffer */
185
186   memcpy ((char *) buffer + (memaddr & (sizeof (PTRACE_XFER_TYPE) - 1)),
187           myaddr, len);
188
189   /* Write the entire buffer.  */
190
191   for (i = 0; i < count; i++, addr += sizeof (PTRACE_XFER_TYPE)) {
192      errno = 0;
193      ptrace (PTRACE_POKETEXT, inferior_pid,
194              (PTRACE_ARG3_TYPE) addr, buffer[i]);
195      if (errno)
196         return errno;
197   }
198
199   return 0;
200}
201
202/* subset of VG_(threads) needed for vgdb ptrace.
203   This is initialized when process is attached. */
204typedef struct {
205   ThreadStatus status;
206   Int lwpid;
207}
208VgdbThreadState;
209static VgdbThreadState vgdb_threads[VG_N_THREADS];
210
211static const
212HChar* name_of_ThreadStatus ( ThreadStatus status )
213{
214   switch (status) {
215   case VgTs_Empty:     return "VgTs_Empty";
216   case VgTs_Init:      return "VgTs_Init";
217   case VgTs_Runnable:  return "VgTs_Runnable";
218   case VgTs_WaitSys:   return "VgTs_WaitSys";
219   case VgTs_Yielding:  return "VgTs_Yielding";
220   case VgTs_Zombie:    return "VgTs_Zombie";
221   default:             return "VgTs_???";
222  }
223}
224
/* Returns a printable decoding of STATUS, a status as reported by
   waitpid: for each of WIFEXITED, WIFSIGNALED (+ WCOREDUMP),
   WIFSTOPPED and WIFCONTINUED that holds, the macro name is output,
   followed when relevant by the exit status or signal number.
   WCOREDUMP and WIFCONTINUED are only decoded on platforms that
   define them.
   The result is a static buffer, overwritten by the next call. */
static
char *status_image (int status)
{
   static char result[256];
   size_t sz = 0;
/* Appends formatted text to result. snprintf returns the length the
   text would have had without truncation: what is added to sz is
   clamped to what was really stored, so that sz always stays inside
   result. */
#define APPEND(...)                                                      \
   do {                                                                  \
      const size_t avail = sizeof result - sz;                           \
      const int n = snprintf (result + sz, avail, __VA_ARGS__);          \
      if (n > 0)                                                         \
         sz += ((size_t) n < avail) ? (size_t) n : avail - 1;            \
   } while (0)

   result[0] = 0;

   if (WIFEXITED(status))
      APPEND ("WIFEXITED %d ", WEXITSTATUS(status));

   if (WIFSIGNALED(status)) {
      APPEND ("WIFSIGNALED %d ", WTERMSIG(status));
      /* WCOREDUMP is not specified by POSIX and is missing on some
         systems. */
#ifdef WCOREDUMP
      if (WCOREDUMP(status))
         APPEND ("WCOREDUMP ");
#endif
   }

   if (WIFSTOPPED(status))
      APPEND ("WIFSTOPPED %d ", WSTOPSIG(status));

#ifdef WIFCONTINUED
   if (WIFCONTINUED(status))
      APPEND ("WIFCONTINUED ");
#endif

   return result;
#undef APPEND
}
253
254/* Wait till the process pid is reported as stopped with signal_expected.
255   If other signal(s) than signal_expected are received, waitstopped
256   will pass them to pid, waiting for signal_expected to stop pid.
257   Returns True when process is in stopped state with signal_expected.
258   Returns False if a problem was encountered while waiting for pid
259   to be stopped.
260
261   If pid is reported as being dead/exited, waitstopped will return False.
262*/
263static
264Bool waitstopped (pid_t pid, int signal_expected, const char *msg)
265{
266   pid_t p;
267   int status = 0;
268   int signal_received;
269   int res;
270
271   while (1) {
272      DEBUG(1, "waitstopped %s before waitpid signal_expected %d\n",
273            msg, signal_expected);
274      p = waitpid(pid, &status, __WALL);
275      DEBUG(1, "after waitpid pid %d p %d status 0x%x %s\n", pid, p,
276            status, status_image (status));
277      if (p != pid) {
278         ERROR(errno, "%s waitpid pid %d in waitstopped %d status 0x%x %s\n",
279               msg, pid, p, status, status_image (status));
280         return False;
281      }
282
283      if (WIFEXITED(status)) {
284         shutting_down = True;
285         return False;
286      }
287
288      assert (WIFSTOPPED(status));
289      signal_received = WSTOPSIG(status);
290      if (signal_received == signal_expected)
291         break;
292
293      /* pid received a signal which is not the signal we are waiting for.
294         If we have not (yet) changed the registers of the inferior
295         or we have (already) reset them, we can transmit the signal.
296
297         If we have already set the registers of the inferior, we cannot
298         transmit the signal, as this signal would arrive when the
299         gdbserver code runs. And valgrind only expects signals to
300         arrive in a small code portion around
301         client syscall logic, where signal are unmasked (see e.g.
302         m_syswrap/syscall-x86-linux.S ML_(do_syscall_for_client_WRK).
303
304         As ptrace is forcing a call to gdbserver by jumping
305         'out of this region', signals are not masked, but
306         will arrive outside of the allowed/expected code region.
307         So, if we have changed the registers of the inferior, we
308         rather queue the signal to transmit them when detaching,
309         after having restored the registers to the initial values. */
310      if (pid_of_save_regs) {
311         siginfo_t *newsiginfo;
312
313         // realloc a bigger queue, and store new signal at the end.
314         // This is not very efficient but we assume not many sigs are queued.
315         signal_queue_sz++;
316         signal_queue = vrealloc(signal_queue, sizeof(siginfo_t) * signal_queue_sz);
317         newsiginfo = signal_queue + (signal_queue_sz - 1);
318
319         res = ptrace (PTRACE_GETSIGINFO, pid, NULL, newsiginfo);
320         if (res != 0) {
321            ERROR(errno, "PTRACE_GETSIGINFO failed: signal lost !!!!\n");
322            signal_queue_sz--;
323         } else
324            DEBUG(1, "waitstopped PTRACE_CONT, queuing signal %d"
325                  " si_signo %d si_pid %d\n",
326                  signal_received, newsiginfo->si_signo, newsiginfo->si_pid);
327         res = ptrace (PTRACE_CONT, pid, NULL, 0);
328      } else {
329         DEBUG(1, "waitstopped PTRACE_CONT with signal %d\n", signal_received);
330         res = ptrace (PTRACE_CONT, pid, NULL, signal_received);
331      }
332      if (res != 0) {
333         ERROR(errno, "waitstopped PTRACE_CONT\n");
334         return False;
335      }
336   }
337
338   return True;
339}
340
341/* Stops the given pid, wait for the process to be stopped.
342   Returns True if succesful, False otherwise.
343   msg is used in tracing and error reporting. */
344static
345Bool stop (pid_t pid, const char *msg)
346{
347   long res;
348
349   DEBUG(1, "%s SIGSTOP pid %d\n", msg, pid);
350   res = kill (pid, SIGSTOP);
351   if (res != 0) {
352      ERROR(errno, "%s SIGSTOP pid %d %ld\n", msg, pid, res);
353      return False;
354   }
355
356   return waitstopped (pid, SIGSTOP, msg);
357
358}
359
360/* Attaches to given pid, wait for the process to be stopped.
361   Returns True if succesful, False otherwise.
362   msg is used in tracing and error reporting. */
363static
364Bool attach (pid_t pid, const char *msg)
365{
366   long res;
367   static Bool output_error = True;
368   static Bool initial_attach = True;
369   // For a ptrace_scope protected system, we do not want to output
370   // repetitively attach error. We will output once an error
371   // for the initial_attach. Once the 1st attach has succeeded, we
372   // again show all errors.
373
374   DEBUG(1, "%s PTRACE_ATTACH pid %d\n", msg, pid);
375   res = ptrace (PTRACE_ATTACH, pid, NULL, NULL);
376   if (res != 0) {
377      if (output_error || debuglevel > 0) {
378         ERROR(errno, "%s PTRACE_ATTACH pid %d %ld\n", msg, pid, res);
379         if (initial_attach)
380            output_error = False;
381      }
382      return False;
383   }
384
385   initial_attach = False;
386   output_error = True;
387   return waitstopped(pid, SIGSTOP, msg);
388}
389
390/* once we are attached to the pid, get the list of threads and stop
391   them all.
392   Returns True if all threads properly suspended, False otherwise. */
393static
394Bool acquire_and_suspend_threads (pid_t pid)
395{
396   int i;
397   int rw;
398   Bool pid_found = False;
399   Addr vgt;
400   int sz_tst;
401   int off_status;
402   int off_lwpid;
403   int nr_live_threads = 0;
404
405   if (shared32 != NULL) {
406      vgt = shared32->threads;
407      sz_tst = shared32->sizeof_ThreadState;
408      off_status = shared32->offset_status;
409      off_lwpid = shared32->offset_lwpid;
410   }
411   else if (shared64 != NULL) {
412      vgt = shared64->threads;
413      sz_tst = shared64->sizeof_ThreadState;
414      off_status = shared64->offset_status;
415      off_lwpid = shared64->offset_lwpid;
416   } else {
417      assert (0);
418   }
419
420   /* note: the entry 0 is unused */
421   for (i = 1; i < VG_N_THREADS; i++) {
422      vgt += sz_tst;
423      rw = ptrace_read_memory(pid, vgt+off_status,
424                              &(vgdb_threads[i].status),
425                              sizeof(ThreadStatus));
426      if (rw != 0) {
427         ERROR(rw, "status ptrace_read_memory\n");
428         return False;
429      }
430
431      rw = ptrace_read_memory(pid, vgt+off_lwpid,
432                              &(vgdb_threads[i].lwpid),
433                              sizeof(Int));
434      if (rw != 0) {
435         ERROR(rw, "lwpid ptrace_read_memory\n");
436         return False;
437      }
438
439      if (vgdb_threads[i].status != VgTs_Empty) {
440         DEBUG(1, "found tid %d status %s lwpid %d\n",
441               i, name_of_ThreadStatus(vgdb_threads[i].status),
442               vgdb_threads[i].lwpid);
443         nr_live_threads++;
444         if (vgdb_threads[i].lwpid <= 1) {
445            if (vgdb_threads[i].lwpid == 0
446                && vgdb_threads[i].status == VgTs_Init) {
447               DEBUG(1, "not set lwpid tid %d status %s lwpid %d\n",
448                     i, name_of_ThreadStatus(vgdb_threads[i].status),
449                     vgdb_threads[i].lwpid);
450            } else {
451               ERROR(1, "unexpected lwpid tid %d status %s lwpid %d\n",
452                     i, name_of_ThreadStatus(vgdb_threads[i].status),
453                     vgdb_threads[i].lwpid);
454            }
455            /* in case we have a VtTs_Init thread with lwpid not yet set,
456               we try again later. */
457            return False;
458         }
459         if (vgdb_threads[i].lwpid == pid) {
460            assert (!pid_found);
461            assert (i == 1);
462            pid_found = True;
463         } else {
464            if (!attach(vgdb_threads[i].lwpid, "attach_thread")) {
465                 ERROR(0, "ERROR attach pid %d tid %d\n",
466                       vgdb_threads[i].lwpid, i);
467               return False;
468            }
469         }
470      }
471   }
472   /* If we found no thread, it means the process is stopping, and
473      we better do not force anything to happen during that. */
474   if (nr_live_threads > 0)
475      return True;
476   else
477      return False;
478}
479
480static
481void detach_from_all_threads (pid_t pid)
482{
483   int i;
484   long res;
485   Bool pid_found = False;
486
487   /* detach from all the threads  */
488   for (i = 1; i < VG_N_THREADS; i++) {
489      if (vgdb_threads[i].status != VgTs_Empty) {
490         if (vgdb_threads[i].status == VgTs_Init
491             && vgdb_threads[i].lwpid == 0) {
492            DEBUG(1, "skipping PTRACE_DETACH pid %d tid %d status %s\n",
493                  vgdb_threads[i].lwpid, i,
494                  name_of_ThreadStatus (vgdb_threads[i].status));
495         } else {
496            if (vgdb_threads[i].lwpid == pid) {
497               assert (!pid_found);
498               pid_found = True;
499            }
500            DEBUG(1, "PTRACE_DETACH pid %d tid %d status %s\n",
501                  vgdb_threads[i].lwpid, i,
502                  name_of_ThreadStatus (vgdb_threads[i].status));
503            res = ptrace (PTRACE_DETACH, vgdb_threads[i].lwpid, NULL, NULL);
504            if (res != 0) {
505               ERROR(errno, "PTRACE_DETACH pid %d tid %d status %s res %ld\n",
506                     vgdb_threads[i].lwpid, i,
507                     name_of_ThreadStatus (vgdb_threads[i].status),
508                     res);
509            }
510         }
511      }
512   }
513
514   if (!pid_found && pid) {
515      /* No threads are live. Process is busy stopping.
516         We need to detach from pid explicitely. */
517      DEBUG(1, "no thread live => PTRACE_DETACH pid %d\n", pid);
518      res = ptrace (PTRACE_DETACH, pid, NULL, NULL);
519      if (res != 0)
520         ERROR(errno, "PTRACE_DETACH pid %d res %ld\n", pid, res);
521   }
522}
523
/* Copy of the registers of the process, taken before vgdb modifies
   them; restore_and_detach writes this copy back.
   arm64 is extra special: old glibc defined the kernel user_pt_regs,
   but newer glibc instead defines user_regs_struct. */
#if defined(VGA_arm64)
#  if defined(HAVE_SYS_USER_REGS)
static struct user_regs_struct user_save;
#  else
static struct user_pt_regs user_save;
#  endif
#else
static struct user user_save;
#endif

/* has_working_ptrace_getregs indicates if PTRACE_GETREGS (and
   PTRACE_SETREGS) can be used. Note that some linux versions are
   defining PTRACE_GETREGS but using it gives back EIO.
   Possible values:
     -1 : PTRACE_GETREGS is defined, runtime check not yet done.
      0 : PTRACE_GETREGS runtime check has failed.
      1 : PTRACE_GETREGS defined and runtime check ok. */
#if defined(HAVE_PTRACE_GETREGS)
static int has_working_ptrace_getregs = -1;
#endif

/* Similar but for PTRACE_GETREGSET. */
#if defined(HAVE_PTRACE_GETREGSET)
static int has_working_ptrace_getregset = -1;
#endif
550
551/* Get the registers from pid into regs.
552   regs_bsz value gives the length of *regs.
553   Returns True if all ok, otherwise False. */
554static
555Bool getregs (pid_t pid, void *regs, long regs_bsz)
556{
557   DEBUG(1, "getregs regs_bsz %ld\n", regs_bsz);
558#  ifdef HAVE_PTRACE_GETREGSET
559#  ifndef USE_PTRACE_GETREGSET
560   if (has_working_ptrace_getregset)
561      DEBUG(1, "PTRACE_GETREGSET defined, not used (yet?) by vgdb\n");
562   has_working_ptrace_getregset = 0;
563#  endif
564   if (has_working_ptrace_getregset) {
565      // Platforms having GETREGSET
566      long res;
567      elf_gregset_t elf_regs;
568      struct iovec iovec;
569
570      DEBUG(1, "getregs PTRACE_GETREGSET sizeof(elf_regs) %d\n", sizeof(elf_regs));
571      iovec.iov_base = regs;
572      iovec.iov_len =  sizeof(elf_regs);
573
574      res = ptrace (PTRACE_GETREGSET, pid, NT_PRSTATUS, &iovec);
575      if (res == 0) {
576         if (has_working_ptrace_getregset == -1) {
577            // First call to PTRACE_GETREGSET succesful =>
578            has_working_ptrace_getregset = 1;
579            DEBUG(1, "detected a working PTRACE_GETREGSET\n");
580         }
581         assert (has_working_ptrace_getregset == 1);
582         return True;
583      }
584      else if (has_working_ptrace_getregset == 1) {
585         // We had a working call, but now it fails.
586         // This is unexpected.
587         ERROR(errno, "PTRACE_GETREGSET %ld\n", res);
588         return False;
589      } else {
590         // Check this is the first call:
591         assert (has_working_ptrace_getregset == -1);
592         if (errno == EIO) {
593            DEBUG(1, "detected a broken PTRACE_GETREGSET with EIO\n");
594            has_working_ptrace_getregset = 0;
595            // Fall over to the PTRACE_GETREGS or PTRACE_PEEKUSER case.
596         } else {
597            ERROR(errno, "broken PTRACE_GETREGSET unexpected errno %ld\n", res);
598            return False;
599         }
600      }
601   }
602#  endif
603
604#  ifdef HAVE_PTRACE_GETREGS
605   if (has_working_ptrace_getregs) {
606      // Platforms having GETREGS
607      long res;
608      DEBUG(1, "getregs PTRACE_GETREGS\n");
609      res = ptrace (PTRACE_GETREGS, pid, NULL, regs);
610      if (res == 0) {
611         if (has_working_ptrace_getregs == -1) {
612            // First call to PTRACE_GETREGS succesful =>
613            has_working_ptrace_getregs = 1;
614            DEBUG(1, "detected a working PTRACE_GETREGS\n");
615         }
616         assert (has_working_ptrace_getregs == 1);
617         return True;
618      }
619      else if (has_working_ptrace_getregs == 1) {
620         // We had a working call, but now it fails.
621         // This is unexpected.
622         ERROR(errno, "PTRACE_GETREGS %ld\n", res);
623         return False;
624      } else {
625         // Check this is the first call:
626         assert (has_working_ptrace_getregs == -1);
627         if (errno == EIO) {
628            DEBUG(1, "detected a broken PTRACE_GETREGS with EIO\n");
629            has_working_ptrace_getregs = 0;
630            // Fall over to the PTRACE_PEEKUSER case.
631         } else {
632            ERROR(errno, "broken PTRACE_GETREGS unexpected errno %ld\n", res);
633            return False;
634         }
635      }
636   }
637#  endif
638
639   // We assume  PTRACE_PEEKUSER is defined everywhere.
640   {
641#     ifdef PT_ENDREGS
642      long peek_bsz = PT_ENDREGS;
643      assert (peek_bsz <= regs_bsz);
644#     else
645      long peek_bsz = regs_bsz-1;
646#     endif
647      char *pregs = (char *) regs;
648      long offset;
649      errno = 0;
650      DEBUG(1, "getregs PTRACE_PEEKUSER(s) peek_bsz %ld\n", peek_bsz);
651      for (offset = 0; offset < peek_bsz; offset = offset + sizeof(long)) {
652         *(long *)(pregs+offset) = ptrace(PTRACE_PEEKUSER, pid, offset, NULL);
653         if (errno != 0) {
654            ERROR(errno, "PTRACE_PEEKUSER offset %ld\n", offset);
655            return False;
656         }
657      }
658      return True;
659   }
660
661   // If neither of PTRACE_GETREGSET PTRACE_GETREGS PTRACE_PEEKUSER have
662   // returned, then we are in serious trouble.
663   assert (0);
664}
665
666/* Set the registers of pid to regs.
667   regs_bsz value gives the length of *regs.
668   Returns True if all ok, otherwise False. */
669static
670Bool setregs (pid_t pid, void *regs, long regs_bsz)
671{
672   DEBUG(1, "setregs regs_bsz %ld\n", regs_bsz);
673
674// Note : the below is checking for GETREGSET, not SETREGSET
675// as if one is defined and working, the other one should also work.
676#  ifdef HAVE_PTRACE_GETREGSET
677   if (has_working_ptrace_getregset) {
678      // Platforms having SETREGSET
679      long res;
680      elf_gregset_t elf_regs;
681      struct iovec iovec;
682
683      // setregset can never be called before getregset has done a runtime check.
684      assert (has_working_ptrace_getregset == 1);
685      DEBUG(1, "setregs PTRACE_SETREGSET sizeof(elf_regs) %d\n", sizeof(elf_regs));
686      iovec.iov_base = regs;
687      iovec.iov_len =  sizeof(elf_regs);
688      res = ptrace (PTRACE_SETREGSET, pid, NT_PRSTATUS, &iovec);
689      if (res != 0) {
690         ERROR(errno, "PTRACE_SETREGSET %ld\n", res);
691         return False;
692      }
693      return True;
694   }
695#  endif
696
697// Note : the below is checking for GETREGS, not SETREGS
698// as if one is defined and working, the other one should also work.
699#  ifdef HAVE_PTRACE_GETREGS
700   if (has_working_ptrace_getregs) {
701      // Platforms having SETREGS
702      long res;
703      // setregs can never be called before getregs has done a runtime check.
704      assert (has_working_ptrace_getregs == 1);
705      DEBUG(1, "setregs PTRACE_SETREGS\n");
706      res = ptrace (PTRACE_SETREGS, pid, NULL, regs);
707      if (res != 0) {
708         ERROR(errno, "PTRACE_SETREGS %ld\n", res);
709         return False;
710      }
711      return True;
712   }
713#  endif
714
715   {
716      char *pregs = (char *) regs;
717      long offset;
718      long res;
719#     ifdef PT_ENDREGS
720      long peek_bsz = PT_ENDREGS;
721      assert (peek_bsz <= regs_bsz);
722#     else
723      long peek_bsz = regs_bsz-1;
724#     endif
725      errno = 0;
726      DEBUG(1, "setregs PTRACE_POKEUSER(s) %ld\n", peek_bsz);
727      for (offset = 0; offset < peek_bsz; offset = offset + sizeof(long)) {
728         res = ptrace(PTRACE_POKEUSER, pid, offset, *(long*)(pregs+offset));
729         if (errno != 0) {
730            ERROR(errno, "PTRACE_POKEUSER offset %ld res %ld\n", offset, res);
731            return False;
732         }
733      }
734      return True;
735   }
736
737   // If neither PTRACE_SETREGS not PTRACE_POKEUSER have returned,
738   // then we are in serious trouble.
739   assert (0);
740}
741
742/* Restore the registers to the saved value, then detaches from all threads */
743static
744void restore_and_detach (pid_t pid)
745{
746   int res;
747
748   DEBUG(1, "restore_and_detach pid %d pid_of_save_regs %d\n",
749         pid, pid_of_save_regs);
750
751   if (pid_of_save_regs) {
752      /* In case the 'main pid' has been continued, we need to stop it
753         before resetting the registers. */
754      if (pid_of_save_regs_continued) {
755         pid_of_save_regs_continued = False;
756         if (!stop(pid_of_save_regs, "sigstop before reset regs"))
757            DEBUG(0, "Could not sigstop before reset");
758      }
759
760      DEBUG(1, "setregs restore registers pid %d\n", pid_of_save_regs);
761      if (!setregs(pid_of_save_regs, &user_save.regs, sizeof(user_save.regs))) {
762         ERROR(errno, "setregs restore registers pid %d after cont\n",
763               pid_of_save_regs);
764      }
765
766      /* Now, we transmit all the signals we have queued. */
767      if (signal_queue_sz > 0) {
768         int i;
769         for (i = 0; i < signal_queue_sz; i++) {
770            DEBUG(1, "PTRACE_CONT to transmit queued signal %d\n",
771                  signal_queue[i].si_signo);
772            res = ptrace (PTRACE_CONT, pid_of_save_regs, NULL,
773                          signal_queue[i].si_signo);
774            if (res != 0)
775               ERROR(errno, "PTRACE_CONT with signal %d\n",
776                     signal_queue[i].si_signo);
777            if (!stop(pid_of_save_regs, "sigstop after transmit sig"))
778               DEBUG(0, "Could not sigstop after transmit sig");
779         }
780         free (signal_queue);
781         signal_queue = NULL;
782         signal_queue_sz = 0;
783      }
784      pid_of_save_regs = 0;
785   } else {
786      DEBUG(1, "PTRACE_SETREGS restore registers: no pid\n");
787   }
788   if (signal_queue)
789      ERROR (0, "One or more signals queued were not delivered. "
790             "First signal: %d\n", signal_queue[0].si_signo);
791   detach_from_all_threads(pid);
792}
793
794Bool invoker_invoke_gdbserver (pid_t pid)
795{
796   long res;
797   Bool stopped;
798#  if defined(VGA_arm64)
799/* arm64 is extra special, old glibc defined kernel user_pt_regs, but
800   newer glibc instead define user_regs_struct. */
801#    ifdef HAVE_SYS_USER_REGS
802   struct user_regs_struct user_mod;
803#    else
804   struct user_pt_regs user_mod;
805#    endif
806#  else
807   struct user user_mod;
808#  endif
809   Addr sp;
810   /* A specific int value is passed to invoke_gdbserver, to check
811      everything goes according to the plan. */
812   const int check = 0x8BADF00D; // ate bad food.
813
814   const Addr bad_return = 0;
815   // A bad return address will be pushed on the stack.
816   // The function invoke_gdbserver cannot return. If ever it returns, a NULL
817   // address pushed on the stack should ensure this is detected.
818
819   /* Not yet attached. If problem, vgdb can abort,
820      no cleanup needed. */
821
822   DEBUG(1, "attach to 'main' pid %d\n", pid);
823   if (!attach(pid, "attach main pid")) {
824      ERROR(0, "error attach main pid %d\n", pid);
825      return False;
826   }
827
828   /* Now, we are attached. If problem, detach and return. */
829
830   if (!acquire_and_suspend_threads(pid)) {
831      detach_from_all_threads(pid);
832      /* if the pid does not exist anymore, we better stop */
833      if (kill(pid, 0) != 0)
834        XERROR (errno, "invoke_gdbserver: check for pid %d existence failed\n",
835                pid);
836      return False;
837   }
838
839   if (!getregs(pid, &user_mod.regs, sizeof(user_mod.regs))) {
840      detach_from_all_threads(pid);
841      return False;
842   }
843   user_save = user_mod;
844
845#if defined(VGA_x86)
846   sp = user_mod.regs.esp;
847#elif defined(VGA_amd64)
848   sp = user_mod.regs.rsp;
849   if (shared32 != NULL) {
850     /* 64bit vgdb speaking with a 32bit executable.
851        To have system call restart properly, we need to sign extend rax.
852        For more info:
853        web search '[patch] Fix syscall restarts for amd64->i386 biarch'
854        e.g. http://sourceware.org/ml/gdb-patches/2009-11/msg00592.html */
855     *(long *)&user_save.regs.rax = *(int*)&user_save.regs.rax;
856     DEBUG(1, "Sign extending %8.8lx to %8.8lx\n",
857           user_mod.regs.rax, user_save.regs.rax);
858   }
859#elif defined(VGA_arm)
860   sp = user_mod.regs.uregs[13];
861#elif defined(VGA_arm64)
862   sp = user_mod.sp;
863#elif defined(VGA_ppc32)
864   sp = user_mod.regs.gpr[1];
865#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
866   sp = user_mod.regs.gpr[1];
867#elif defined(VGA_s390x)
868   sp = user_mod.regs.gprs[15];
869#elif defined(VGA_mips32)
870   long long *p = (long long *)user_mod.regs;
871   sp = p[29];
872#elif defined(VGA_mips64)
873   sp = user_mod.regs[29];
874#else
875   I_die_here : (sp) architecture missing in vgdb.c
876#endif
877
878
879   // the magic below is derived from spying what gdb sends to
880   // the (classical) gdbserver when invoking a C function.
881   if (shared32 != NULL) {
882      // vgdb speaking with a 32bit executable.
883#if   defined(VGA_x86) || defined(VGA_amd64)
884      const int regsize = 4;
885      int rw;
886      /* push check arg on the stack */
887      sp = sp - regsize;
888      DEBUG(1, "push check arg ptrace_write_memory\n");
889      assert(regsize == sizeof(check));
890      rw = ptrace_write_memory(pid, sp,
891                               &check,
892                               regsize);
893      if (rw != 0) {
894         ERROR(rw, "push check arg ptrace_write_memory");
895         detach_from_all_threads(pid);
896         return False;
897      }
898
899      sp = sp - regsize;
900      DEBUG(1, "push bad_return return address ptrace_write_memory\n");
901      // Note that for a 64 bits vgdb, only 4 bytes of NULL bad_return
902      // are written.
903      rw = ptrace_write_memory(pid, sp,
904                               &bad_return,
905                               regsize);
906      if (rw != 0) {
907         ERROR(rw, "push bad_return return address ptrace_write_memory");
908         detach_from_all_threads(pid);
909         return False;
910      }
911#if   defined(VGA_x86)
912      /* set ebp, esp, eip and orig_eax to invoke gdbserver */
913      // compiled in 32bits, speaking with a 32bits exe
914      user_mod.regs.ebp = sp; // bp set to sp
915      user_mod.regs.esp = sp;
916      user_mod.regs.eip = shared32->invoke_gdbserver;
917      user_mod.regs.orig_eax = -1L;
918#elif defined(VGA_amd64)
919      /* set ebp, esp, eip and orig_eax to invoke gdbserver */
920      // compiled in 64bits, speaking with a 32bits exe
921      user_mod.regs.rbp = sp; // bp set to sp
922      user_mod.regs.rsp = sp;
923      user_mod.regs.rip = shared32->invoke_gdbserver;
924      user_mod.regs.orig_rax = -1L;
925#else
926      I_die_here : not x86 or amd64 in x86/amd64 section/
927#endif
928
929#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
930      user_mod.regs.nip = shared32->invoke_gdbserver;
931      user_mod.regs.trap = -1L;
932      /* put check arg in register 3 */
933      user_mod.regs.gpr[3] = check;
934      /* put NULL return address in Link Register */
935      user_mod.regs.link = bad_return;
936
937#elif defined(VGA_arm)
938      /* put check arg in register 0 */
939      user_mod.regs.uregs[0] = check;
940      /* put NULL return address in Link Register */
941      user_mod.regs.uregs[14] = bad_return;
942      user_mod.regs.uregs[15] = shared32->invoke_gdbserver;
943
944#elif defined(VGA_arm64)
945      XERROR(0, "TBD arm64: vgdb a 32 bits executable with a 64 bits exe");
946
947#elif defined(VGA_s390x)
948      XERROR(0, "(fn32) s390x has no 32bits implementation");
949#elif defined(VGA_mips32)
950      /* put check arg in register 4 */
951      p[4] = check;
952      /* put NULL return address in ra */
953      p[31] = bad_return;
954      p[34] = shared32->invoke_gdbserver;
955      p[25] = shared32->invoke_gdbserver;
956      /* make stack space for args */
957      p[29] = sp - 32;
958
959#elif defined(VGA_mips64)
960      assert(0); // cannot vgdb a 32 bits executable with a 64 bits exe
961#else
962      I_die_here : architecture missing in vgdb.c
963#endif
964      }
965
966   else if (shared64 != NULL) {
967#if defined(VGA_x86)
968      assert(0); // cannot vgdb a 64 bits executable with a 32 bits exe
969#elif defined(VGA_amd64)
970      // vgdb speaking with a 64 bit executable.
971      const int regsize = 8;
972      int rw;
973
974      /* give check arg in rdi */
975      user_mod.regs.rdi = check;
976
977      /* push return address on stack : return to breakaddr */
978      sp = sp - regsize;
979      DEBUG(1, "push bad_return return address ptrace_write_memory\n");
980      rw = ptrace_write_memory(pid, sp,
981                               &bad_return,
982                               sizeof(bad_return));
983      if (rw != 0) {
984         ERROR(rw, "push bad_return return address ptrace_write_memory");
985         detach_from_all_threads(pid);
986         return False;
987      }
988
989      /* set rbp, rsp, rip and orig_rax to invoke gdbserver */
990      user_mod.regs.rbp = sp; // bp set to sp
991      user_mod.regs.rsp = sp;
992      user_mod.regs.rip = shared64->invoke_gdbserver;
993      user_mod.regs.orig_rax = -1L;
994
995#elif defined(VGA_arm)
996      assert(0); // cannot vgdb a 64 bits executable with a 32 bits exe
997#elif defined(VGA_arm64)
998      user_mod.regs[0] = check;
999      user_mod.sp = sp;
1000      user_mod.pc = shared64->invoke_gdbserver;
1001      /* put NULL return address in Link Register */
1002      user_mod.regs[30] = bad_return;
1003
1004#elif defined(VGA_ppc32)
1005      assert(0); // cannot vgdb a 64 bits executable with a 32 bits exe
1006#elif defined(VGA_ppc64be)
1007      Addr64 func_addr;
1008      Addr64 toc_addr;
1009      int rw;
1010      rw = ptrace_read_memory(pid, shared64->invoke_gdbserver,
1011                              &func_addr,
1012                              sizeof(Addr64));
1013      if (rw != 0) {
1014         ERROR(rw, "ppc64 read func_addr\n");
1015         detach_from_all_threads(pid);
1016         return False;
1017      }
1018      rw = ptrace_read_memory(pid, shared64->invoke_gdbserver+8,
1019                              &toc_addr,
1020                              sizeof(Addr64));
1021      if (rw != 0) {
1022         ERROR(rw, "ppc64 read toc_addr\n");
1023         detach_from_all_threads(pid);
1024         return False;
1025      }
1026      // We are not pushing anything on the stack, so it is not
1027      // very clear why the sp has to be decreased, but it seems
1028      // needed. The ppc64 ABI might give some lights on this ?
1029      user_mod.regs.gpr[1] = sp - 220;
1030      user_mod.regs.gpr[2] = toc_addr;
1031      user_mod.regs.nip = func_addr;
1032      user_mod.regs.trap = -1L;
1033      /* put check arg in register 3 */
1034      user_mod.regs.gpr[3] = check;
1035      /* put bad_return return address in Link Register */
1036      user_mod.regs.link = bad_return;
1037#elif defined(VGA_ppc64le)
1038      /* LE does not use the function pointer structure used in BE */
1039      user_mod.regs.nip = shared64->invoke_gdbserver;
1040      user_mod.regs.gpr[1] = sp - 512;
1041      user_mod.regs.gpr[12] = user_mod.regs.nip;
1042      user_mod.regs.trap = -1L;
1043      /* put check arg in register 3 */
1044      user_mod.regs.gpr[3] = check;
1045      /* put bad_return return address in Link Register */
1046      user_mod.regs.link = bad_return;
1047#elif defined(VGA_s390x)
1048      /* put check arg in register r2 */
1049      user_mod.regs.gprs[2] = check;
1050      /* bad_return Return address is in r14 */
1051      user_mod.regs.gprs[14] = bad_return;
1052      /* minimum stack frame */
1053      sp = sp - 160;
1054      user_mod.regs.gprs[15] = sp;
1055      /* set program counter */
1056      user_mod.regs.psw.addr = shared64->invoke_gdbserver;
1057#elif defined(VGA_mips32)
1058      assert(0); // cannot vgdb a 64 bits executable with a 32 bits exe
1059#elif defined(VGA_mips64)
1060      /* put check arg in register 4 */
1061      user_mod.regs[4] = check;
1062      /* put NULL return address in ra */
1063      user_mod.regs[31] = bad_return;
1064      user_mod.regs[34] = shared64->invoke_gdbserver;
1065      user_mod.regs[25] = shared64->invoke_gdbserver;
1066#else
1067      I_die_here: architecture missing in vgdb.c
1068#endif
1069   }
1070   else {
1071      assert(0);
1072   }
1073
1074   if (!setregs(pid, &user_mod.regs, sizeof(user_mod.regs))) {
1075      detach_from_all_threads(pid);
1076      return False;
1077   }
1078   /* Now that we have modified the registers, we set
1079      pid_of_save_regs to indicate that restore_and_detach
1080      must restore the registers in case of cleanup. */
1081   pid_of_save_regs = pid;
1082   pid_of_save_regs_continued = False;
1083
1084
1085   /* We PTRACE_CONT-inue pid.
1086      Either gdbserver will be invoked directly (if all
1087      threads are interruptible) or gdbserver will be
1088      called soon by the scheduler. In the first case,
1089      pid will stop on the break inserted above when
1090      gdbserver returns. In the 2nd case, the break will
1091      be encountered directly. */
1092   DEBUG(1, "PTRACE_CONT to invoke\n");
1093   res = ptrace (PTRACE_CONT, pid, NULL, NULL);
1094   if (res != 0) {
1095      ERROR(errno, "PTRACE_CONT\n");
1096      restore_and_detach(pid);
1097      return False;
1098   }
1099   pid_of_save_regs_continued = True;
1100   /* Wait for SIGSTOP generated by m_gdbserver.c give_control_back_to_vgdb */
1101   stopped = waitstopped (pid, SIGSTOP,
1102                          "waitpid status after PTRACE_CONT to invoke");
1103   if (stopped) {
1104      /* Here pid has properly stopped on the break. */
1105      pid_of_save_regs_continued = False;
1106      restore_and_detach(pid);
1107      return True;
1108   } else {
1109      /* Whatever kind of problem happened. We shutdown. */
1110      shutting_down = True;
1111      return False;
1112   }
1113}
1114
1115void invoker_cleanup_restore_and_detach(void *v_pid)
1116{
1117   DEBUG(1, "invoker_cleanup_restore_and_detach dying: %d\n", dying);
1118   if (!dying)
1119      restore_and_detach(*(int*)v_pid);
1120}
1121
/* Restrictions message hook of the invoker interface.
   This ptrace based implementation deliberately does nothing here:
   the body is empty and no message is produced. */
void invoker_restrictions_msg(void)
{
   /* Intentionally empty. */
}
1125
1126void invoker_valgrind_dying(void)
1127{
1128   /* Avoid messing up with registers of valgrind when it is dying. */
1129   pid_of_save_regs_continued = False;
1130   dying = True;
1131}
1132