strace.c revision 74ec14f968a418691b851cbbfeb0269174c64b08
1/*
2 * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3 * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4 * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5 * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "defs.h"
32#include <stdarg.h>
33#include <sys/param.h>
34#include <fcntl.h>
35#include <sys/resource.h>
36#include <sys/wait.h>
37#include <sys/stat.h>
38#include <pwd.h>
39#include <grp.h>
40#include <dirent.h>
41#include <sys/utsname.h>
42#if defined(IA64)
43# include <asm/ptrace_offsets.h>
44#endif
/* Some libcs do not declare these, so we declare them ourselves: */
46extern char **environ;
47extern int optind;
48extern char *optarg;
49
50
51#if defined __NR_tkill
52# define my_tkill(tid, sig) syscall(__NR_tkill, (tid), (sig))
53#else
   /* kill() may arbitrarily choose the target task of the process group
      while we later wait on that specific TID.  PID process waits become
      TID task specific waits for a process under ptrace(2).  */
57# warning "tkill(2) not available, risk of strace hangs!"
58# define my_tkill(tid, sig) kill((tid), (sig))
59#endif
60
61/* Glue for systems without a MMU that cannot provide fork() */
62#if !defined(HAVE_FORK)
63# undef NOMMU_SYSTEM
64# define NOMMU_SYSTEM 1
65#endif
66#if NOMMU_SYSTEM
67# define fork() vfork()
68#endif
69
/* Reset to 0 by die() before cleanup(); presumably selected by -c/-C (confirm in option parsing) */
cflag_t cflag = CFLAG_NONE;
/* Nonzero: startup_attach() attaches to every thread of a target; >= 2 (-ff): one output file per tcb */
unsigned int followfork = 0;
unsigned int ptrace_setoptions = 0;
unsigned int xflag = 0;
bool need_fork_exec_workarounds = 0;
/* Emit tracer diagnostics (tcb allocation, attach results) on stderr */
bool debug_flag = 0;
bool Tflag = 0;
/* Suppress the "Process N attached" / "Process N detached" messages */
bool qflag = 0;
/* Which WSTOPSIG(status) value marks syscall traps? */
static unsigned int syscall_trap_sig = SIGTRAP;
/* Timestamp prefix level in printleader(): 1 = HH:MM:SS, 2 = plus usecs, > 2 = raw seconds.usecs */
static unsigned int tflag = 0;
/* printleader() calls printcall() at the start of every line */
static bool iflag = 0;
/* When tflag is set, print the time elapsed since the previous line instead */
static bool rflag = 0;
/* printleader() prefixes every line with the tcb's pid */
static bool print_pid_pfx = 0;
84
85/* -I n */
86enum {
87    INTR_NOT_SET        = 0,
88    INTR_ANYWHERE       = 1, /* don't block/ignore any signals */
89    INTR_WHILE_WAIT     = 2, /* block fatal signals while decoding syscall. default */
90    INTR_NEVER          = 3, /* block fatal signals. default if '-o FILE PROG' */
91    INTR_BLOCK_TSTP_TOO = 4, /* block fatal signals and SIGTSTP (^Z) */
92    NUM_INTR_OPTS
93};
94static int opt_intr;
95/* We play with signal mask only if this mode is active: */
96#define interactive (opt_intr == INTR_WHILE_WAIT)
97
98/*
99 * daemonized_tracer supports -D option.
100 * With this option, strace forks twice.
101 * Unlike normal case, with -D *grandparent* process exec's,
102 * becoming a traced process. Child exits (this prevents traced process
103 * from having children it doesn't expect to have), and grandchild
104 * attaches to grandparent similarly to strace -p PID.
105 * This allows for more transparent interaction in cases
106 * when process and its parent are communicating via signals,
107 * wait() etc. Without -D, strace process gets lodged in between,
108 * disrupting parent<->child link.
109 */
110static bool daemonized_tracer = 0;
111
112#if USE_SEIZE
113static int post_attach_sigstop = TCB_IGNORE_ONE_SIGSTOP;
114# define use_seize (post_attach_sigstop == 0)
115#else
116# define post_attach_sigstop TCB_IGNORE_ONE_SIGSTOP
117# define use_seize 0
118#endif
119
120/* Sometimes we want to print only succeeding syscalls. */
121bool not_failing_only = 0;
122
123/* Show path associated with fd arguments */
124bool show_fd_path = 0;
125
126/* are we filtering traces based on paths? */
127bool tracing_paths = 0;
128
129static bool detach_on_execve = 0;
130static bool skip_startup_execve = 0;
131
132static int exit_code = 0;
133static int strace_child = 0;
134static int strace_tracer_pid = 0;
135
136static char *username = NULL;
137static uid_t run_uid;
138static gid_t run_gid;
139
140unsigned int max_strlen = DEFAULT_STRLEN;
141static int acolumn = DEFAULT_ACOLUMN;
142static char *acolumn_spaces;
143
144static char *outfname = NULL;
145/* If -ff, points to stderr. Else, it's our common output log */
146static FILE *shared_log;
147
148struct tcb *printing_tcp = NULL;
149static struct tcb *current_tcp;
150
151static struct tcb **tcbtab;
152static unsigned int nprocs, tcbtabsize;
153static const char *progname;
154
155unsigned os_release; /* generated from uname()'s u.release */
156
157static int detach(struct tcb *tcp);
158static int trace(void);
159static void cleanup(void);
160static void interrupt(int sig);
161static sigset_t empty_set, blocked_set;
162
163#ifdef HAVE_SIG_ATOMIC_T
164static volatile sig_atomic_t interrupted;
165#else
166static volatile int interrupted;
167#endif
168
169#ifndef HAVE_STRERROR
170
171#if !HAVE_DECL_SYS_ERRLIST
172extern int sys_nerr;
173extern char *sys_errlist[];
174#endif
175
176const char *
177strerror(int err_no)
178{
179	static char buf[sizeof("Unknown error %d") + sizeof(int)*3];
180
181	if (err_no < 1 || err_no >= sys_nerr) {
182		sprintf(buf, "Unknown error %d", err_no);
183		return buf;
184	}
185	return sys_errlist[err_no];
186}
187
#endif /* HAVE_STRERROR */
189
/*
 * Print the command-line help text to OFP and terminate the process
 * with exit status EXITVAL.  The three conversions embedded in the text
 * (%d, %d, %s) are filled in from DEFAULT_ACOLUMN, DEFAULT_STRLEN and
 * DEFAULT_SORTBY, in that order.  Never returns.
 */
static void
usage(FILE *ofp, int exitval)
{
	fprintf(ofp, "\
usage: strace [-CdffhiqrtttTvVxxy] [-I n] [-e expr]...\n\
              [-a column] [-o file] [-s strsize] [-P path]...\n\
              -p pid... / [-D] [-E var=val]... [-u username] PROG [ARGS]\n\
   or: strace -c[df] [-I n] [-e expr]... [-O overhead] [-S sortby]\n\
              -p pid... / [-D] [-E var=val]... [-u username] PROG [ARGS]\n\
-c -- count time, calls, and errors for each syscall and report summary\n\
-C -- like -c but also print regular output\n\
-d -- enable debug output to stderr\n\
-D -- run tracer process as a detached grandchild, not as parent\n\
-f -- follow forks, -ff -- with output into separate files\n\
-F -- attempt to follow vforks (deprecated, use -f)\n\
-i -- print instruction pointer at time of syscall\n\
-q -- suppress messages about attaching, detaching, etc.\n\
-r -- print relative timestamp, -t -- absolute timestamp, -tt -- with usecs\n\
-T -- print time spent in each syscall\n\
-v -- verbose mode: print unabbreviated argv, stat, termios, etc. args\n\
-x -- print non-ascii strings in hex, -xx -- print all strings in hex\n\
-y -- print paths associated with file descriptor arguments\n\
-h -- print help message, -V -- print version\n\
-a column -- alignment COLUMN for printing syscall results (default %d)\n\
-e expr -- a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
   options: trace, abbrev, verbose, raw, signal, read, or write\n\
-I interruptible --\n\
   1: no signals are blocked\n\
   2: fatal signals are blocked while decoding syscall (default)\n\
   3: fatal signals are always blocked (default if '-o FILE PROG')\n\
   4: fatal signals and SIGTSTP (^Z) are always blocked\n\
      (useful to make 'strace -o FILE PROG' not stop on ^Z)\n\
-o file -- send trace output to FILE instead of stderr\n\
-O overhead -- set overhead for tracing syscalls to OVERHEAD usecs\n\
-p pid -- trace process with process id PID, may be repeated\n\
-s strsize -- limit length of print strings to STRSIZE chars (default %d)\n\
-S sortby -- sort syscall counts by: time, calls, name, nothing (default %s)\n\
-u username -- run command as username handling setuid and/or setgid\n\
-E var=val -- put var=val in the environment for command\n\
-E var -- remove var from the environment for command\n\
-P path -- trace accesses to path\n\
"
/* this is broken, so don't document it
-z -- print only succeeding syscalls\n\
 */
/* experimental, don't document it yet (option letter may change in the future!)
-b -- detach on successful execve\n\
 */
, DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
	exit(exitval);
}
241
242static void die(void) __attribute__ ((noreturn));
243static void die(void)
244{
245	if (strace_tracer_pid == getpid()) {
246		cflag = 0;
247		cleanup();
248	}
249	exit(1);
250}
251
252static void verror_msg(int err_no, const char *fmt, va_list p)
253{
254	char *msg;
255
256	fflush(NULL);
257
258	/* We want to print entire message with single fprintf to ensure
259	 * message integrity if stderr is shared with other programs.
260	 * Thus we use vasprintf + single fprintf.
261	 */
262	msg = NULL;
263	if (vasprintf(&msg, fmt, p) >= 0) {
264		if (err_no)
265			fprintf(stderr, "%s: %s: %s\n", progname, msg, strerror(err_no));
266		else
267			fprintf(stderr, "%s: %s\n", progname, msg);
268		free(msg);
269	} else {
270		/* malloc in vasprintf failed, try it without malloc */
271		fprintf(stderr, "%s: ", progname);
272		vfprintf(stderr, fmt, p);
273		if (err_no)
274			fprintf(stderr, ": %s\n", strerror(err_no));
275		else
276			putc('\n', stderr);
277	}
278	/* We don't switch stderr to buffered, thus fprintf(stderr)
279	 * always flushes its output and this is not necessary: */
280	/* fflush(stderr); */
281}
282
/* Print "<progname>: <formatted message>" on stderr (no errno text) and return. */
void error_msg(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	verror_msg(0, fmt, ap);
	va_end(ap);
}
290
/* Like error_msg(), then terminate through die(); does not return. */
void error_msg_and_die(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	verror_msg(0, fmt, ap);
	die();
}
298
/*
 * Print "<progname>: <formatted message>: <strerror(errno)>" on stderr,
 * using the value errno had on entry, and return.
 */
void perror_msg(const char *fmt, ...)
{
	const int err_on_entry = errno;
	va_list ap;

	va_start(ap, fmt);
	verror_msg(err_on_entry, fmt, ap);
	va_end(ap);
}
306
/* Like perror_msg(), then terminate through die(); does not return. */
void perror_msg_and_die(const char *fmt, ...)
{
	const int err_on_entry = errno;
	va_list ap;

	va_start(ap, fmt);
	verror_msg(err_on_entry, fmt, ap);
	die();
}
314
/*
 * Report "Out of memory" and terminate.  A static flag remembers that we
 * have been here already: a second entry (presumably from the dying path
 * itself failing to allocate) exits immediately without another message.
 */
void die_out_of_memory(void)
{
	static bool already_dying = 0;

	if (!already_dying) {
		already_dying = 1;
		error_msg_and_die("Out of memory");
	}
	exit(1);
}
323
/*
 * Complain that ARG is not acceptable for option letter OPT
 * and terminate via error_msg_and_die().
 */
static void
error_opt_arg(int opt, const char *arg)
{
	error_msg_and_die("Invalid -%c argument: '%s'",
			  opt, arg);
}
329
#if USE_SEIZE
/*
 * Attach to PID.  When use_seize is in effect this issues PTRACE_SEIZE
 * followed by PTRACE_INTERRUPT; otherwise a plain PTRACE_ATTACH.
 * Returns the result of the last ptrace() call made (nonzero on failure).
 */
static int
ptrace_attach_or_seize(int pid)
{
	if (use_seize) {
		int rc = ptrace(PTRACE_SEIZE, pid, 0, 0);

		if (rc != 0)
			return rc;
		return ptrace(PTRACE_INTERRUPT, pid, 0, 0);
	}
	return ptrace(PTRACE_ATTACH, pid, 0, 0);
}
#else
# define ptrace_attach_or_seize(pid) ptrace(PTRACE_ATTACH, (pid), 0, 0)
#endif
346
347/*
348 * Used when we want to unblock stopped traced process.
349 * Should be only used with PTRACE_CONT, PTRACE_DETACH and PTRACE_SYSCALL.
350 * Returns 0 on success or if error was ESRCH
351 * (presumably process was killed while we talk to it).
352 * Otherwise prints error message and returns -1.
353 */
354static int
355ptrace_restart(int op, struct tcb *tcp, int sig)
356{
357	int err;
358	const char *msg;
359
360	errno = 0;
361	ptrace(op, tcp->pid, (void *) 0, (long) sig);
362	err = errno;
363	if (!err)
364		return 0;
365
366	msg = "SYSCALL";
367	if (op == PTRACE_CONT)
368		msg = "CONT";
369	if (op == PTRACE_DETACH)
370		msg = "DETACH";
371#ifdef PTRACE_LISTEN
372	if (op == PTRACE_LISTEN)
373		msg = "LISTEN";
374#endif
375	/*
376	 * Why curcol != 0? Otherwise sometimes we get this:
377	 *
378	 * 10252 kill(10253, SIGKILL)              = 0
379	 *  <ptrace(SYSCALL,10252):No such process>10253 ...next decode...
380	 *
381	 * 10252 died after we retrieved syscall exit data,
382	 * but before we tried to restart it. Log looks ugly.
383	 */
384	if (current_tcp && current_tcp->curcol != 0) {
385		tprintf(" <ptrace(%s):%s>\n", msg, strerror(err));
386		line_ended();
387	}
388	if (err == ESRCH)
389		return 0;
390	errno = err;
391	perror_msg("ptrace(PTRACE_%s,pid:%d,sig:%d)", msg, tcp->pid, sig);
392	return -1;
393}
394
/*
 * Make sure FD_CLOEXEC is set on FD.  A failing F_GETFD can only mean
 * the caller passed a bad descriptor, which is a bug in the caller, so
 * we abort instead of propagating the error.
 */
static void
set_cloexec_flag(int fd)
{
	int cur = fcntl(fd, F_GETFD);

	if (cur < 0) {
		perror_msg_and_die("fcntl(%d, F_GETFD)", fd);
	}

	/* Already close-on-exec: nothing to change */
	if ((cur & FD_CLOEXEC) == FD_CLOEXEC)
		return;

	fcntl(fd, F_SETFD, cur | FD_CLOEXEC); /* never fails */
}
416
/*
 * Send SIG to PID, ignoring the outcome, and leave errno exactly as it
 * was on entry so a following perror-style report still shows the
 * original failure.
 */
static void kill_save_errno(pid_t pid, int sig)
{
	const int errno_on_entry = errno;

	(void) kill(pid, sig);
	errno = errno_on_entry;
}
424
425/*
426 * When strace is setuid executable, we have to swap uids
427 * before and after filesystem and process management operations.
428 */
/*
 * Exchange the real and effective user ids.  Does nothing when they are
 * already equal; dies if setreuid() refuses the swap.
 */
static void
swap_uid(void)
{
	const int effective = geteuid();
	const int real = getuid();

	if (effective == real)
		return;

	if (setreuid(effective, real) < 0) {
		perror_msg_and_die("setreuid");
	}
}
438
439#if _LFS64_LARGEFILE
440# define fopen_for_output fopen64
441#else
442# define fopen_for_output fopen
443#endif
444
/*
 * Open PATH for writing with the uids swapped (see swap_uid()), swap
 * them back, and mark the new stream's descriptor close-on-exec.
 * Dies if the file cannot be opened; never returns NULL.
 */
static FILE *
strace_fopen(const char *path)
{
	FILE *out;

	swap_uid();
	out = fopen_for_output(path, "w");
	if (out == NULL)
		perror_msg_and_die("Can't fopen '%s'", path);
	swap_uid();

	set_cloexec_flag(fileno(out));
	return out;
}
458
459static int popen_pid = 0;
460
461#ifndef _PATH_BSHELL
462# define _PATH_BSHELL "/bin/sh"
463#endif
464
465/*
466 * We cannot use standard popen(3) here because we have to distinguish
467 * popen child process from other processes we trace, and standard popen(3)
468 * does not export its child's pid.
469 */
/*
 * Start "sh -c COMMAND" with its stdin connected to a pipe and return a
 * write stream for the other end.  The child's pid is stored in the
 * file-scope popen_pid.  The uids are swapped (swap_uid()) around the
 * pipe/vfork sequence and swapped back in the parent.
 * Dies on any failure; never returns NULL.
 */
static FILE *
strace_popen(const char *command)
{
	FILE *fp;
	int fds[2];

	swap_uid();
	if (pipe(fds) < 0)
		perror_msg_and_die("pipe");

	/* Write end must not leak into programs we exec later */
	set_cloexec_flag(fds[1]); /* never fails */

	popen_pid = vfork();
	if (popen_pid == -1)
		perror_msg_and_die("vfork");

	if (popen_pid == 0) {
		/* child */
		close(fds[1]);
		/* Move the read end onto fd 0 unless it is already there */
		if (fds[0] != 0) {
			/* dup2() returns the new fd (0) on success, so nonzero means failure */
			if (dup2(fds[0], 0))
				perror_msg_and_die("dup2");
			close(fds[0]);
		}
		execl(_PATH_BSHELL, "sh", "-c", command, NULL);
		perror_msg_and_die("Can't execute '%s'", _PATH_BSHELL);
	}

	/* parent */
	close(fds[0]);
	swap_uid();
	fp = fdopen(fds[1], "w");
	if (!fp)
		die_out_of_memory();
	return fp;
}
506
507void
508tprintf(const char *fmt, ...)
509{
510	va_list args;
511
512	va_start(args, fmt);
513	if (current_tcp) {
514		int n = strace_vfprintf(current_tcp->outf, fmt, args);
515		if (n < 0) {
516			if (current_tcp->outf != stderr)
517				perror_msg("%s", outfname);
518		} else
519			current_tcp->curcol += n;
520	}
521	va_end(args);
522}
523
524void
525tprints(const char *str)
526{
527	if (current_tcp) {
528		int n = fputs_unlocked(str, current_tcp->outf);
529		if (n >= 0) {
530			current_tcp->curcol += strlen(str);
531			return;
532		}
533		if (current_tcp->outf != stderr)
534			perror_msg("%s", outfname);
535	}
536}
537
538void
539line_ended(void)
540{
541	if (current_tcp) {
542		current_tcp->curcol = 0;
543		fflush(current_tcp->outf);
544	}
545	if (printing_tcp) {
546		printing_tcp->curcol = 0;
547		printing_tcp = NULL;
548	}
549}
550
/*
 * Begin a new output line for TCP.
 *
 * If the previously printed line was left unfinished it is closed with
 * " <unfinished ...>".  TCP then becomes both printing_tcp and
 * current_tcp, its column counter is reset, and the line prefix is
 * emitted: optional pid, optional timestamp (tflag/rflag) and, with
 * iflag, whatever printcall() prints.
 */
void
printleader(struct tcb *tcp)
{
	/* If -ff, "previous tcb we printed" is always the same as current,
	 * because we have per-tcb output files.
	 */
	if (followfork >= 2)
		printing_tcp = tcp;

	if (printing_tcp) {
		/* Direct tprints() below at the log that holds the unfinished line */
		current_tcp = printing_tcp;
		if (printing_tcp->curcol != 0 && (followfork < 2 || printing_tcp == tcp)) {
			/*
			 * case 1: we have a shared log (i.e. not -ff), and last line
			 * wasn't finished (same or different tcb, doesn't matter).
			 * case 2: split log, we are the same tcb, but our last line
			 * didn't finish ("SIGKILL nuked us after syscall entry" etc).
			 */
			tprints(" <unfinished ...>\n");
			printing_tcp->curcol = 0;
		}
	}

	printing_tcp = tcp;
	current_tcp = tcp;
	current_tcp->curcol = 0;

	/* pid prefix: always if requested, else only when several
	 * processes share a log that is not a named output file */
	if (print_pid_pfx)
		tprintf("%-5d ", tcp->pid);
	else if (nprocs > 1 && !outfname)
		tprintf("[pid %5u] ", tcp->pid);

	if (tflag) {
		char str[sizeof("HH:MM:SS")];
		struct timeval tv, dtv;
		/* time of the previous line, kept across calls for rflag */
		static struct timeval otv;

		gettimeofday(&tv, NULL);
		if (rflag) {
			/* very first line: delta is zero */
			if (otv.tv_sec == 0)
				otv = tv;
			tv_sub(&dtv, &tv, &otv);
			tprintf("%6ld.%06ld ",
				(long) dtv.tv_sec, (long) dtv.tv_usec);
			otv = tv;
		}
		else if (tflag > 2) {
			/* raw seconds.microseconds as returned by gettimeofday() */
			tprintf("%ld.%06ld ",
				(long) tv.tv_sec, (long) tv.tv_usec);
		}
		else {
			/* local wall-clock time, with microseconds when tflag > 1 */
			time_t local = tv.tv_sec;
			strftime(str, sizeof(str), "%T", localtime(&local));
			if (tflag > 1)
				tprintf("%s.%06ld ", str, (long) tv.tv_usec);
			else
				tprintf("%s ", str);
		}
	}
	if (iflag)
		printcall(tcp);
}
613
614void
615tabto(void)
616{
617	if (current_tcp->curcol < acolumn)
618		tprints(acolumn_spaces + current_tcp->curcol);
619}
620
/* Should only be called directly *after a successful attach* to a tracee.
 * Otherwise, "strace -oFILE -ff -p<nonexistent_pid>"
 * may create a bogus empty FILE.<nonexistent_pid>, and then die.
 */
625static void
626newoutf(struct tcb *tcp)
627{
628	tcp->outf = shared_log; /* if not -ff mode, the same file is for all */
629	if (followfork >= 2) {
630		char name[520 + sizeof(int) * 3];
631		sprintf(name, "%.512s.%u", outfname, tcp->pid);
632		tcp->outf = strace_fopen(name);
633	}
634}
635
636static void
637expand_tcbtab(void)
638{
639	/* Allocate some more TCBs and expand the table.
640	   We don't want to relocate the TCBs because our
641	   callers have pointers and it would be a pain.
642	   So tcbtab is a table of pointers.  Since we never
643	   free the TCBs, we allocate a single chunk of many.  */
644	int i = tcbtabsize;
645	struct tcb *newtcbs = calloc(tcbtabsize, sizeof(newtcbs[0]));
646	struct tcb **newtab = realloc(tcbtab, tcbtabsize * 2 * sizeof(tcbtab[0]));
647	if (!newtab || !newtcbs)
648		die_out_of_memory();
649	tcbtabsize *= 2;
650	tcbtab = newtab;
651	while (i < tcbtabsize)
652		tcbtab[i++] = newtcbs++;
653}
654
655static struct tcb *
656alloctcb(int pid)
657{
658	int i;
659	struct tcb *tcp;
660
661	if (nprocs == tcbtabsize)
662		expand_tcbtab();
663
664	for (i = 0; i < tcbtabsize; i++) {
665		tcp = tcbtab[i];
666		if ((tcp->flags & TCB_INUSE) == 0) {
667			memset(tcp, 0, sizeof(*tcp));
668			tcp->pid = pid;
669			tcp->flags = TCB_INUSE;
670#if SUPPORTED_PERSONALITIES > 1
671			tcp->currpers = current_personality;
672#endif
673			nprocs++;
674			if (debug_flag)
675				fprintf(stderr, "new tcb for pid %d, active tcbs:%d\n", tcp->pid, nprocs);
676			return tcp;
677		}
678	}
679	error_msg_and_die("bug in alloctcb");
680}
681
682static void
683droptcb(struct tcb *tcp)
684{
685	if (tcp->pid == 0)
686		return;
687
688	nprocs--;
689	if (debug_flag)
690		fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
691
692	if (tcp->outf) {
693		if (followfork >= 2) {
694			if (tcp->curcol != 0)
695				fprintf(tcp->outf, " <detached ...>\n");
696			fclose(tcp->outf);
697		} else {
698			if (printing_tcp == tcp && tcp->curcol != 0)
699				fprintf(tcp->outf, " <detached ...>\n");
700			fflush(tcp->outf);
701		}
702	}
703
704	if (current_tcp == tcp)
705		current_tcp = NULL;
706	if (printing_tcp == tcp)
707		printing_tcp = NULL;
708
709	memset(tcp, 0, sizeof(*tcp));
710}
711
/* detach traced process; continue with sig
 * Never call DETACH twice on the same process as both unattached and
 * attached-unstopped processes give the same ESRCH.  For unattached process we
 * would SIGSTOP it and wait for its SIGSTOP notification forever.
 *
 * Outline: try PTRACE_DETACH straight away; if that fails with ESRCH and
 * the task still exists, make sure a SIGSTOP is on its way, wait for the
 * task to report it, and detach at that stop.  TCP is always dropped.
 *
 * Returns 0, or the negative result of the initial PTRACE_DETACH or of a
 * failed PTRACE_CONT issued while waiting for the SIGSTOP.
 */
static int
detach(struct tcb *tcp)
{
	int error;
	int status, sigstop_expected;

	if (tcp->flags & TCB_BPTSET)
		clearbpt(tcp);

	/*
	 * Linux wrongly insists the child be stopped
	 * before detaching.  Arghh.  We go through hoops
	 * to make a clean break of things.
	 */
#if defined(SPARC)
# undef PTRACE_DETACH
# define PTRACE_DETACH PTRACE_SUNDETACH
#endif

	error = 0;
	sigstop_expected = 0;
	if (tcp->flags & TCB_ATTACHED) {
		/*
		 * We attached but possibly didn't see the expected SIGSTOP.
		 * We must catch exactly one as otherwise the detached process
		 * would be left stopped (process state T).
		 */
		sigstop_expected = (tcp->flags & TCB_IGNORE_ONE_SIGSTOP);
		error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, 0);
		if (error == 0) {
			/* On a clear day, you can see forever. */
		}
		else if (errno != ESRCH) {
			/* Shouldn't happen. */
			perror_msg("%s", "detach: ptrace(PTRACE_DETACH, ...)");
		}
		/* ESRCH from here on: signal 0 probes whether the task still exists */
		else if (my_tkill(tcp->pid, 0) < 0) {
			if (errno != ESRCH)
				perror_msg("%s", "detach: checking sanity");
		}
		/* Task exists; send a SIGSTOP unless one is already pending from attach */
		else if (!sigstop_expected && my_tkill(tcp->pid, SIGSTOP) < 0) {
			if (errno != ESRCH)
				perror_msg("%s", "detach: stopping child");
		}
		else
			sigstop_expected = 1;
	}

	if (sigstop_expected) {
		/* Consume stops until the SIGSTOP shows up or the task is gone */
		for (;;) {
#ifdef __WALL
			if (waitpid(tcp->pid, &status, __WALL) < 0) {
				if (errno == ECHILD) /* Already gone.  */
					break;
				if (errno != EINVAL) {
					perror_msg("%s", "detach: waiting");
					break;
				}
#endif /* __WALL */
				/* No __WALL here.  */
				if (waitpid(tcp->pid, &status, 0) < 0) {
					if (errno != ECHILD) {
						perror_msg("%s", "detach: waiting");
						break;
					}
#ifdef __WCLONE
					/* If no processes, try clones.  */
					if (waitpid(tcp->pid, &status, __WCLONE) < 0) {
						if (errno != ECHILD)
							perror_msg("%s", "detach: waiting");
						break;
					}
#endif /* __WCLONE */
				}
#ifdef __WALL
			}
#endif
			if (!WIFSTOPPED(status)) {
				/* Au revoir, mon ami. */
				break;
			}
			if (WSTOPSIG(status) == SIGSTOP) {
				/* The stop we were waiting for: detach without delivering it */
				ptrace_restart(PTRACE_DETACH, tcp, 0);
				break;
			}
			/* Some other stop: resume, re-injecting the signal unless it was a syscall trap */
			error = ptrace_restart(PTRACE_CONT, tcp,
					WSTOPSIG(status) == syscall_trap_sig ? 0
					: WSTOPSIG(status));
			if (error < 0)
				break;
		}
	}

	if (!qflag && (tcp->flags & TCB_ATTACHED))
		fprintf(stderr, "Process %u detached\n", tcp->pid);

	droptcb(tcp);

	return error;
}
817
818static void
819process_opt_p_list(char *opt)
820{
821	while (*opt) {
822		/*
823		 * We accept -p PID,PID; -p "`pidof PROG`"; -p "`pgrep PROG`".
824		 * pidof uses space as delim, pgrep uses newline. :(
825		 */
826		int pid;
827		char *delim = opt + strcspn(opt, ", \n\t");
828		char c = *delim;
829
830		*delim = '\0';
831		pid = string_to_uint(opt);
832		if (pid <= 0) {
833			error_msg_and_die("Invalid process id: '%s'", opt);
834		}
835		if (pid == strace_tracer_pid) {
836			error_msg_and_die("I'm sorry, I can't let you do that, Dave.");
837		}
838		*delim = c;
839		alloctcb(pid);
840		if (c == '\0')
841			break;
842		opt = delim + 1;
843	}
844}
845
/*
 * Attach to every TCB that is in use but not yet marked TCB_ATTACHED.
 *
 * With -D (daemonized_tracer) we first fork: the parent merely pauses
 * until it is killed, the child carries on as the tracer.
 * With -f (and without -D) all threads listed in /proc/PID/task are
 * attached, each getting its own TCB; otherwise only the pid itself.
 * TCBs that cannot be attached are dropped.
 */
static void
startup_attach(void)
{
	int tcbi;
	struct tcb *tcp;

	/*
	 * Block user interruptions as we would leave the traced
	 * process stopped (process state T) if we would terminate in
	 * between PTRACE_ATTACH and wait4() on SIGSTOP.
	 * We rely on cleanup() from this point on.
	 */
	if (interactive)
		sigprocmask(SIG_BLOCK, &blocked_set, NULL);

	if (daemonized_tracer) {
		pid_t pid = fork();
		if (pid < 0) {
			perror_msg_and_die("fork");
		}
		if (pid) { /* parent */
			/*
			 * Wait for grandchild to attach to straced process
			 * (grandparent). Grandchild SIGKILLs us after it attached.
			 * Grandparent's wait() is unblocked by our death,
			 * it proceeds to exec the straced program.
			 */
			pause();
			_exit(0); /* paranoia */
		}
		/* grandchild */
		/* We will be the tracer process. Remember our new pid: */
		strace_tracer_pid = getpid();
	}

	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
		tcp = tcbtab[tcbi];

		if (!(tcp->flags & TCB_INUSE))
			continue;

		/* Is this a process we should attach to, but not yet attached? */
		if (tcp->flags & TCB_ATTACHED)
			continue; /* no, we already attached it */

		if (followfork && !daemonized_tracer) {
			char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
			DIR *dir;

			sprintf(procdir, "/proc/%d/task", tcp->pid);
			dir = opendir(procdir);
			if (dir != NULL) {
				/* ntid: thread entries seen; nerr: how many of them we failed to attach */
				unsigned int ntid = 0, nerr = 0;
				struct dirent *de;

				while ((de = readdir(dir)) != NULL) {
					struct tcb *cur_tcp;
					int tid;

					if (de->d_fileno == 0)
						continue;
					/* we trust /proc filesystem */
					tid = atoi(de->d_name);
					/* skips "." and ".." as well, atoi() yields 0 for them */
					if (tid <= 0)
						continue;
					++ntid;
					if (ptrace_attach_or_seize(tid) < 0) {
						++nerr;
						if (debug_flag)
							fprintf(stderr, "attach to pid %d failed\n", tid);
						continue;
					}
					if (debug_flag)
						fprintf(stderr, "attach to pid %d succeeded\n", tid);
					/* The entry for PID itself reuses the existing TCB */
					cur_tcp = tcp;
					if (tid != tcp->pid)
						cur_tcp = alloctcb(tid);
					cur_tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
					newoutf(cur_tcp);
				}
				closedir(dir);
				if (interactive) {
					/* Briefly open a window for pending interrupts between targets */
					sigprocmask(SIG_SETMASK, &empty_set, NULL);
					if (interrupted)
						goto ret;
					sigprocmask(SIG_BLOCK, &blocked_set, NULL);
				}
				/* From here on ntid counts successfully attached threads */
				ntid -= nerr;
				if (ntid == 0) {
					perror_msg("%s", "attach: ptrace(PTRACE_ATTACH, ...)");
					droptcb(tcp);
					continue;
				}
				if (!qflag) {
					fprintf(stderr, ntid > 1
? "Process %u attached with %u threads\n"
: "Process %u attached\n",
						tcp->pid, ntid);
				}
				if (!(tcp->flags & TCB_ATTACHED)) {
					/* -p PID, we failed to attach to PID itself
					 * but did attach to some of its sibling threads.
					 * Drop PID's tcp.
					 */
					droptcb(tcp);
				}
				continue;
			} /* if (opendir worked) */
		} /* if (-f) */
		/* Single-task attach: no -f, -D in use, or /proc/PID/task unavailable */
		if (ptrace_attach_or_seize(tcp->pid) < 0) {
			perror_msg("%s", "attach: ptrace(PTRACE_ATTACH, ...)");
			droptcb(tcp);
			continue;
		}
		tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
		newoutf(tcp);
		if (debug_flag)
			fprintf(stderr, "attach to pid %d (main) succeeded\n", tcp->pid);

		if (daemonized_tracer) {
			/*
			 * Make parent go away.
			 * Also makes grandparent's wait() unblock.
			 */
			kill(getppid(), SIGKILL);
		}

		if (!qflag)
			fprintf(stderr,
				"Process %u attached\n",
				tcp->pid);
	} /* for each tcbtab[] */

 ret:
	if (interactive)
		sigprocmask(SIG_SETMASK, &empty_set, NULL);
}
983
/* Stack-o-phobic exec helper, in the hope to work around
 * NOMMU + "daemonized tracer" difficulty.
 *
 * Everything exec_or_die() needs is passed through this file-scope
 * structure rather than through function arguments.
 */
struct exec_params {
	int fd_to_close;	/* closed before exec when >= 0 */
	uid_t run_euid;		/* effective uid handed to setreuid() when -u is used */
	gid_t run_egid;		/* effective gid handed to setregid() when -u is used */
	char **argv;		/* argument vector for execv() */
	char *pathname;		/* resolved program path for execv() */
};
/* Filled in by startup_child(), consumed by exec_or_die() */
static struct exec_params params_for_tracee;
/*
 * Runs in the process that is to become the tracee: close the log
 * descriptor, request tracing (PTRACE_TRACEME, unless -D or seize mode
 * is used), switch to the target user/group ids, synchronize with the
 * tracer and finally exec the program described by params_for_tracee.
 * Any failure is fatal; the function never returns.
 */
static void __attribute__ ((noinline, noreturn))
exec_or_die(void)
{
	struct exec_params *params = &params_for_tracee;

	if (params->fd_to_close >= 0)
		close(params->fd_to_close);
	if (!daemonized_tracer && !use_seize) {
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0) {
			perror_msg_and_die("ptrace(PTRACE_TRACEME, ...)");
		}
	}

	if (username != NULL) {
		/*
		 * It is important to set groups before we
		 * lose privileges on setuid.
		 */
		if (initgroups(username, run_gid) < 0) {
			perror_msg_and_die("initgroups");
		}
		if (setregid(run_gid, params->run_egid) < 0) {
			perror_msg_and_die("setregid");
		}
		if (setreuid(run_uid, params->run_euid) < 0) {
			perror_msg_and_die("setreuid");
		}
	}
	/* No -u and not effectively root: make real and effective uid both run_uid */
	else if (geteuid() != 0)
		if (setreuid(run_uid, run_uid) < 0) {
			perror_msg_and_die("setreuid");
		}

	if (!daemonized_tracer) {
		/*
		 * Induce a ptrace stop. Tracer (our parent)
		 * will resume us with PTRACE_SYSCALL and display
		 * the immediately following execve syscall.
		 * Can't do this on NOMMU systems, we are after
		 * vfork: parent is blocked, stopping would deadlock.
		 */
		if (!NOMMU_SYSTEM)
			kill(getpid(), SIGSTOP);
	} else {
		/* -D: block in wait() until a child dies; alarm(3) bounds the wait */
		alarm(3);
		/* we depend on SIGCHLD set to SIG_DFL by init code */
		/* if it happens to be SIG_IGN'ed, wait won't block */
		wait(NULL);
		alarm(0);
	}

	execv(params->pathname, params->argv);
	perror_msg_and_die("exec");
}
1049
1050static void
1051startup_child(char **argv)
1052{
1053	struct stat statbuf;
1054	const char *filename;
1055	char pathname[MAXPATHLEN];
1056	int pid;
1057	struct tcb *tcp;
1058
1059	filename = argv[0];
1060	if (strchr(filename, '/')) {
1061		if (strlen(filename) > sizeof pathname - 1) {
1062			errno = ENAMETOOLONG;
1063			perror_msg_and_die("exec");
1064		}
1065		strcpy(pathname, filename);
1066	}
1067#ifdef USE_DEBUGGING_EXEC
1068	/*
1069	 * Debuggers customarily check the current directory
1070	 * first regardless of the path but doing that gives
1071	 * security geeks a panic attack.
1072	 */
1073	else if (stat(filename, &statbuf) == 0)
1074		strcpy(pathname, filename);
1075#endif /* USE_DEBUGGING_EXEC */
1076	else {
1077		const char *path;
1078		int m, n, len;
1079
1080		for (path = getenv("PATH"); path && *path; path += m) {
1081			const char *colon = strchr(path, ':');
1082			if (colon) {
1083				n = colon - path;
1084				m = n + 1;
1085			}
1086			else
1087				m = n = strlen(path);
1088			if (n == 0) {
1089				if (!getcwd(pathname, MAXPATHLEN))
1090					continue;
1091				len = strlen(pathname);
1092			}
1093			else if (n > sizeof pathname - 1)
1094				continue;
1095			else {
1096				strncpy(pathname, path, n);
1097				len = n;
1098			}
1099			if (len && pathname[len - 1] != '/')
1100				pathname[len++] = '/';
1101			strcpy(pathname + len, filename);
1102			if (stat(pathname, &statbuf) == 0 &&
1103			    /* Accept only regular files
1104			       with some execute bits set.
1105			       XXX not perfect, might still fail */
1106			    S_ISREG(statbuf.st_mode) &&
1107			    (statbuf.st_mode & 0111))
1108				break;
1109		}
1110	}
1111	if (stat(pathname, &statbuf) < 0) {
1112		perror_msg_and_die("Can't stat '%s'", filename);
1113	}
1114
1115	params_for_tracee.fd_to_close = (shared_log != stderr) ? fileno(shared_log) : -1;
1116	params_for_tracee.run_euid = (statbuf.st_mode & S_ISUID) ? statbuf.st_uid : run_uid;
1117	params_for_tracee.run_egid = (statbuf.st_mode & S_ISGID) ? statbuf.st_gid : run_gid;
1118	params_for_tracee.argv = argv;
1119	/*
1120	 * On NOMMU, can be safely freed only after execve in tracee.
1121	 * It's hard to know when that happens, so we just leak it.
1122	 */
1123	params_for_tracee.pathname = strdup(pathname);
1124
1125	strace_child = pid = fork();
1126	if (pid < 0) {
1127		perror_msg_and_die("fork");
1128	}
1129	if ((pid != 0 && daemonized_tracer)
1130	 || (pid == 0 && !daemonized_tracer)
1131	) {
1132		/* We are to become the tracee. Two cases:
1133		 * -D: we are parent
1134		 * not -D: we are child
1135		 */
1136		exec_or_die();
1137	}
1138
1139	/* We are the tracer */
1140
1141	if (!daemonized_tracer) {
1142		if (!use_seize) {
1143			/* child did PTRACE_TRACEME, nothing to do in parent */
1144		} else {
1145			if (!NOMMU_SYSTEM) {
1146				/* Wait until child stopped itself */
1147				int status;
1148				while (waitpid(pid, &status, WSTOPPED) < 0) {
1149					if (errno == EINTR)
1150						continue;
1151					perror_msg_and_die("waitpid");
1152				}
1153				if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) {
1154					kill_save_errno(pid, SIGKILL);
1155					perror_msg_and_die("Unexpected wait status %x", status);
1156				}
1157			}
1158			/* Else: NOMMU case, we have no way to sync.
1159			 * Just attach to it as soon as possible.
1160			 * This means that we may miss a few first syscalls...
1161			 */
1162
1163			if (ptrace_attach_or_seize(pid)) {
1164				kill_save_errno(pid, SIGKILL);
1165				perror_msg_and_die("Can't attach to %d", pid);
1166			}
1167			if (!NOMMU_SYSTEM)
1168				kill(pid, SIGCONT);
1169		}
1170		tcp = alloctcb(pid);
1171		if (!NOMMU_SYSTEM)
1172			tcp->flags |= TCB_ATTACHED | TCB_STRACE_CHILD | TCB_STARTUP | post_attach_sigstop;
1173		else
1174			tcp->flags |= TCB_ATTACHED | TCB_STRACE_CHILD | TCB_STARTUP;
1175		newoutf(tcp);
1176	}
1177	else {
1178		/* With -D, *we* are child here, IOW: different pid. Fetch it: */
1179		strace_tracer_pid = getpid();
1180		/* The tracee is our parent: */
1181		pid = getppid();
1182		alloctcb(pid);
1183		/* attaching will be done later, by startup_attach */
1184		/* note: we don't do newoutf(tcp) here either! */
1185
1186		/* NOMMU BUG! -D mode is active, we (child) return,
1187		 * and we will scribble over parent's stack!
1188		 * When parent later unpauses, it segfaults.
1189		 *
1190		 * We work around it
1191		 * (1) by declaring exec_or_die() NORETURN,
1192		 * hopefully compiler will just jump to it
1193		 * instead of call (won't push anything to stack),
1194		 * (2) by trying very hard in exec_or_die()
1195		 * to not use any stack,
1196		 * (3) having a really big (MAXPATHLEN) stack object
1197		 * in this function, which creates a "buffer" between
1198		 * child's and parent's stack pointers.
1199		 * This may save us if (1) and (2) failed
1200		 * and compiler decided to use stack in exec_or_die() anyway
1201		 * (happens on i386 because of stack parameter passing).
1202		 */
1203	}
1204}
1205
1206/*
1207 * Test whether the kernel support PTRACE_O_TRACECLONE et al options.
1208 * First fork a new child, call ptrace with PTRACE_SETOPTIONS on it,
1209 * and then see which options are supported by the kernel.
1210 */
1211static int
1212test_ptrace_setoptions_followfork(void)
1213{
1214	int pid, expected_grandchild = 0, found_grandchild = 0;
1215	const unsigned int test_options = PTRACE_O_TRACECLONE |
1216					  PTRACE_O_TRACEFORK |
1217					  PTRACE_O_TRACEVFORK;
1218
1219	pid = fork();
1220	if (pid < 0)
1221		perror_msg_and_die("fork");
1222	if (pid == 0) {
1223		pid = getpid();
1224		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
1225			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
1226					   __func__);
1227		kill_save_errno(pid, SIGSTOP);
1228		if (fork() < 0)
1229			perror_msg_and_die("fork");
1230		_exit(0);
1231	}
1232
1233	while (1) {
1234		int status, tracee_pid;
1235
1236		errno = 0;
1237		tracee_pid = wait(&status);
1238		if (tracee_pid <= 0) {
1239			if (errno == EINTR)
1240				continue;
1241			if (errno == ECHILD)
1242				break;
1243			kill_save_errno(pid, SIGKILL);
1244			perror_msg_and_die("%s: unexpected wait result %d",
1245					   __func__, tracee_pid);
1246		}
1247		if (WIFEXITED(status)) {
1248			if (WEXITSTATUS(status)) {
1249				if (tracee_pid != pid)
1250					kill_save_errno(pid, SIGKILL);
1251				error_msg_and_die("%s: unexpected exit status %u",
1252						  __func__, WEXITSTATUS(status));
1253			}
1254			continue;
1255		}
1256		if (WIFSIGNALED(status)) {
1257			if (tracee_pid != pid)
1258				kill_save_errno(pid, SIGKILL);
1259			error_msg_and_die("%s: unexpected signal %u",
1260					  __func__, WTERMSIG(status));
1261		}
1262		if (!WIFSTOPPED(status)) {
1263			if (tracee_pid != pid)
1264				kill_save_errno(tracee_pid, SIGKILL);
1265			kill_save_errno(pid, SIGKILL);
1266			error_msg_and_die("%s: unexpected wait status %x",
1267					  __func__, status);
1268		}
1269		if (tracee_pid != pid) {
1270			found_grandchild = tracee_pid;
1271			if (ptrace(PTRACE_CONT, tracee_pid, 0, 0) < 0) {
1272				kill_save_errno(tracee_pid, SIGKILL);
1273				kill_save_errno(pid, SIGKILL);
1274				perror_msg_and_die("PTRACE_CONT doesn't work");
1275			}
1276			continue;
1277		}
1278		switch (WSTOPSIG(status)) {
1279		case SIGSTOP:
1280			if (ptrace(PTRACE_SETOPTIONS, pid, 0, test_options) < 0
1281			    && errno != EINVAL && errno != EIO)
1282				perror_msg("PTRACE_SETOPTIONS");
1283			break;
1284		case SIGTRAP:
1285			if (status >> 16 == PTRACE_EVENT_FORK) {
1286				long msg = 0;
1287
1288				if (ptrace(PTRACE_GETEVENTMSG, pid,
1289					   NULL, (long) &msg) == 0)
1290					expected_grandchild = msg;
1291			}
1292			break;
1293		}
1294		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) {
1295			kill_save_errno(pid, SIGKILL);
1296			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
1297		}
1298	}
1299	if (expected_grandchild && expected_grandchild == found_grandchild) {
1300		ptrace_setoptions |= test_options;
1301		if (debug_flag)
1302			fprintf(stderr, "ptrace_setoptions = %#x\n",
1303				ptrace_setoptions);
1304		return 0;
1305	}
1306	error_msg("Test for PTRACE_O_TRACECLONE failed, "
1307		  "giving up using this feature.");
1308	return 1;
1309}
1310
1311/*
1312 * Test whether the kernel support PTRACE_O_TRACESYSGOOD.
1313 * First fork a new child, call ptrace(PTRACE_SETOPTIONS) on it,
1314 * and then see whether it will stop with (SIGTRAP | 0x80).
1315 *
1316 * Use of this option enables correct handling of user-generated SIGTRAPs,
1317 * and SIGTRAPs generated by special instructions such as int3 on x86:
1318 * _start:	.globl	_start
1319 *		int3
1320 *		movl	$42, %ebx
1321 *		movl	$1, %eax
1322 *		int	$0x80
1323 * (compile with: "gcc -nostartfiles -nostdlib -o int3 int3.S")
1324 */
1325static int
1326test_ptrace_setoptions_for_all(void)
1327{
1328	const unsigned int test_options = PTRACE_O_TRACESYSGOOD |
1329					  PTRACE_O_TRACEEXEC;
1330	int pid;
1331	int it_worked = 0;
1332
1333	/* this fork test doesn't work on no-mmu systems */
1334	if (NOMMU_SYSTEM)
1335		return 0; /* be bold, and pretend that test succeeded */
1336
1337	pid = fork();
1338	if (pid < 0)
1339		perror_msg_and_die("fork");
1340
1341	if (pid == 0) {
1342		pid = getpid();
1343		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
1344			/* Note: exits with exitcode 1 */
1345			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
1346					   __func__);
1347		kill(pid, SIGSTOP);
1348		_exit(0); /* parent should see entry into this syscall */
1349	}
1350
1351	while (1) {
1352		int status, tracee_pid;
1353
1354		errno = 0;
1355		tracee_pid = wait(&status);
1356		if (tracee_pid <= 0) {
1357			if (errno == EINTR)
1358				continue;
1359			kill_save_errno(pid, SIGKILL);
1360			perror_msg_and_die("%s: unexpected wait result %d",
1361					   __func__, tracee_pid);
1362		}
1363		if (WIFEXITED(status)) {
1364			if (WEXITSTATUS(status) == 0)
1365				break;
1366			error_msg_and_die("%s: unexpected exit status %u",
1367					  __func__, WEXITSTATUS(status));
1368		}
1369		if (WIFSIGNALED(status)) {
1370			error_msg_and_die("%s: unexpected signal %u",
1371					  __func__, WTERMSIG(status));
1372		}
1373		if (!WIFSTOPPED(status)) {
1374			kill(pid, SIGKILL);
1375			error_msg_and_die("%s: unexpected wait status %x",
1376					  __func__, status);
1377		}
1378		if (WSTOPSIG(status) == SIGSTOP) {
1379			/*
1380			 * We don't check "options aren't accepted" error.
1381			 * If it happens, we'll never get (SIGTRAP | 0x80),
1382			 * and thus will decide to not use the option.
1383			 * IOW: the outcome of the test will be correct.
1384			 */
1385			if (ptrace(PTRACE_SETOPTIONS, pid, 0L, test_options) < 0
1386			    && errno != EINVAL && errno != EIO)
1387				perror_msg("PTRACE_SETOPTIONS");
1388		}
1389		if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
1390			it_worked = 1;
1391		}
1392		if (ptrace(PTRACE_SYSCALL, pid, 0L, 0L) < 0) {
1393			kill_save_errno(pid, SIGKILL);
1394			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
1395		}
1396	}
1397
1398	if (it_worked) {
1399		syscall_trap_sig = (SIGTRAP | 0x80);
1400		ptrace_setoptions |= test_options;
1401		if (debug_flag)
1402			fprintf(stderr, "ptrace_setoptions = %#x\n",
1403				ptrace_setoptions);
1404		return 0;
1405	}
1406
1407	error_msg("Test for PTRACE_O_TRACESYSGOOD failed, "
1408		  "giving up using this feature.");
1409	return 1;
1410}
1411
#if USE_SEIZE
/*
 * Probe whether PTRACE_SEIZE works by seizing a throw-away child.
 * If it does (or if we are on NOMMU, where the probe is skipped),
 * post_attach_sigstop is cleared, which in turn enables use_seize.
 * The child is always killed and reaped before returning.
 */
static void
test_ptrace_seize(void)
{
	int pid;

	/* this fork test doesn't work on no-mmu systems */
	if (NOMMU_SYSTEM) {
		post_attach_sigstop = 0; /* this sets use_seize to 1 */
		return;
	}

	pid = fork();
	if (pid < 0)
		perror_msg_and_die("fork");

	if (pid == 0) {
		/* Child: just sit there until the parent kills us */
		pause();
		_exit(0);
	}

	/* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap.  After
	 * attaching tracee continues to run unless a trap condition occurs.
	 * PTRACE_SEIZE doesn't affect signal or group stop state.
	 */
	if (ptrace(PTRACE_SEIZE, pid, 0, 0) != 0) {
		if (debug_flag)
			fprintf(stderr, "PTRACE_SEIZE doesn't work\n");
	} else {
		post_attach_sigstop = 0; /* this sets use_seize to 1 */
	}

	kill(pid, SIGKILL);

	/* Reap the child; the only acceptable outcome is death by signal */
	for (;;) {
		int status;
		int waited_pid;

		errno = 0;
		waited_pid = waitpid(pid, &status, 0);
		if (waited_pid > 0) {
			if (!WIFSIGNALED(status))
				error_msg_and_die("%s: unexpected wait status %x",
						__func__, status);
			return;
		}
		if (errno != EINTR)
			perror_msg_and_die("%s: unexpected wait result %d",
					 __func__, waited_pid);
	}
}
#else /* !USE_SEIZE */
# define test_ptrace_seize() ((void)0)
#endif
1466
/*
 * Return the version of the running kernel, taken from uname(),
 * encoded the same way as KERNEL_VERSION(major, minor, patch).
 *
 * Accepted forms of the release string are "X.Y.Z[garbage]" and
 * "X.Y[garbage]" (the latter is treated as "X.Y.0"); anything else
 * is a fatal error.
 *
 * Each component is clamped to 255 so that a large number
 * (for example the patch level in "4.14.300") cannot spill into
 * the bits which belong to the preceding component.
 */
static unsigned
get_os_release(void)
{
	unsigned rel;
	const char *p;
	struct utsname u;
	if (uname(&u) < 0)
		perror_msg_and_die("uname");
	/* u.release has this form: "3.2.9[-some-garbage]" */
	rel = 0;
	p = u.release;
	for (;;) {
		unsigned part;

		if (!(*p >= '0' && *p <= '9'))
			error_msg_and_die("Bad OS release string: '%s'", u.release);
		/* Every component gets exactly 8 bits in the result */
		part = atoi(p);
		if (part > 255)
			part = 255;
		/* Note: this open-codes KERNEL_VERSION(): */
		rel = (rel << 8) | part;
		/* Three components collected (assuming major >= 1)? Done */
		if (rel >= KERNEL_VERSION(1,0,0))
			break;
		while (*p >= '0' && *p <= '9')
			p++;
		if (*p != '.') {
			if (rel >= KERNEL_VERSION(0,1,0)) {
				/* "X.Y-something" means "X.Y.0" */
				rel <<= 8;
				break;
			}
			error_msg_and_die("Bad OS release string: '%s'", u.release);
		}
		p++;
	}
	return rel;
}
1499
1500/*
1501 * Initialization part of main() was eating much stack (~0.5k),
1502 * which was unused after init.
1503 * We can reuse it if we move init code into a separate function.
1504 *
1505 * Don't want main() to inline us and defeat the reason
1506 * we have a separate function.
1507 */
1508static void __attribute__ ((noinline))
1509init(int argc, char *argv[])
1510{
1511	struct tcb *tcp;
1512	int c, i;
1513	int optF = 0;
1514	struct sigaction sa;
1515
1516	progname = argv[0] ? argv[0] : "strace";
1517
1518	/* Make sure SIGCHLD has the default action so that waitpid
1519	   definitely works without losing track of children.  The user
1520	   should not have given us a bogus state to inherit, but he might
1521	   have.  Arguably we should detect SIG_IGN here and pass it on
1522	   to children, but probably noone really needs that.  */
1523	signal(SIGCHLD, SIG_DFL);
1524
1525	strace_tracer_pid = getpid();
1526
1527	os_release = get_os_release();
1528
1529	/* Allocate the initial tcbtab.  */
1530	tcbtabsize = argc;	/* Surely enough for all -p args.  */
1531	tcbtab = calloc(tcbtabsize, sizeof(tcbtab[0]));
1532	if (!tcbtab)
1533		die_out_of_memory();
1534	tcp = calloc(tcbtabsize, sizeof(*tcp));
1535	if (!tcp)
1536		die_out_of_memory();
1537	for (c = 0; c < tcbtabsize; c++)
1538		tcbtab[c] = tcp++;
1539
1540	shared_log = stderr;
1541	set_sortby(DEFAULT_SORTBY);
1542	set_personality(DEFAULT_PERSONALITY);
1543	qualify("trace=all");
1544	qualify("abbrev=all");
1545	qualify("verbose=all");
1546#if DEFAULT_QUAL_FLAGS != (QUAL_TRACE | QUAL_ABBREV | QUAL_VERBOSE)
1547# error Bug in DEFAULT_QUAL_FLAGS
1548#endif
1549	qualify("signal=all");
1550	while ((c = getopt(argc, argv,
1551		"+bcCdfFhiqrtTvVxyz"
1552		"D"
1553		"a:e:o:O:p:s:S:u:E:P:I:")) != EOF) {
1554		switch (c) {
1555		case 'b':
1556			detach_on_execve = 1;
1557			break;
1558		case 'c':
1559			if (cflag == CFLAG_BOTH) {
1560				error_msg_and_die("-c and -C are mutually exclusive");
1561			}
1562			cflag = CFLAG_ONLY_STATS;
1563			break;
1564		case 'C':
1565			if (cflag == CFLAG_ONLY_STATS) {
1566				error_msg_and_die("-c and -C are mutually exclusive");
1567			}
1568			cflag = CFLAG_BOTH;
1569			break;
1570		case 'd':
1571			debug_flag = 1;
1572			break;
1573		case 'D':
1574			daemonized_tracer = 1;
1575			break;
1576		case 'F':
1577			optF = 1;
1578			break;
1579		case 'f':
1580			followfork++;
1581			break;
1582		case 'h':
1583			usage(stdout, 0);
1584			break;
1585		case 'i':
1586			iflag = 1;
1587			break;
1588		case 'q':
1589			qflag = 1;
1590			break;
1591		case 'r':
1592			rflag = 1;
1593			/* fall through to tflag++ */
1594		case 't':
1595			tflag++;
1596			break;
1597		case 'T':
1598			Tflag = 1;
1599			break;
1600		case 'x':
1601			xflag++;
1602			break;
1603		case 'y':
1604			show_fd_path = 1;
1605			break;
1606		case 'v':
1607			qualify("abbrev=none");
1608			break;
1609		case 'V':
1610			printf("%s -- version %s\n", PACKAGE_NAME, VERSION);
1611			exit(0);
1612			break;
1613		case 'z':
1614			not_failing_only = 1;
1615			break;
1616		case 'a':
1617			acolumn = string_to_uint(optarg);
1618			if (acolumn < 0)
1619				error_opt_arg(c, optarg);
1620			break;
1621		case 'e':
1622			qualify(optarg);
1623			break;
1624		case 'o':
1625			outfname = strdup(optarg);
1626			break;
1627		case 'O':
1628			i = string_to_uint(optarg);
1629			if (i < 0)
1630				error_opt_arg(c, optarg);
1631			set_overhead(i);
1632			break;
1633		case 'p':
1634			process_opt_p_list(optarg);
1635			break;
1636		case 'P':
1637			tracing_paths = 1;
1638			if (pathtrace_select(optarg)) {
1639				error_msg_and_die("Failed to select path '%s'", optarg);
1640			}
1641			break;
1642		case 's':
1643			i = string_to_uint(optarg);
1644			if (i < 0)
1645				error_opt_arg(c, optarg);
1646			max_strlen = i;
1647			break;
1648		case 'S':
1649			set_sortby(optarg);
1650			break;
1651		case 'u':
1652			username = strdup(optarg);
1653			break;
1654		case 'E':
1655			if (putenv(optarg) < 0)
1656				die_out_of_memory();
1657			break;
1658		case 'I':
1659			opt_intr = string_to_uint(optarg);
1660			if (opt_intr <= 0 || opt_intr >= NUM_INTR_OPTS)
1661				error_opt_arg(c, optarg);
1662			break;
1663		default:
1664			usage(stderr, 1);
1665			break;
1666		}
1667	}
1668	argv += optind;
1669	/* argc -= optind; - no need, argc is not used below */
1670
1671	acolumn_spaces = malloc(acolumn + 1);
1672	if (!acolumn_spaces)
1673		die_out_of_memory();
1674	memset(acolumn_spaces, ' ', acolumn);
1675	acolumn_spaces[acolumn] = '\0';
1676
1677	/* Must have PROG [ARGS], or -p PID. Not both. */
1678	if (!argv[0] == !nprocs)
1679		usage(stderr, 1);
1680
1681	if (nprocs != 0 && daemonized_tracer) {
1682		error_msg_and_die("-D and -p are mutually exclusive");
1683	}
1684
1685	if (!followfork)
1686		followfork = optF;
1687
1688	if (followfork >= 2 && cflag) {
1689		error_msg_and_die("(-c or -C) and -ff are mutually exclusive");
1690	}
1691
1692	/* See if they want to run as another user. */
1693	if (username != NULL) {
1694		struct passwd *pent;
1695
1696		if (getuid() != 0 || geteuid() != 0) {
1697			error_msg_and_die("You must be root to use the -u option");
1698		}
1699		pent = getpwnam(username);
1700		if (pent == NULL) {
1701			error_msg_and_die("Cannot find user '%s'", username);
1702		}
1703		run_uid = pent->pw_uid;
1704		run_gid = pent->pw_gid;
1705	}
1706	else {
1707		run_uid = getuid();
1708		run_gid = getgid();
1709	}
1710
1711	/*
1712	 * On any reasonably recent Linux kernel (circa about 2.5.46)
1713	 * need_fork_exec_workarounds should stay 0 after these tests:
1714	 */
1715	/*need_fork_exec_workarounds = 0; - already is */
1716	if (followfork)
1717		need_fork_exec_workarounds = test_ptrace_setoptions_followfork();
1718	need_fork_exec_workarounds |= test_ptrace_setoptions_for_all();
1719	test_ptrace_seize();
1720
1721	/* Check if they want to redirect the output. */
1722	if (outfname) {
1723		/* See if they want to pipe the output. */
1724		if (outfname[0] == '|' || outfname[0] == '!') {
1725			/*
1726			 * We can't do the <outfname>.PID funny business
1727			 * when using popen, so prohibit it.
1728			 */
1729			if (followfork >= 2)
1730				error_msg_and_die("Piping the output and -ff are mutually exclusive");
1731			shared_log = strace_popen(outfname + 1);
1732		}
1733		else if (followfork < 2)
1734			shared_log = strace_fopen(outfname);
1735	} else {
1736		/* -ff without -o FILE is the same as single -f */
1737		if (followfork >= 2)
1738			followfork = 1;
1739	}
1740
1741	if (!outfname || outfname[0] == '|' || outfname[0] == '!') {
1742		char *buf = malloc(BUFSIZ);
1743		if (!buf)
1744			die_out_of_memory();
1745		setvbuf(shared_log, buf, _IOLBF, BUFSIZ);
1746	}
1747	if (outfname && argv[0]) {
1748		if (!opt_intr)
1749			opt_intr = INTR_NEVER;
1750		qflag = 1;
1751	}
1752	if (!opt_intr)
1753		opt_intr = INTR_WHILE_WAIT;
1754
1755	/* argv[0]	-pPID	-oFILE	Default interactive setting
1756	 * yes		0	0	INTR_WHILE_WAIT
1757	 * no		1	0	INTR_WHILE_WAIT
1758	 * yes		0	1	INTR_NEVER
1759	 * no		1	1	INTR_WHILE_WAIT
1760	 */
1761
1762	sigemptyset(&empty_set);
1763	sigemptyset(&blocked_set);
1764
1765	/* startup_child() must be called before the signal handlers get
1766	 * installed below as they are inherited into the spawned process.
1767	 * Also we do not need to be protected by them as during interruption
1768	 * in the startup_child() mode we kill the spawned process anyway.
1769	 */
1770	if (argv[0]) {
1771		skip_startup_execve = 1;
1772		startup_child(argv);
1773	}
1774
1775	sa.sa_handler = SIG_IGN;
1776	sigemptyset(&sa.sa_mask);
1777	sa.sa_flags = 0;
1778	sigaction(SIGTTOU, &sa, NULL); /* SIG_IGN */
1779	sigaction(SIGTTIN, &sa, NULL); /* SIG_IGN */
1780	if (opt_intr != INTR_ANYWHERE) {
1781		if (opt_intr == INTR_BLOCK_TSTP_TOO)
1782			sigaction(SIGTSTP, &sa, NULL); /* SIG_IGN */
1783		/*
1784		 * In interactive mode (if no -o OUTFILE, or -p PID is used),
1785		 * fatal signals are blocked while syscall stop is processed,
1786		 * and acted on in between, when waiting for new syscall stops.
1787		 * In non-interactive mode, signals are ignored.
1788		 */
1789		if (opt_intr == INTR_WHILE_WAIT) {
1790			sigaddset(&blocked_set, SIGHUP);
1791			sigaddset(&blocked_set, SIGINT);
1792			sigaddset(&blocked_set, SIGQUIT);
1793			sigaddset(&blocked_set, SIGPIPE);
1794			sigaddset(&blocked_set, SIGTERM);
1795			sa.sa_handler = interrupt;
1796		}
1797		/* SIG_IGN, or set handler for these */
1798		sigaction(SIGHUP, &sa, NULL);
1799		sigaction(SIGINT, &sa, NULL);
1800		sigaction(SIGQUIT, &sa, NULL);
1801		sigaction(SIGPIPE, &sa, NULL);
1802		sigaction(SIGTERM, &sa, NULL);
1803	}
1804	if (nprocs != 0 || daemonized_tracer)
1805		startup_attach();
1806
1807	/* Do we want pids printed in our -o OUTFILE?
1808	 * -ff: no (every pid has its own file); or
1809	 * -f: yes (there can be more pids in the future); or
1810	 * -p PID1,PID2: yes (there are already more than one pid)
1811	 */
1812	print_pid_pfx = (outfname && followfork < 2 && (followfork == 1 || nprocs > 1));
1813}
1814
1815static struct tcb *
1816pid2tcb(int pid)
1817{
1818	int i;
1819
1820	if (pid <= 0)
1821		return NULL;
1822
1823	for (i = 0; i < tcbtabsize; i++) {
1824		struct tcb *tcp = tcbtab[i];
1825		if (tcp->pid == pid && (tcp->flags & TCB_INUSE))
1826			return tcp;
1827	}
1828
1829	return NULL;
1830}
1831
1832static void
1833cleanup(void)
1834{
1835	int i;
1836	struct tcb *tcp;
1837	int fatal_sig;
1838
1839	/* 'interrupted' is a volatile object, fetch it only once */
1840	fatal_sig = interrupted;
1841	if (!fatal_sig)
1842		fatal_sig = SIGTERM;
1843
1844	for (i = 0; i < tcbtabsize; i++) {
1845		tcp = tcbtab[i];
1846		if (!(tcp->flags & TCB_INUSE))
1847			continue;
1848		if (debug_flag)
1849			fprintf(stderr,
1850				"cleanup: looking at pid %u\n", tcp->pid);
1851		if (tcp->flags & TCB_STRACE_CHILD) {
1852			kill(tcp->pid, SIGCONT);
1853			kill(tcp->pid, fatal_sig);
1854		}
1855		detach(tcp);
1856	}
1857	if (cflag)
1858		call_summary(shared_log);
1859}
1860
1861static void
1862interrupt(int sig)
1863{
1864	interrupted = sig;
1865}
1866
1867static int
1868trace(void)
1869{
1870	struct rusage ru;
1871	struct rusage *rup = cflag ? &ru : NULL;
1872#ifdef __WALL
1873	static int wait4_options = __WALL;
1874#endif
1875
1876	while (nprocs != 0) {
1877		int pid;
1878		int wait_errno;
1879		int status, sig;
1880		int stopped;
1881		struct tcb *tcp;
1882		unsigned event;
1883
1884		if (interrupted)
1885			return 0;
1886		if (interactive)
1887			sigprocmask(SIG_SETMASK, &empty_set, NULL);
1888#ifdef __WALL
1889		pid = wait4(-1, &status, wait4_options, rup);
1890		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
1891			/* this kernel does not support __WALL */
1892			wait4_options &= ~__WALL;
1893			pid = wait4(-1, &status, wait4_options, rup);
1894		}
1895		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
1896			/* most likely a "cloned" process */
1897			pid = wait4(-1, &status, __WCLONE, rup);
1898			if (pid < 0) {
1899				perror_msg("wait4(__WCLONE) failed");
1900			}
1901		}
1902#else
1903		pid = wait4(-1, &status, 0, rup);
1904#endif /* __WALL */
1905		wait_errno = errno;
1906		if (interactive)
1907			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
1908
1909		if (pid < 0) {
1910			switch (wait_errno) {
1911			case EINTR:
1912				continue;
1913			case ECHILD:
1914				/*
1915				 * We would like to verify this case
1916				 * but sometimes a race in Solbourne's
1917				 * version of SunOS sometimes reports
1918				 * ECHILD before sending us SIGCHILD.
1919				 */
1920				return 0;
1921			default:
1922				errno = wait_errno;
1923				perror_msg("wait");
1924				return -1;
1925			}
1926		}
1927		if (pid == popen_pid) {
1928			if (WIFEXITED(status) || WIFSIGNALED(status))
1929				popen_pid = 0;
1930			continue;
1931		}
1932
1933		event = ((unsigned)status >> 16);
1934		if (debug_flag) {
1935			char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
1936			char evbuf[sizeof(",PTRACE_EVENT_?? (%u)") + sizeof(int)*3 /*paranoia:*/ + 16];
1937			strcpy(buf, "???");
1938			if (WIFSIGNALED(status))
1939#ifdef WCOREDUMP
1940				sprintf(buf, "WIFSIGNALED,%ssig=%s",
1941						WCOREDUMP(status) ? "core," : "",
1942						signame(WTERMSIG(status)));
1943#else
1944				sprintf(buf, "WIFSIGNALED,sig=%s",
1945						signame(WTERMSIG(status)));
1946#endif
1947			if (WIFEXITED(status))
1948				sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
1949			if (WIFSTOPPED(status))
1950				sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
1951#ifdef WIFCONTINUED
1952			if (WIFCONTINUED(status))
1953				strcpy(buf, "WIFCONTINUED");
1954#endif
1955			evbuf[0] = '\0';
1956			if (event != 0) {
1957				static const char *const event_names[] = {
1958					[PTRACE_EVENT_CLONE] = "CLONE",
1959					[PTRACE_EVENT_FORK]  = "FORK",
1960					[PTRACE_EVENT_VFORK] = "VFORK",
1961					[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
1962					[PTRACE_EVENT_EXEC]  = "EXEC",
1963					[PTRACE_EVENT_EXIT]  = "EXIT",
1964				};
1965				const char *e;
1966				if (event < ARRAY_SIZE(event_names))
1967					e = event_names[event];
1968				else {
1969					sprintf(buf, "?? (%u)", event);
1970					e = buf;
1971				}
1972				sprintf(evbuf, ",PTRACE_EVENT_%s", e);
1973			}
1974			fprintf(stderr, " [wait(0x%04x) = %u] %s%s\n", status, pid, buf, evbuf);
1975		}
1976
1977		/* Look up 'pid' in our table. */
1978		tcp = pid2tcb(pid);
1979
1980		if (!tcp) {
1981			if (followfork) {
1982				tcp = alloctcb(pid);
1983				tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
1984				newoutf(tcp);
1985				if (!qflag)
1986					fprintf(stderr, "Process %d attached\n",
1987						pid);
1988			} else {
1989				/* This can happen if a clone call used
1990				   CLONE_PTRACE itself.  */
1991				if (WIFSTOPPED(status))
1992					ptrace(PTRACE_CONT, pid, (char *) 0, 0);
1993				error_msg_and_die("Unknown pid: %u", pid);
1994			}
1995		}
1996
1997		clear_regs();
1998		if (WIFSTOPPED(status))
1999			get_regs(pid);
2000
2001		/* Under Linux, execve changes pid to thread leader's pid,
2002		 * and we see this changed pid on EVENT_EXEC and later,
2003		 * execve sysexit. Leader "disappears" without exit
2004		 * notification. Let user know that, drop leader's tcb,
2005		 * and fix up pid in execve thread's tcb.
2006		 * Effectively, execve thread's tcb replaces leader's tcb.
2007		 *
2008		 * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
2009		 * on exit syscall) in multithreaded programs exactly
2010		 * in order to handle this case.
2011		 *
2012		 * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
2013		 * On 2.6 and earlier, it can return garbage.
2014		 */
2015		if (event == PTRACE_EVENT_EXEC && os_release >= KERNEL_VERSION(3,0,0)) {
2016			FILE *fp;
2017			struct tcb *execve_thread;
2018			long old_pid = 0;
2019
2020			if (ptrace(PTRACE_GETEVENTMSG, pid, NULL, (long) &old_pid) < 0)
2021				goto dont_switch_tcbs;
2022			if (old_pid <= 0 || old_pid == pid)
2023				goto dont_switch_tcbs;
2024			execve_thread = pid2tcb(old_pid);
2025			/* It should be !NULL, but I feel paranoid */
2026			if (!execve_thread)
2027				goto dont_switch_tcbs;
2028
2029			if (execve_thread->curcol != 0) {
2030				/*
2031				 * One case we are here is -ff:
2032				 * try "strace -oLOG -ff test/threaded_execve"
2033				 */
2034				fprintf(execve_thread->outf, " <pid changed to %d ...>\n", pid);
2035				/*execve_thread->curcol = 0; - no need, see code below */
2036			}
2037			/* Swap output FILEs (needed for -ff) */
2038			fp = execve_thread->outf;
2039			execve_thread->outf = tcp->outf;
2040			tcp->outf = fp;
2041			/* And their column positions */
2042			execve_thread->curcol = tcp->curcol;
2043			tcp->curcol = 0;
2044			/* Drop leader, but close execve'd thread outfile (if -ff) */
2045			droptcb(tcp);
2046			/* Switch to the thread, reusing leader's outfile and pid */
2047			tcp = execve_thread;
2048			tcp->pid = pid;
2049			if (cflag != CFLAG_ONLY_STATS) {
2050				printleader(tcp);
2051				tprintf("+++ superseded by execve in pid %lu +++\n", old_pid);
2052				line_ended();
2053				tcp->flags |= TCB_REPRINT;
2054			}
2055		}
2056 dont_switch_tcbs:
2057
2058		if (event == PTRACE_EVENT_EXEC && detach_on_execve) {
2059			if (!skip_startup_execve)
2060				detach(tcp);
2061			/* This was initial execve for "strace PROG". Skip. */
2062			skip_startup_execve = 0;
2063		}
2064
2065		/* Set current output file */
2066		current_tcp = tcp;
2067
2068		if (cflag) {
2069			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
2070			tcp->stime = ru.ru_stime;
2071		}
2072
2073		if (WIFSIGNALED(status)) {
2074			if (pid == strace_child)
2075				exit_code = 0x100 | WTERMSIG(status);
2076			if (cflag != CFLAG_ONLY_STATS
2077			    && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL)) {
2078				printleader(tcp);
2079#ifdef WCOREDUMP
2080				tprintf("+++ killed by %s %s+++\n",
2081					signame(WTERMSIG(status)),
2082					WCOREDUMP(status) ? "(core dumped) " : "");
2083#else
2084				tprintf("+++ killed by %s +++\n",
2085					signame(WTERMSIG(status)));
2086#endif
2087				line_ended();
2088			}
2089			droptcb(tcp);
2090			continue;
2091		}
2092		if (WIFEXITED(status)) {
2093			if (pid == strace_child)
2094				exit_code = WEXITSTATUS(status);
2095			if (cflag != CFLAG_ONLY_STATS) {
2096				printleader(tcp);
2097				tprintf("+++ exited with %d +++\n", WEXITSTATUS(status));
2098				line_ended();
2099			}
2100			droptcb(tcp);
2101			continue;
2102		}
2103		if (!WIFSTOPPED(status)) {
2104			fprintf(stderr, "PANIC: pid %u not stopped\n", pid);
2105			droptcb(tcp);
2106			continue;
2107		}
2108
2109		/* Is this the very first time we see this tracee stopped? */
2110		if (tcp->flags & TCB_STARTUP) {
2111			if (debug_flag)
2112				fprintf(stderr, "pid %d has TCB_STARTUP, initializing it\n", tcp->pid);
2113			tcp->flags &= ~TCB_STARTUP;
2114			if (tcp->flags & TCB_BPTSET) {
2115				/*
2116				 * One example is a breakpoint inherited from
2117				 * parent through fork().
2118				 */
2119				if (clearbpt(tcp) < 0) {
2120					/* Pretty fatal */
2121					droptcb(tcp);
2122					cleanup();
2123					return -1;
2124				}
2125			}
2126			if (ptrace_setoptions) {
2127				if (debug_flag)
2128					fprintf(stderr, "setting opts %x on pid %d\n", ptrace_setoptions, tcp->pid);
2129				if (ptrace(PTRACE_SETOPTIONS, tcp->pid, NULL, ptrace_setoptions) < 0) {
2130					if (errno != ESRCH) {
2131						/* Should never happen, really */
2132						perror_msg_and_die("PTRACE_SETOPTIONS");
2133					}
2134				}
2135			}
2136		}
2137
2138		sig = WSTOPSIG(status);
2139
2140		if (event != 0) {
2141			/* Ptrace event */
2142#if USE_SEIZE
2143			if (event == PTRACE_EVENT_STOP) {
2144				/*
2145				 * PTRACE_INTERRUPT-stop or group-stop.
2146				 * PTRACE_INTERRUPT-stop has sig == SIGTRAP here.
2147				 */
2148				if (sig == SIGSTOP
2149				 || sig == SIGTSTP
2150				 || sig == SIGTTIN
2151				 || sig == SIGTTOU
2152				) {
2153					stopped = 1;
2154					goto show_stopsig;
2155				}
2156			}
2157#endif
2158			goto restart_tracee_with_sig_0;
2159		}
2160
2161		/* Is this post-attach SIGSTOP?
2162		 * Interestingly, the process may stop
2163		 * with STOPSIG equal to some other signal
2164		 * than SIGSTOP if we happend to attach
2165		 * just before the process takes a signal.
2166		 */
2167		if (sig == SIGSTOP && (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
2168			if (debug_flag)
2169				fprintf(stderr, "ignored SIGSTOP on pid %d\n", tcp->pid);
2170			tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
2171			goto restart_tracee_with_sig_0;
2172		}
2173
2174		if (sig != syscall_trap_sig) {
2175			siginfo_t si;
2176
2177			/* Nonzero (true) if tracee is stopped by signal
2178			 * (as opposed to "tracee received signal").
2179			 * TODO: shouldn't we check for errno == EINVAL too?
2180			 * We can get ESRCH instead, you know...
2181			 */
2182			stopped = (ptrace(PTRACE_GETSIGINFO, pid, 0, (long) &si) < 0);
2183#if USE_SEIZE
2184 show_stopsig:
2185#endif
2186			if (cflag != CFLAG_ONLY_STATS
2187			    && (qual_flags[sig] & QUAL_SIGNAL)) {
2188#if defined(PT_CR_IPSR) && defined(PT_CR_IIP)
2189				long pc = 0;
2190				long psr = 0;
2191
2192				upeek(tcp, PT_CR_IPSR, &psr);
2193				upeek(tcp, PT_CR_IIP, &pc);
2194
2195# define PSR_RI	41
2196				pc += (psr >> PSR_RI) & 0x3;
2197# define PC_FORMAT_STR	" @ %lx"
2198# define PC_FORMAT_ARG	, pc
2199#else
2200# define PC_FORMAT_STR	""
2201# define PC_FORMAT_ARG	/* nothing */
2202#endif
2203				printleader(tcp);
2204				if (!stopped) {
2205					tprintf("--- %s ", signame(sig));
2206					printsiginfo(&si, verbose(tcp));
2207					tprintf(PC_FORMAT_STR " ---\n"
2208						PC_FORMAT_ARG);
2209				} else
2210					tprintf("--- stopped by %s" PC_FORMAT_STR " ---\n",
2211						signame(sig)
2212						PC_FORMAT_ARG);
2213				line_ended();
2214			}
2215
2216			if (!stopped)
2217				/* It's signal-delivery-stop. Inject the signal */
2218				goto restart_tracee;
2219
2220			/* It's group-stop */
2221#if USE_SEIZE
2222			if (use_seize) {
2223				/*
2224				 * This ends ptrace-stop, but does *not* end group-stop.
2225				 * This makes stopping signals work properly on straced process
2226				 * (that is, process really stops. It used to continue to run).
2227				 */
2228				if (ptrace_restart(PTRACE_LISTEN, tcp, 0) < 0) {
2229					cleanup();
2230					return -1;
2231				}
2232				continue;
2233			}
2234			/* We don't have PTRACE_LISTEN support... */
2235#endif
2236			goto restart_tracee;
2237		}
2238
2239		/* We handled quick cases, we are permitted to interrupt now. */
2240		if (interrupted)
2241			return 0;
2242
2243		/* This should be syscall entry or exit.
2244		 * (Or it still can be that pesky post-execve SIGTRAP!)
2245		 * Handle it.
2246		 */
2247		if (trace_syscall(tcp) < 0) {
2248			/* ptrace() failed in trace_syscall().
2249			 * Likely a result of process disappearing mid-flight.
2250			 * Observed case: exit_group() or SIGKILL terminating
2251			 * all processes in thread group.
2252			 * We assume that ptrace error was caused by process death.
2253			 * We used to detach(tcp) here, but since we no longer
2254			 * implement "detach before death" policy/hack,
			 * we can let this process report its death to us
2256			 * normally, via WIFEXITED or WIFSIGNALED wait status.
2257			 */
2258			continue;
2259		}
2260 restart_tracee_with_sig_0:
2261		sig = 0;
2262 restart_tracee:
2263		if (ptrace_restart(PTRACE_SYSCALL, tcp, sig) < 0) {
2264			cleanup();
2265			return -1;
2266		}
2267	}
2268	return 0;
2269}
2270
2271int
2272main(int argc, char *argv[])
2273{
2274	init(argc, argv);
2275
2276	/* Run main tracing loop */
2277	if (trace() < 0)
2278		return 1;
2279
2280	cleanup();
2281	fflush(NULL);
2282	if (shared_log != stderr)
2283		fclose(shared_log);
2284	if (popen_pid) {
2285		while (waitpid(popen_pid, NULL, 0) < 0 && errno == EINTR)
2286			;
2287	}
2288	if (exit_code > 0xff) {
2289		/* Avoid potential core file clobbering.  */
2290		struct rlimit rlim = {0, 0};
2291		setrlimit(RLIMIT_CORE, &rlim);
2292
2293		/* Child was killed by a signal, mimic that.  */
2294		exit_code &= 0xff;
2295		signal(exit_code, SIG_DFL);
2296		raise(exit_code);
2297		/* Paranoia - what if this signal is not fatal?
2298		   Exit with 128 + signo then.  */
2299		exit_code += 128;
2300	}
2301
2302	return exit_code;
2303}
2304