strace.c revision 800ec8ffde1296b3f2cfdd838fb08f2ff2bbe946
1/*
2 * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3 * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4 * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5 * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "defs.h"
32#include <stdarg.h>
33#include <sys/param.h>
34#include <fcntl.h>
35#include <sys/resource.h>
36#include <sys/wait.h>
37#include <sys/stat.h>
38#include <pwd.h>
39#include <grp.h>
40#include <dirent.h>
41#include <sys/utsname.h>
42#if defined(IA64)
43# include <asm/ptrace_offsets.h>
44#endif
/* Some libcs do not declare these, so declare them ourselves: */
46extern char **environ;
47extern int optind;
48extern char *optarg;
49
50
/*
 * my_tkill(tid, sig): send signal 'sig' to the single task 'tid'.
 * Expands to the raw tkill syscall when __NR_tkill is defined,
 * otherwise falls back to kill() (with a build-time warning).
 */
#if defined __NR_tkill
# define my_tkill(tid, sig) syscall(__NR_tkill, (tid), (sig))
#else
   /* kill() may arbitrarily choose the target task of the process group
      while we later wait on that specific TID.  PID process waits become
      TID task specific waits for a process under ptrace(2).  */
# warning "Neither tkill(2) nor tgkill(2) available, risk of strace hangs!"
# define my_tkill(tid, sig) kill((tid), (sig))
#endif
60
/*
 * Pack a three-component version a.b.c into a single integer:
 * a is shifted into bits 16 and up, b into bits 8 and up, c is added as-is.
 * Any previous definition of the macro is discarded first.
 */
#undef KERNEL_VERSION
#define KERNEL_VERSION(a,b,c) ((c) + ((b) << 8) + ((a) << 16))
63
/*
 * Option state.  Non-static variables are visible to other translation
 * units; static ones are private to this file.
 */
cflag_t cflag = CFLAG_NONE;
unsigned int followfork = 0;	/* >= 2 together with an output file name: one output file per tcb */
unsigned int ptrace_setoptions = 0;
unsigned int xflag = 0;
bool debug_flag = 0;		/* nonzero: diagnostic messages go to stderr */
bool Tflag = 0;
bool qflag = 0;			/* nonzero: suppress "attached"/"detached" messages */
/* Which WSTOPSIG(status) value marks syscall traps? */
static unsigned int syscall_trap_sig = SIGTRAP;
static unsigned int tflag = 0;	/* timestamp style used by printleader(): 1, 2 (adds usecs), >2 (epoch) */
static bool iflag = 0;		/* nonzero: printleader() calls printcall() */
static bool rflag = 0;		/* nonzero (with tflag): print time relative to the previous line */
static bool print_pid_pfx = 0;	/* nonzero: printleader() prefixes every line with the pid */
77
/* -I n: values accepted for the interruptibility mode (see usage text) */
enum {
    INTR_NOT_SET        = 0,
    INTR_ANYWHERE       = 1, /* don't block/ignore any signals */
    INTR_WHILE_WAIT     = 2, /* block fatal signals while decoding syscall. default */
    INTR_NEVER          = 3, /* block fatal signals. default if '-o FILE PROG' */
    INTR_BLOCK_TSTP_TOO = 4, /* block fatal signals and SIGTSTP (^Z) */
    NUM_INTR_OPTS            /* number of enumerators above; not a mode itself */
};
/* Currently selected mode; zero-initialized, i.e. INTR_NOT_SET. */
static int opt_intr;
/* We play with signal mask only if this mode is active: */
#define interactive (opt_intr == INTR_WHILE_WAIT)
90
91/*
92 * daemonized_tracer supports -D option.
93 * With this option, strace forks twice.
94 * Unlike normal case, with -D *grandparent* process exec's,
95 * becoming a traced process. Child exits (this prevents traced process
96 * from having children it doesn't expect to have), and grandchild
97 * attaches to grandparent similarly to strace -p PID.
98 * This allows for more transparent interaction in cases
99 * when process and its parent are communicating via signals,
100 * wait() etc. Without -D, strace process gets lodged in between,
101 * disrupting parent<->child link.
102 */
/* Nonzero when the tracer runs as a detached grandchild (-D). */
static bool daemonized_tracer = 0;

/*
 * post_attach_sigstop is OR-ed into tcb flags right after attaching.
 * With USE_SEIZE it is a variable, and use_seize is true exactly when
 * that variable is 0; without USE_SEIZE both are compile-time constants.
 */
#ifdef USE_SEIZE
static int post_attach_sigstop = TCB_IGNORE_ONE_SIGSTOP;
# define use_seize (post_attach_sigstop == 0)
#else
# define post_attach_sigstop TCB_IGNORE_ONE_SIGSTOP
# define use_seize 0
#endif
112
/* Sometimes we want to print only succeeding syscalls. */
bool not_failing_only = 0;

/* Show path associated with fd arguments */
bool show_fd_path = 0;

/* are we filtering traces based on paths? */
bool tracing_paths = 0;

static bool detach_on_execve = 0;
static bool skip_startup_execve = 0;

static int exit_code = 0;
/* pid returned by fork() in startup_child() */
static int strace_child = 0;
/* pid of the process acting as tracer; die() runs cleanup() only in it */
static int strace_tracer_pid = 0;

/* When username is non-NULL, startup_child() switches to run_uid/run_gid */
static char *username = NULL;
static uid_t run_uid;
static gid_t run_gid;

unsigned int max_strlen = DEFAULT_STRLEN;
/* Column that tabto() pads the current line up to */
static unsigned int acolumn = DEFAULT_ACOLUMN;
/* String of spaces that tabto() indexes by curcol -- presumably acolumn long; confirm at its allocation */
static char *acolumn_spaces;
/* Output file name, or NULL when none was given */
static char *outfname = NULL;
/* Stream that tprintf()/tprints() currently write to */
static FILE *outf;
/* tcb whose output line is currently in progress, or NULL */
struct tcb *printing_tcp = NULL;
/* Number of characters printed so far on the current line of outf */
static unsigned int curcol;
/* Table of pointers to tcbs; tcbtabsize slots, nprocs of them in use */
static struct tcb **tcbtab;
static unsigned int nprocs, tcbtabsize;
/* Prefix printed in front of every diagnostic by verror_msg() */
static const char *progname;

static unsigned os_release; /* generated from uname()'s u.release */

static int detach(struct tcb *tcp);
static int trace(void);
static void cleanup(void);
static void interrupt(int sig);
/* Signal masks installed/removed around attach operations in interactive mode */
static sigset_t empty_set, blocked_set;

/* Checked by startup_attach() after unblocking signals; nonzero aborts attaching */
#ifdef HAVE_SIG_ATOMIC_T
static volatile sig_atomic_t interrupted;
#else
static volatile int interrupted;
#endif
157
#ifndef HAVE_STRERROR

#if !HAVE_DECL_SYS_ERRLIST
extern int sys_nerr;
extern char *sys_errlist[];
#endif

/*
 * Replacement strerror() for systems that lack one.
 * Returns the sys_errlist[] entry for err_no, or, when err_no is
 * outside 1..sys_nerr-1, a pointer to a static buffer holding
 * "Unknown error N" (overwritten by the next such call).
 */
const char *
strerror(int err_no)
{
	/* sizeof(int)*3 leaves room for the decimal digits and sign of an int */
	static char buf[sizeof("Unknown error %d") + sizeof(int)*3];

	if (err_no < 1 || err_no >= sys_nerr) {
		sprintf(buf, "Unknown error %d", err_no);
		return buf;
	}
	return sys_errlist[err_no];
}

#endif /* HAVE_STRERROR */
178
/*
 * Print the command-line help text to ofp, substituting the default
 * alignment column, string length and sort key, then terminate the
 * process with exit status exitval.  Does not return.
 */
static void
usage(FILE *ofp, int exitval)
{
	fprintf(ofp, "\
usage: strace [-CdDffhiqrtttTvVxxy] [-I n] [-a column] [-e expr]... [-o file]\n\
              [-p pid]... [-s strsize] [-u username] [-E var=val]...\n\
              [-P path] [PROG [ARGS]]\n\
   or: strace -c [-D] [-I n] [-e expr]... [-O overhead] [-S sortby] [-E var=val]...\n\
              [PROG [ARGS]]\n\
-c -- count time, calls, and errors for each syscall and report summary\n\
-C -- like -c but also print regular output while processes are running\n\
-d -- enable debug output to stderr\n\
-D -- run tracer process as a detached grandchild, not as parent\n\
-f -- follow forks, -ff -- with output into separate files\n\
-F -- attempt to follow vforks (deprecated, use -f)\n\
-i -- print instruction pointer at time of syscall\n\
-I interruptible\n\
   1: no signals are blocked\n\
   2: fatal signals are blocked while decoding syscall (default)\n\
   3: fatal signals are always blocked (default if '-o FILE PROG')\n\
   4: fatal signals and SIGTSTP (^Z) are always blocked\n\
      (useful to make 'strace -o FILE PROG' not stop on ^Z)\n\
-q -- suppress messages about attaching, detaching, etc.\n\
-r -- print relative timestamp, -t -- absolute timestamp, -tt -- with usecs\n\
-T -- print time spent in each syscall\n\
-v -- verbose mode: print unabbreviated argv, stat, termios, etc. args\n\
-x -- print non-ascii strings in hex, -xx -- print all strings in hex\n\
-y -- print paths associated with file descriptor arguments\n\
-h -- print help message\n\
-V -- print version\n\
-a column -- alignment COLUMN for printing syscall results (default %d)\n\
-e expr -- a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
   options: trace, abbrev, verbose, raw, signal, read, or write\n\
-o file -- send trace output to FILE instead of stderr\n\
-O overhead -- set overhead for tracing syscalls to OVERHEAD usecs\n\
-p pid -- trace process with process id PID, may be repeated\n\
-s strsize -- limit length of print strings to STRSIZE chars (default %d)\n\
-S sortby -- sort syscall counts by: time, calls, name, nothing (default %s)\n\
-u username -- run command as username handling setuid and/or setgid\n\
-E var=val -- put var=val in the environment for command\n\
-E var -- remove var from the environment for command\n\
-P path -- trace accesses to path\n\
"
/* this is broken, so don't document it
-z -- print only succeeding syscalls\n\
 */
/* experimental, don't document it yet (option letter may change in the future!)
-b -- detach on successful execve\n\
 */
/* The three arguments fill the two %d and one %s conversions above, in order. */
, DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
	exit(exitval);
}
231
232static void die(void) __attribute__ ((noreturn));
233static void die(void)
234{
235	if (strace_tracer_pid == getpid()) {
236		cflag = 0;
237		cleanup();
238	}
239	exit(1);
240}
241
242static void verror_msg(int err_no, const char *fmt, va_list p)
243{
244	char *msg;
245
246	fflush(NULL);
247
248	/* We want to print entire message with single fprintf to ensure
249	 * message integrity if stderr is shared with other programs.
250	 * Thus we use vasprintf + single fprintf.
251	 */
252	msg = NULL;
253	if (vasprintf(&msg, fmt, p) >= 0) {
254		if (err_no)
255			fprintf(stderr, "%s: %s: %s\n", progname, msg, strerror(err_no));
256		else
257			fprintf(stderr, "%s: %s\n", progname, msg);
258		free(msg);
259	} else {
260		/* malloc in vasprintf failed, try it without malloc */
261		fprintf(stderr, "%s: ", progname);
262		vfprintf(stderr, fmt, p);
263		if (err_no)
264			fprintf(stderr, ": %s\n", strerror(err_no));
265		else
266			putc('\n', stderr);
267	}
268	/* We don't switch stderr to buffered, thus fprintf(stderr)
269	 * always flushes its output and this is not necessary: */
270	/* fflush(stderr); */
271}
272
/* Print a formatted diagnostic to stderr (no errno text) and return. */
void
error_msg(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	verror_msg(0, fmt, ap);
	va_end(ap);
}
280
/* Print a formatted diagnostic to stderr (no errno text), then die(). */
void
error_msg_and_die(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	verror_msg(0, fmt, ap);
	/* die() never returns, so there is no va_end() after it */
	die();
}
288
/*
 * Print a formatted diagnostic to stderr followed by the description
 * of the current errno value, and return.
 */
void
perror_msg(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	verror_msg(errno, fmt, ap);
	va_end(ap);
}
296
/*
 * Print a formatted diagnostic to stderr followed by the description
 * of the current errno value, then die().
 */
void
perror_msg_and_die(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	verror_msg(errno, fmt, ap);
	/* die() never returns, so there is no va_end() after it */
	die();
}
304
/*
 * Report "Out of memory" and terminate.  If this function is entered
 * a second time (i.e. while already reporting), exit immediately
 * without printing anything.
 */
void
die_out_of_memory(void)
{
	static bool recursed = 0;

	if (!recursed) {
		recursed = 1;
		error_msg_and_die("Out of memory");
	}
	exit(1);
}
313
/*
 * Glue for systems without a MMU that cannot provide fork().
 * When HAVE_FORK is not defined, fork() is redirected to vfork() and
 * strace_vforked is 1 so that callers can skip steps that do not work
 * in a vfork child; otherwise strace_vforked is 0.
 */
#ifdef HAVE_FORK
# define strace_vforked 0
#else
# define strace_vforked 1
# define fork()         vfork()
#endif
321
/*
 * Attach to process/task pid.
 * With USE_SEIZE and use_seize true: PTRACE_SEIZE followed by
 * PTRACE_INTERRUPT; otherwise a plain PTRACE_ATTACH.
 * Returns the result of the last ptrace() call made (nonzero result of
 * PTRACE_SEIZE is returned without attempting PTRACE_INTERRUPT).
 */
#ifdef USE_SEIZE
static int
ptrace_attach_or_seize(int pid)
{
	int rc;

	if (use_seize) {
		rc = ptrace(PTRACE_SEIZE, pid, 0, PTRACE_SEIZE_DEVEL);
		if (rc == 0)
			rc = ptrace(PTRACE_INTERRUPT, pid, 0, 0);
		return rc;
	}
	return ptrace(PTRACE_ATTACH, pid, 0, 0);
}
#else
# define ptrace_attach_or_seize(pid) ptrace(PTRACE_ATTACH, (pid), 0, 0)
#endif
338
/*
 * Make sure FD_CLOEXEC is set on descriptor fd.
 * Dies if the descriptor flags cannot be read.
 */
static void
set_cloexec_flag(int fd)
{
	int cur = fcntl(fd, F_GETFD);

	if (cur < 0) {
		/* Only a bad fd can cause this, which would be a bug
		 * in our caller: abort rather than report the error
		 * upwards.
		 */
		perror_msg_and_die("fcntl(%d, F_GETFD)", fd);
	}

	/* Nothing to do when the flag is already present */
	if ((cur | FD_CLOEXEC) == cur)
		return;

	fcntl(fd, F_SETFD, cur | FD_CLOEXEC); /* never fails */
}
360
/*
 * Send signal sig to pid, ignoring the outcome, and leave errno
 * exactly as it was on entry.
 */
static void
kill_save_errno(pid_t pid, int sig)
{
	const int errno_on_entry = errno;

	(void) kill(pid, sig);

	errno = errno_on_entry;
}
368
/*
 * When strace is a setuid executable, filesystem and process
 * management operations must be bracketed by a swap of the real and
 * effective uids.  This performs one such swap; it is a no-op when
 * both ids are equal, and dies if setreuid() fails.
 */
static void
swap_uid(void)
{
	int effective = geteuid();
	int real = getuid();

	if (effective == real)
		return;

	if (setreuid(effective, real) < 0) {
		perror_msg_and_die("setreuid");
	}
}
382
/* fopen variant used for output files: fopen64 when _LFS64_LARGEFILE is nonzero, else plain fopen. */
#if _LFS64_LARGEFILE
# define fopen_for_output fopen64
#else
# define fopen_for_output fopen
#endif
388
/*
 * Open path for writing (mode "w") with real/effective uids swapped
 * for the duration of the open, and mark the resulting descriptor
 * close-on-exec.  Dies if the file cannot be opened.
 * Returns the opened stream (never NULL).
 */
static FILE *
strace_fopen(const char *path)
{
	FILE *stream;

	swap_uid();
	stream = fopen_for_output(path, "w");
	if (stream == NULL)
		perror_msg_and_die("Can't fopen '%s'", path);
	swap_uid();

	set_cloexec_flag(fileno(stream));
	return stream;
}
402
/* pid of the shell child started by strace_popen(); 0 until then */
static int popen_pid = 0;

/* Shell executed by strace_popen() when the system headers do not name one */
#ifndef _PATH_BSHELL
# define _PATH_BSHELL "/bin/sh"
#endif
408
409/*
410 * We cannot use standard popen(3) here because we have to distinguish
411 * popen child process from other processes we trace, and standard popen(3)
412 * does not export its child's pid.
413 */
414static FILE *
415strace_popen(const char *command)
416{
417	FILE *fp;
418	int fds[2];
419
420	swap_uid();
421	if (pipe(fds) < 0)
422		perror_msg_and_die("pipe");
423
424	set_cloexec_flag(fds[1]); /* never fails */
425
426	popen_pid = vfork();
427	if (popen_pid == -1)
428		perror_msg_and_die("vfork");
429
430	if (popen_pid == 0) {
431		/* child */
432		close(fds[1]);
433		if (fds[0] != 0) {
434			if (dup2(fds[0], 0))
435				perror_msg_and_die("dup2");
436			close(fds[0]);
437		}
438		execl(_PATH_BSHELL, "sh", "-c", command, NULL);
439		perror_msg_and_die("Can't execute '%s'", _PATH_BSHELL);
440	}
441
442	/* parent */
443	close(fds[0]);
444	swap_uid();
445	fp = fdopen(fds[1], "w");
446	if (!fp)
447		die_out_of_memory();
448	return fp;
449}
450
451void
452tprintf(const char *fmt, ...)
453{
454	va_list args;
455
456	va_start(args, fmt);
457	if (outf) {
458		int n = vfprintf(outf, fmt, args);
459		if (n < 0) {
460			if (outf != stderr)
461				perror(outfname == NULL
462				       ? "<writing to pipe>" : outfname);
463		} else
464			curcol += n;
465	}
466	va_end(args);
467}
468
469void
470tprints(const char *str)
471{
472	if (outf) {
473		int n = fputs(str, outf);
474		if (n >= 0) {
475			curcol += strlen(str);
476			return;
477		}
478		if (outf != stderr)
479			perror(outfname == NULL
480			       ? "<writing to pipe>" : outfname);
481	}
482}
483
484void
485line_ended(void)
486{
487	curcol = 0;
488	fflush(outf);
489	if (!printing_tcp)
490		return;
491	printing_tcp->curcol = 0;
492	printing_tcp = NULL;
493}
494
/*
 * Begin a new output line on behalf of tcp.
 *
 * If a previously printing tcb left a line unfinished, that line is
 * terminated first (in that tcb's output stream).  Then tcp becomes the
 * printing tcb and the configured line prefix is emitted: pid,
 * timestamp (tflag/rflag) and instruction pointer (iflag).
 */
void
printleader(struct tcb *tcp)
{
	/* If -ff, "previous tcb we printed" is always the same as current,
	 * because we have per-tcb output files.
	 */
	if (followfork >= 2)
		printing_tcp = tcp;

	if (printing_tcp) {
		/* Switch output state to the tcb whose line may be unfinished */
		outf = printing_tcp->outf;
		curcol = printing_tcp->curcol;
		if (printing_tcp->ptrace_errno) {
			/* A ptrace error was recorded for that tcb: close its line
			 * with "<unavailable>" markers and clear the error.
			 */
			if (printing_tcp->flags & TCB_INSYSCALL) {
				tprints(" <unavailable>) ");
				tabto();
			}
			tprints("= ? <unavailable>\n");
			printing_tcp->ptrace_errno = 0;
			printing_tcp->curcol = 0;
		}
		if (printing_tcp->curcol != 0 && (followfork < 2 || printing_tcp == tcp)) {
			/*
			 * case 1: we have a shared log (i.e. not -ff), and last line
			 * wasn't finished (same or different tcb, doesn't matter).
			 * case 2: split log, we are the same tcb, but our last line
			 * didn't finish ("SIGKILL nuked us after syscall entry" etc).
			 */
			tprints(" <unfinished ...>\n");
			/* Mark that tcb so its interrupted line can be re-announced later */
			printing_tcp->flags |= TCB_REPRINT;
			printing_tcp->curcol = 0;
		}
	}

	/* From here on, output belongs to tcp and starts at column 0 */
	printing_tcp = tcp;
	outf = tcp->outf;
	curcol = 0;

	if (print_pid_pfx)
		tprintf("%-5d ", tcp->pid);
	else if (nprocs > 1 && !outfname)
		tprintf("[pid %5u] ", tcp->pid);

	if (tflag) {
		char str[sizeof("HH:MM:SS")];
		struct timeval tv, dtv;
		/* Time of the previous line; used only for relative timestamps */
		static struct timeval otv;

		gettimeofday(&tv, NULL);
		if (rflag) {
			/* First call: no previous line, so the delta is zero */
			if (otv.tv_sec == 0)
				otv = tv;
			tv_sub(&dtv, &tv, &otv);
			tprintf("%6ld.%06ld ",
				(long) dtv.tv_sec, (long) dtv.tv_usec);
			otv = tv;
		}
		else if (tflag > 2) {
			/* Seconds since the epoch with microseconds */
			tprintf("%ld.%06ld ",
				(long) tv.tv_sec, (long) tv.tv_usec);
		}
		else {
			/* Local wall-clock time, with microseconds when tflag > 1 */
			time_t local = tv.tv_sec;
			strftime(str, sizeof(str), "%T", localtime(&local));
			if (tflag > 1)
				tprintf("%s.%06ld ", str, (long) tv.tv_usec);
			else
				tprintf("%s ", str);
		}
	}
	if (iflag)
		printcall(tcp);
}
568
569void
570tabto(void)
571{
572	if (curcol < acolumn)
573		tprints(acolumn_spaces + curcol);
574}
575
576static void
577newoutf(struct tcb *tcp)
578{
579	if (outfname && followfork >= 2) {
580		char name[520 + sizeof(int) * 3];
581		sprintf(name, "%.512s.%u", outfname, tcp->pid);
582		tcp->outf = strace_fopen(name);
583	}
584}
585
586static void
587expand_tcbtab(void)
588{
589	/* Allocate some more TCBs and expand the table.
590	   We don't want to relocate the TCBs because our
591	   callers have pointers and it would be a pain.
592	   So tcbtab is a table of pointers.  Since we never
593	   free the TCBs, we allocate a single chunk of many.  */
594	int i = tcbtabsize;
595	struct tcb *newtcbs = calloc(tcbtabsize, sizeof(newtcbs[0]));
596	struct tcb **newtab = realloc(tcbtab, tcbtabsize * 2 * sizeof(tcbtab[0]));
597	if (!newtab || !newtcbs)
598		die_out_of_memory();
599	tcbtabsize *= 2;
600	tcbtab = newtab;
601	while (i < tcbtabsize)
602		tcbtab[i++] = newtcbs++;
603}
604
605static struct tcb *
606alloc_tcb(int pid, int command_options_parsed)
607{
608	int i;
609	struct tcb *tcp;
610
611	if (nprocs == tcbtabsize)
612		expand_tcbtab();
613
614	for (i = 0; i < tcbtabsize; i++) {
615		tcp = tcbtab[i];
616		if ((tcp->flags & TCB_INUSE) == 0) {
617			memset(tcp, 0, sizeof(*tcp));
618			tcp->pid = pid;
619			tcp->flags = TCB_INUSE;
620			tcp->outf = outf; /* Initialise to current out file */
621#if SUPPORTED_PERSONALITIES > 1
622			tcp->currpers = current_personality;
623#endif
624			nprocs++;
625			if (debug_flag)
626				fprintf(stderr, "new tcb for pid %d, active tcbs:%d\n", tcp->pid, nprocs);
627			if (command_options_parsed)
628				newoutf(tcp);
629			return tcp;
630		}
631	}
632	error_msg_and_die("bug in alloc_tcb");
633}
634#define alloctcb(pid) alloc_tcb((pid), 1)
635
636static void
637droptcb(struct tcb *tcp)
638{
639	if (tcp->pid == 0)
640		return;
641
642	nprocs--;
643	if (debug_flag)
644		fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
645
646	if (tcp->outf) {
647		if (outfname && followfork >= 2) {
648			if (tcp->curcol != 0)
649				fprintf(tcp->outf, " <detached ...>\n");
650			fclose(tcp->outf);
651			if (outf == tcp->outf)
652				outf = NULL;
653		} else {
654			if (printing_tcp == tcp && tcp->curcol != 0)
655				fprintf(tcp->outf, " <detached ...>\n");
656			fflush(tcp->outf);
657		}
658	}
659
660	if (printing_tcp == tcp)
661		printing_tcp = NULL;
662
663	memset(tcp, 0, sizeof(*tcp));
664}
665
/* detach traced process; continue with sig
 * Never call DETACH twice on the same process as both unattached and
 * attached-unstopped processes give the same ESRCH.  For unattached process we
 * would SIGSTOP it and wait for its SIGSTOP notification forever.
 *
 * The tcb is always dropped before returning.
 * Returns the result of the last ptrace operation whose status was
 * recorded (0 if none failed or none was attempted).
 */
static int
detach(struct tcb *tcp)
{
	int error;
	int status, sigstop_expected;

	if (tcp->flags & TCB_BPTSET)
		clearbpt(tcp);

	/*
	 * Linux wrongly insists the child be stopped
	 * before detaching.  Arghh.  We go through hoops
	 * to make a clean break of things.
	 */
#if defined(SPARC)
#undef PTRACE_DETACH
#define PTRACE_DETACH PTRACE_SUNDETACH
#endif

	error = 0;
	sigstop_expected = 0;
	if (tcp->flags & TCB_ATTACHED) {
		/*
		 * We attached but possibly didn't see the expected SIGSTOP.
		 * We must catch exactly one as otherwise the detached process
		 * would be left stopped (process state T).
		 */
		sigstop_expected = (tcp->flags & TCB_IGNORE_ONE_SIGSTOP);
		error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, 0);
		if (error == 0) {
			/* On a clear day, you can see forever. */
		}
		else if (errno != ESRCH) {
			/* Shouldn't happen. */
			perror("detach: ptrace(PTRACE_DETACH, ...)");
		}
		/* ESRCH: probe with signal 0 whether the task still exists */
		else if (my_tkill(tcp->pid, 0) < 0) {
			if (errno != ESRCH)
				perror("detach: checking sanity");
		}
		/* Task exists but is not stopped: stop it ourselves unless
		 * a SIGSTOP is already on its way.
		 */
		else if (!sigstop_expected && my_tkill(tcp->pid, SIGSTOP) < 0) {
			if (errno != ESRCH)
				perror("detach: stopping child");
		}
		else
			sigstop_expected = 1;
	}

	if (sigstop_expected) {
		/* Wait for the SIGSTOP stop, passing through anything else */
		for (;;) {
#ifdef __WALL
			if (waitpid(tcp->pid, &status, __WALL) < 0) {
				if (errno == ECHILD) /* Already gone.  */
					break;
				if (errno != EINVAL) {
					perror("detach: waiting");
					break;
				}
#endif /* __WALL */
				/* No __WALL here.  */
				if (waitpid(tcp->pid, &status, 0) < 0) {
					if (errno != ECHILD) {
						perror("detach: waiting");
						break;
					}
#ifdef __WCLONE
					/* If no processes, try clones.  */
					if (waitpid(tcp->pid, &status, __WCLONE) < 0) {
						if (errno != ECHILD)
							perror("detach: waiting");
						break;
					}
#endif /* __WCLONE */
				}
#ifdef __WALL
			}
#endif
			if (!WIFSTOPPED(status)) {
				/* Au revoir, mon ami. */
				break;
			}
			if (WSTOPSIG(status) == SIGSTOP) {
				/* The stop we were waiting for: detach without a signal */
				ptrace_restart(PTRACE_DETACH, tcp, 0);
				break;
			}
			/* Some other stop: resume, re-injecting the signal unless
			 * it was merely a syscall trap.
			 */
			error = ptrace_restart(PTRACE_CONT, tcp,
					WSTOPSIG(status) == syscall_trap_sig ? 0
					: WSTOPSIG(status));
			if (error < 0)
				break;
		}
	}

	if (!qflag && (tcp->flags & TCB_ATTACHED))
		fprintf(stderr, "Process %u detached\n", tcp->pid);

	droptcb(tcp);

	return error;
}
771
772static void
773process_opt_p_list(char *opt)
774{
775	while (*opt) {
776		/*
777		 * We accept -p PID,PID; -p "`pidof PROG`"; -p "`pgrep PROG`".
778		 * pidof uses space as delim, pgrep uses newline. :(
779		 */
780		int pid;
781		char *delim = opt + strcspn(opt, ", \n\t");
782		char c = *delim;
783
784		*delim = '\0';
785		pid = atoi(opt); /* TODO: stricter parsing of the number? */
786		if (pid <= 0) {
787			error_msg("Invalid process id: '%s'", opt);
788			*delim = c;
789			return;
790		}
791		if (pid == strace_tracer_pid) {
792			error_msg("I'm sorry, I can't let you do that, Dave.");
793			*delim = c;
794			return;
795		}
796		*delim = c;
797		alloc_tcb(pid, 0);
798		if (c == '\0')
799			break;
800		opt = delim + 1;
801	}
802}
803
/*
 * Attach to every in-use tcb that is not attached yet.
 *
 * With -D the process forks first; the child continues as the tracer
 * while the parent just waits to be killed.  When following forks
 * (and not -D), every task listed in /proc/PID/task is attached;
 * otherwise, or if that directory cannot be opened, only the pid
 * itself.  TCBs that cannot be attached are dropped.
 */
static void
startup_attach(void)
{
	int tcbi;
	struct tcb *tcp;

	/*
	 * Block user interruptions as we would leave the traced
	 * process stopped (process state T) if we would terminate in
	 * between PTRACE_ATTACH and wait4() on SIGSTOP.
	 * We rely on cleanup() from this point on.
	 */
	if (interactive)
		sigprocmask(SIG_BLOCK, &blocked_set, NULL);

	if (daemonized_tracer) {
		pid_t pid = fork();
		if (pid < 0) {
			perror_msg_and_die("fork");
		}
		if (pid) { /* parent */
			/*
			 * Wait for grandchild to attach to straced process
			 * (grandparent). Grandchild SIGKILLs us after it attached.
			 * Grandparent's wait() is unblocked by our death,
			 * it proceeds to exec the straced program.
			 */
			pause();
			_exit(0); /* paranoia */
		}
		/* grandchild */
		/* We will be the tracer process. Remember our new pid: */
		strace_tracer_pid = getpid();
	}

	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
		tcp = tcbtab[tcbi];

		if (!(tcp->flags & TCB_INUSE))
			continue;

		/* Is this a process we should attach to, but not yet attached? */
		if (tcp->flags & TCB_ATTACHED)
			continue; /* no, we already attached it */

		/* Reinitialize the output since it may have changed */
		tcp->outf = outf;
		newoutf(tcp);

		if (followfork && !daemonized_tracer) {
			char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
			DIR *dir;

			sprintf(procdir, "/proc/%d/task", tcp->pid);
			dir = opendir(procdir);
			if (dir != NULL) {
				/* ntid: numeric entries seen; nerr: how many failed to attach */
				unsigned int ntid = 0, nerr = 0;
				struct dirent *de;

				while ((de = readdir(dir)) != NULL) {
					struct tcb *cur_tcp;
					int tid;

					if (de->d_fileno == 0)
						continue;
					/* Skip entries that are not positive numbers ("." and "..") */
					tid = atoi(de->d_name);
					if (tid <= 0)
						continue;
					++ntid;
					if (ptrace_attach_or_seize(tid) < 0) {
						++nerr;
						if (debug_flag)
							fprintf(stderr, "attach to pid %d failed\n", tid);
						continue;
					}
					if (debug_flag)
						fprintf(stderr, "attach to pid %d succeeded\n", tid);
					/* The pid named on the command line reuses its own tcb;
					 * every other thread gets a new one.
					 */
					cur_tcp = tcp;
					if (tid != tcp->pid)
						cur_tcp = alloctcb(tid);
					cur_tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
				}
				closedir(dir);
				if (interactive) {
					/* Briefly unblock signals so a pending interrupt can fire */
					sigprocmask(SIG_SETMASK, &empty_set, NULL);
					if (interrupted)
						goto ret;
					sigprocmask(SIG_BLOCK, &blocked_set, NULL);
				}
				ntid -= nerr;
				if (ntid == 0) {
					/* NOTE(review): errno may have been changed by calls made
					 * since the failed ptrace() -- confirm the message is still
					 * meaningful here.
					 */
					perror("attach: ptrace(PTRACE_ATTACH, ...)");
					droptcb(tcp);
					continue;
				}
				if (!qflag) {
					fprintf(stderr, ntid > 1
? "Process %u attached with %u threads - interrupt to quit\n"
: "Process %u attached - interrupt to quit\n",
						tcp->pid, ntid);
				}
				if (!(tcp->flags & TCB_ATTACHED)) {
					/* -p PID, we failed to attach to PID itself
					 * but did attach to some of its sibling threads.
					 * Drop PID's tcp.
					 */
					droptcb(tcp);
				}
				continue;
			} /* if (opendir worked) */
		} /* if (-f) */
		/* Single-task attach */
		if (ptrace_attach_or_seize(tcp->pid) < 0) {
			perror("attach: ptrace(PTRACE_ATTACH, ...)");
			droptcb(tcp);
			continue;
		}
		tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
		if (debug_flag)
			fprintf(stderr, "attach to pid %d (main) succeeded\n", tcp->pid);

		if (daemonized_tracer) {
			/*
			 * Make parent go away.
			 * Also makes grandparent's wait() unblock.
			 */
			kill(getppid(), SIGKILL);
		}

		if (!qflag)
			fprintf(stderr,
				"Process %u attached - interrupt to quit\n",
				tcp->pid);
	} /* for each tcbtab[] */

 ret:
	if (interactive)
		sigprocmask(SIG_SETMASK, &empty_set, NULL);
}
942
943static void
944startup_child(char **argv)
945{
946	struct stat statbuf;
947	const char *filename;
948	char pathname[MAXPATHLEN];
949	int pid = 0;
950	struct tcb *tcp;
951
952	filename = argv[0];
953	if (strchr(filename, '/')) {
954		if (strlen(filename) > sizeof pathname - 1) {
955			errno = ENAMETOOLONG;
956			perror_msg_and_die("exec");
957		}
958		strcpy(pathname, filename);
959	}
960#ifdef USE_DEBUGGING_EXEC
961	/*
962	 * Debuggers customarily check the current directory
963	 * first regardless of the path but doing that gives
964	 * security geeks a panic attack.
965	 */
966	else if (stat(filename, &statbuf) == 0)
967		strcpy(pathname, filename);
968#endif /* USE_DEBUGGING_EXEC */
969	else {
970		const char *path;
971		int m, n, len;
972
973		for (path = getenv("PATH"); path && *path; path += m) {
974			const char *colon = strchr(path, ':');
975			if (colon) {
976				n = colon - path;
977				m = n + 1;
978			}
979			else
980				m = n = strlen(path);
981			if (n == 0) {
982				if (!getcwd(pathname, MAXPATHLEN))
983					continue;
984				len = strlen(pathname);
985			}
986			else if (n > sizeof pathname - 1)
987				continue;
988			else {
989				strncpy(pathname, path, n);
990				len = n;
991			}
992			if (len && pathname[len - 1] != '/')
993				pathname[len++] = '/';
994			strcpy(pathname + len, filename);
995			if (stat(pathname, &statbuf) == 0 &&
996			    /* Accept only regular files
997			       with some execute bits set.
998			       XXX not perfect, might still fail */
999			    S_ISREG(statbuf.st_mode) &&
1000			    (statbuf.st_mode & 0111))
1001				break;
1002		}
1003	}
1004	if (stat(pathname, &statbuf) < 0) {
1005		perror_msg_and_die("Can't stat '%s'", filename);
1006	}
1007	strace_child = pid = fork();
1008	if (pid < 0) {
1009		perror_msg_and_die("fork");
1010	}
1011	if ((pid != 0 && daemonized_tracer) /* -D: parent to become a traced process */
1012	 || (pid == 0 && !daemonized_tracer) /* not -D: child to become a traced process */
1013	) {
1014		pid = getpid();
1015		if (outf != stderr)
1016			close(fileno(outf));
1017		if (!daemonized_tracer && !use_seize) {
1018			if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0) {
1019				perror_msg_and_die("ptrace(PTRACE_TRACEME, ...)");
1020			}
1021		}
1022
1023		if (username != NULL) {
1024			uid_t run_euid = run_uid;
1025			gid_t run_egid = run_gid;
1026
1027			if (statbuf.st_mode & S_ISUID)
1028				run_euid = statbuf.st_uid;
1029			if (statbuf.st_mode & S_ISGID)
1030				run_egid = statbuf.st_gid;
1031			/*
1032			 * It is important to set groups before we
1033			 * lose privileges on setuid.
1034			 */
1035			if (initgroups(username, run_gid) < 0) {
1036				perror_msg_and_die("initgroups");
1037			}
1038			if (setregid(run_gid, run_egid) < 0) {
1039				perror_msg_and_die("setregid");
1040			}
1041			if (setreuid(run_uid, run_euid) < 0) {
1042				perror_msg_and_die("setreuid");
1043			}
1044		}
1045		else if (geteuid() != 0)
1046			setreuid(run_uid, run_uid);
1047
1048		if (!daemonized_tracer) {
1049			/*
1050			 * Induce a ptrace stop. Tracer (our parent)
1051			 * will resume us with PTRACE_SYSCALL and display
1052			 * the immediately following execve syscall.
1053			 * Can't do this on NOMMU systems, we are after
1054			 * vfork: parent is blocked, stopping would deadlock.
1055			 */
1056			if (!strace_vforked)
1057				kill(pid, SIGSTOP);
1058		} else {
1059			alarm(3);
1060			/* we depend on SIGCHLD set to SIG_DFL by init code */
1061			/* if it happens to be SIG_IGN'ed, wait won't block */
1062			wait(NULL);
1063			alarm(0);
1064		}
1065
1066		execv(pathname, argv);
1067		perror_msg_and_die("exec");
1068	}
1069
1070	/* We are the tracer */
1071
1072	if (!daemonized_tracer) {
1073		if (!use_seize) {
1074			/* child did PTRACE_TRACEME, nothing to do in parent */
1075		} else {
1076			if (!strace_vforked) {
1077				/* Wait until child stopped itself */
1078				int status;
1079				while (waitpid(pid, &status, WSTOPPED) < 0) {
1080					if (errno == EINTR)
1081						continue;
1082					perror_msg_and_die("waitpid");
1083				}
1084				if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) {
1085					kill_save_errno(pid, SIGKILL);
1086					perror_msg_and_die("Unexpected wait status %x", status);
1087				}
1088			}
1089			/* Else: vforked case, we have no way to sync.
1090			 * Just attach to it as soon as possible.
1091			 * This means that we may miss a few first syscalls...
1092			 */
1093
1094			if (ptrace_attach_or_seize(pid)) {
1095				kill_save_errno(pid, SIGKILL);
1096				perror_msg_and_die("Can't attach to %d", pid);
1097			}
1098			if (!strace_vforked)
1099				kill(pid, SIGCONT);
1100		}
1101		tcp = alloctcb(pid);
1102		if (!strace_vforked)
1103			tcp->flags |= TCB_ATTACHED | TCB_STRACE_CHILD | TCB_STARTUP | post_attach_sigstop;
1104		else
1105			tcp->flags |= TCB_ATTACHED | TCB_STRACE_CHILD | TCB_STARTUP;
1106	}
1107	else {
1108		/* With -D, *we* are child here, IOW: different pid. Fetch it: */
1109		strace_tracer_pid = getpid();
1110		/* The tracee is our parent: */
1111		pid = getppid();
1112		alloctcb(pid);
1113		/* attaching will be done later, by startup_attach */
1114	}
1115}
1116
1117/*
 * Test whether the kernel supports PTRACE_O_TRACECLONE et al. options.
1119 * First fork a new child, call ptrace with PTRACE_SETOPTIONS on it,
1120 * and then see which options are supported by the kernel.
1121 */
1122static void
1123test_ptrace_setoptions_followfork(void)
1124{
1125	int pid, expected_grandchild = 0, found_grandchild = 0;
1126	const unsigned int test_options = PTRACE_O_TRACECLONE |
1127					  PTRACE_O_TRACEFORK |
1128					  PTRACE_O_TRACEVFORK;
1129
1130	pid = fork();
1131	if (pid < 0)
1132		perror_msg_and_die("fork");
1133	if (pid == 0) {
1134		pid = getpid();
1135		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
1136			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
1137					   __func__);
1138		kill_save_errno(pid, SIGSTOP);
1139		if (fork() < 0)
1140			perror_msg_and_die("fork");
1141		_exit(0);
1142	}
1143
1144	while (1) {
1145		int status, tracee_pid;
1146
1147		errno = 0;
1148		tracee_pid = wait(&status);
1149		if (tracee_pid <= 0) {
1150			if (errno == EINTR)
1151				continue;
1152			if (errno == ECHILD)
1153				break;
1154			kill_save_errno(pid, SIGKILL);
1155			perror_msg_and_die("%s: unexpected wait result %d",
1156					   __func__, tracee_pid);
1157		}
1158		if (WIFEXITED(status)) {
1159			if (WEXITSTATUS(status)) {
1160				if (tracee_pid != pid)
1161					kill_save_errno(pid, SIGKILL);
1162				error_msg_and_die("%s: unexpected exit status %u",
1163						  __func__, WEXITSTATUS(status));
1164			}
1165			continue;
1166		}
1167		if (WIFSIGNALED(status)) {
1168			if (tracee_pid != pid)
1169				kill_save_errno(pid, SIGKILL);
1170			error_msg_and_die("%s: unexpected signal %u",
1171					  __func__, WTERMSIG(status));
1172		}
1173		if (!WIFSTOPPED(status)) {
1174			if (tracee_pid != pid)
1175				kill_save_errno(tracee_pid, SIGKILL);
1176			kill_save_errno(pid, SIGKILL);
1177			error_msg_and_die("%s: unexpected wait status %x",
1178					  __func__, status);
1179		}
1180		if (tracee_pid != pid) {
1181			found_grandchild = tracee_pid;
1182			if (ptrace(PTRACE_CONT, tracee_pid, 0, 0) < 0) {
1183				kill_save_errno(tracee_pid, SIGKILL);
1184				kill_save_errno(pid, SIGKILL);
1185				perror_msg_and_die("PTRACE_CONT doesn't work");
1186			}
1187			continue;
1188		}
1189		switch (WSTOPSIG(status)) {
1190		case SIGSTOP:
1191			if (ptrace(PTRACE_SETOPTIONS, pid, 0, test_options) < 0
1192			    && errno != EINVAL && errno != EIO)
1193				perror_msg("PTRACE_SETOPTIONS");
1194			break;
1195		case SIGTRAP:
1196			if (status >> 16 == PTRACE_EVENT_FORK) {
1197				long msg = 0;
1198
1199				if (ptrace(PTRACE_GETEVENTMSG, pid,
1200					   NULL, (long) &msg) == 0)
1201					expected_grandchild = msg;
1202			}
1203			break;
1204		}
1205		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) {
1206			kill_save_errno(pid, SIGKILL);
1207			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
1208		}
1209	}
1210	if (expected_grandchild && expected_grandchild == found_grandchild) {
1211		ptrace_setoptions |= test_options;
1212		if (debug_flag)
1213			fprintf(stderr, "ptrace_setoptions = %#x\n",
1214				ptrace_setoptions);
1215		return;
1216	}
1217	error_msg("Test for PTRACE_O_TRACECLONE failed, "
1218		  "giving up using this feature.");
1219}
1220
1221/*
1222 * Test whether the kernel support PTRACE_O_TRACESYSGOOD.
1223 * First fork a new child, call ptrace(PTRACE_SETOPTIONS) on it,
1224 * and then see whether it will stop with (SIGTRAP | 0x80).
1225 *
1226 * Use of this option enables correct handling of user-generated SIGTRAPs,
1227 * and SIGTRAPs generated by special instructions such as int3 on x86:
1228 * _start:	.globl	_start
1229 *		int3
1230 *		movl	$42, %ebx
1231 *		movl	$1, %eax
1232 *		int	$0x80
1233 * (compile with: "gcc -nostartfiles -nostdlib -o int3 int3.S")
1234 */
1235static void
1236test_ptrace_setoptions_for_all(void)
1237{
1238	const unsigned int test_options = PTRACE_O_TRACESYSGOOD |
1239					  PTRACE_O_TRACEEXEC;
1240	int pid;
1241	int it_worked = 0;
1242
1243	pid = fork();
1244	if (pid < 0)
1245		perror_msg_and_die("fork");
1246
1247	if (pid == 0) {
1248		pid = getpid();
1249		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
1250			/* Note: exits with exitcode 1 */
1251			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
1252					   __func__);
1253		kill(pid, SIGSTOP);
1254		_exit(0); /* parent should see entry into this syscall */
1255	}
1256
1257	while (1) {
1258		int status, tracee_pid;
1259
1260		errno = 0;
1261		tracee_pid = wait(&status);
1262		if (tracee_pid <= 0) {
1263			if (errno == EINTR)
1264				continue;
1265			kill_save_errno(pid, SIGKILL);
1266			perror_msg_and_die("%s: unexpected wait result %d",
1267					   __func__, tracee_pid);
1268		}
1269		if (WIFEXITED(status)) {
1270			if (WEXITSTATUS(status) == 0)
1271				break;
1272			error_msg_and_die("%s: unexpected exit status %u",
1273					  __func__, WEXITSTATUS(status));
1274		}
1275		if (WIFSIGNALED(status)) {
1276			error_msg_and_die("%s: unexpected signal %u",
1277					  __func__, WTERMSIG(status));
1278		}
1279		if (!WIFSTOPPED(status)) {
1280			kill(pid, SIGKILL);
1281			error_msg_and_die("%s: unexpected wait status %x",
1282					  __func__, status);
1283		}
1284		if (WSTOPSIG(status) == SIGSTOP) {
1285			/*
1286			 * We don't check "options aren't accepted" error.
1287			 * If it happens, we'll never get (SIGTRAP | 0x80),
1288			 * and thus will decide to not use the option.
1289			 * IOW: the outcome of the test will be correct.
1290			 */
1291			if (ptrace(PTRACE_SETOPTIONS, pid, 0L, test_options) < 0
1292			    && errno != EINVAL && errno != EIO)
1293				perror_msg("PTRACE_SETOPTIONS");
1294		}
1295		if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
1296			it_worked = 1;
1297		}
1298		if (ptrace(PTRACE_SYSCALL, pid, 0L, 0L) < 0) {
1299			kill_save_errno(pid, SIGKILL);
1300			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
1301		}
1302	}
1303
1304	if (it_worked) {
1305		syscall_trap_sig = (SIGTRAP | 0x80);
1306		ptrace_setoptions |= test_options;
1307		if (debug_flag)
1308			fprintf(stderr, "ptrace_setoptions = %#x\n",
1309				ptrace_setoptions);
1310		return;
1311	}
1312
1313	error_msg("Test for PTRACE_O_TRACESYSGOOD failed, "
1314		  "giving up using this feature.");
1315}
1316
# ifdef USE_SEIZE
/*
 * Probe whether PTRACE_SEIZE is usable.
 *
 * A throw-away child is forked and left in pause(); the parent tries to
 * seize it.  On success post_attach_sigstop is cleared.  The child is then
 * killed and reaped; anything other than "terminated by a signal" is
 * treated as a fatal error.
 */
static void
test_ptrace_seize(void)
{
	int status, reaped;
	int child;

	child = fork();
	if (child < 0)
		perror_msg_and_die("fork");

	if (child == 0) {
		pause();
		_exit(0);
	}

	/* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap.  After
	 * attaching tracee continues to run unless a trap condition occurs.
	 * PTRACE_SEIZE doesn't affect signal or group stop state.
	 */
	if (ptrace(PTRACE_SEIZE, child, 0, PTRACE_SEIZE_DEVEL) == 0) {
		post_attach_sigstop = 0; /* this sets use_seize to 1 */
	} else if (debug_flag) {
		fprintf(stderr, "PTRACE_SEIZE doesn't work\n");
	}

	kill(child, SIGKILL);

	/* Reap the child, retrying only when the wait was interrupted. */
	do {
		errno = 0;
		reaped = waitpid(child, &status, 0);
	} while (reaped <= 0 && errno == EINTR);

	if (reaped <= 0)
		perror_msg_and_die("%s: unexpected wait result %d",
				   __func__, reaped);
	if (!WIFSIGNALED(status))
		error_msg_and_die("%s: unexpected wait status %x",
				  __func__, status);
}
# else /* !USE_SEIZE */
#  define test_ptrace_seize() ((void)0)
# endif
1365
/*
 * Return the running kernel's version packed the same way as
 * KERNEL_VERSION(major, minor, patch), i.e. one byte per component.
 *
 * The value is parsed from uname()'s release string, which has the form
 * "3.2.9[-some-garbage]".  A release with only two numeric components
 * ("3.2-something") is accepted and treated as patch level 0.
 *
 * Dies with an error message if uname() fails or the string does not
 * start with at least "NUMBER.NUMBER".
 */
static unsigned
get_os_release(void)
{
	unsigned rel = 0;
	unsigned ncomponents = 0;
	const char *p;
	struct utsname u;

	if (uname(&u) < 0)
		perror_msg_and_die("uname");

	p = u.release;
	for (;;) {
		unsigned component = 0;

		if (!(*p >= '0' && *p <= '9'))
			error_msg_and_die("Bad OS release string: '%s'", u.release);

		/*
		 * Parse the digit run by hand and saturate at 255:
		 * each component owns exactly one byte of the result, so a
		 * larger value (e.g. "4.4.300") must not spill into the
		 * neighbouring field, and an overlong run must not overflow.
		 */
		while (*p >= '0' && *p <= '9') {
			if (component < 255) {
				component = component * 10 + (unsigned)(*p - '0');
				if (component > 255)
					component = 255;
			}
			p++;
		}

		/* Note: this open-codes KERNEL_VERSION(): */
		rel = (rel << 8) | component;
		if (++ncomponents == 3)
			break;

		if (*p != '.') {
			if (ncomponents == 2) {
				/* "X.Y-something" without ".Z" */
				rel <<= 8;
				break;
			}
			error_msg_and_die("Bad OS release string: '%s'", u.release);
		}
		p++;
	}
	return rel;
}
1392
1393/*
1394 * Initialization part of main() was eating much stack (~0.5k),
1395 * which was unused after init.
1396 * We can reuse it if we move init code into a separate function.
1397 *
1398 * Don't want main() to inline us and defeat the reason
1399 * we have a separate function.
1400 */
1401static void __attribute__ ((noinline))
1402init(int argc, char *argv[])
1403{
1404	struct tcb *tcp;
1405	int c;
1406	int optF = 0;
1407	struct sigaction sa;
1408
1409	progname = argv[0] ? argv[0] : "strace";
1410
1411	/* Make sure SIGCHLD has the default action so that waitpid
1412	   definitely works without losing track of children.  The user
1413	   should not have given us a bogus state to inherit, but he might
1414	   have.  Arguably we should detect SIG_IGN here and pass it on
1415	   to children, but probably noone really needs that.  */
1416	signal(SIGCHLD, SIG_DFL);
1417
1418	strace_tracer_pid = getpid();
1419
1420	os_release = get_os_release();
1421
1422	/* Allocate the initial tcbtab.  */
1423	tcbtabsize = argc;	/* Surely enough for all -p args.  */
1424	tcbtab = calloc(tcbtabsize, sizeof(tcbtab[0]));
1425	if (!tcbtab)
1426		die_out_of_memory();
1427	tcp = calloc(tcbtabsize, sizeof(*tcp));
1428	if (!tcp)
1429		die_out_of_memory();
1430	for (c = 0; c < tcbtabsize; c++)
1431		tcbtab[c] = tcp++;
1432
1433	outf = stderr;
1434	set_sortby(DEFAULT_SORTBY);
1435	set_personality(DEFAULT_PERSONALITY);
1436	qualify("trace=all");
1437	qualify("abbrev=all");
1438	qualify("verbose=all");
1439	qualify("signal=all");
1440	while ((c = getopt(argc, argv,
1441		"+bcCdfFhiqrtTvVxyz"
1442		"D"
1443		"a:e:o:O:p:s:S:u:E:P:I:")) != EOF) {
1444		switch (c) {
1445		case 'b':
1446			detach_on_execve = 1;
1447			break;
1448		case 'c':
1449			if (cflag == CFLAG_BOTH) {
1450				error_msg_and_die("-c and -C are mutually exclusive options");
1451			}
1452			cflag = CFLAG_ONLY_STATS;
1453			break;
1454		case 'C':
1455			if (cflag == CFLAG_ONLY_STATS) {
1456				error_msg_and_die("-c and -C are mutually exclusive options");
1457			}
1458			cflag = CFLAG_BOTH;
1459			break;
1460		case 'd':
1461			debug_flag = 1;
1462			break;
1463		case 'D':
1464			daemonized_tracer = 1;
1465			break;
1466		case 'F':
1467			optF = 1;
1468			break;
1469		case 'f':
1470			followfork++;
1471			break;
1472		case 'h':
1473			usage(stdout, 0);
1474			break;
1475		case 'i':
1476			iflag = 1;
1477			break;
1478		case 'q':
1479			qflag = 1;
1480			break;
1481		case 'r':
1482			rflag = 1;
1483			/* fall through to tflag++ */
1484		case 't':
1485			tflag++;
1486			break;
1487		case 'T':
1488			Tflag = 1;
1489			break;
1490		case 'x':
1491			xflag++;
1492			break;
1493		case 'y':
1494			show_fd_path = 1;
1495			break;
1496		case 'v':
1497			qualify("abbrev=none");
1498			break;
1499		case 'V':
1500			printf("%s -- version %s\n", PACKAGE_NAME, VERSION);
1501			exit(0);
1502			break;
1503		case 'z':
1504			not_failing_only = 1;
1505			break;
1506		case 'a':
1507			acolumn = atoi(optarg);
1508			if (acolumn < 0)
1509				error_msg_and_die("Bad column width '%s'", optarg);
1510			break;
1511		case 'e':
1512			qualify(optarg);
1513			break;
1514		case 'o':
1515			outfname = strdup(optarg);
1516			break;
1517		case 'O':
1518			set_overhead(atoi(optarg));
1519			break;
1520		case 'p':
1521			process_opt_p_list(optarg);
1522			break;
1523		case 'P':
1524			tracing_paths = 1;
1525			if (pathtrace_select(optarg)) {
1526				error_msg_and_die("Failed to select path '%s'", optarg);
1527			}
1528			break;
1529		case 's':
1530			max_strlen = atoi(optarg);
1531			if (max_strlen < 0) {
1532				error_msg_and_die("Invalid -%c argument: '%s'", c, optarg);
1533			}
1534			break;
1535		case 'S':
1536			set_sortby(optarg);
1537			break;
1538		case 'u':
1539			username = strdup(optarg);
1540			break;
1541		case 'E':
1542			if (putenv(optarg) < 0)
1543				die_out_of_memory();
1544			break;
1545		case 'I':
1546			opt_intr = atoi(optarg);
1547			if (opt_intr <= 0 || opt_intr >= NUM_INTR_OPTS) {
1548				error_msg_and_die("Invalid -%c argument: '%s'", c, optarg);
1549			}
1550			break;
1551		default:
1552			usage(stderr, 1);
1553			break;
1554		}
1555	}
1556	argv += optind;
1557	/* argc -= optind; - no need, argc is not used below */
1558
1559	acolumn_spaces = malloc(acolumn + 1);
1560	if (!acolumn_spaces)
1561		die_out_of_memory();
1562	memset(acolumn_spaces, ' ', acolumn);
1563	acolumn_spaces[acolumn] = '\0';
1564
1565	/* Must have PROG [ARGS], or -p PID. Not both. */
1566	if (!argv[0] == !nprocs)
1567		usage(stderr, 1);
1568
1569	if (nprocs != 0 && daemonized_tracer) {
1570		error_msg_and_die("-D and -p are mutually exclusive options");
1571	}
1572
1573	if (!followfork)
1574		followfork = optF;
1575
1576	if (followfork > 1 && cflag) {
1577		error_msg_and_die("(-c or -C) and -ff are mutually exclusive options");
1578	}
1579
1580	/* See if they want to run as another user. */
1581	if (username != NULL) {
1582		struct passwd *pent;
1583
1584		if (getuid() != 0 || geteuid() != 0) {
1585			error_msg_and_die("You must be root to use the -u option");
1586		}
1587		pent = getpwnam(username);
1588		if (pent == NULL) {
1589			error_msg_and_die("Cannot find user '%s'", username);
1590		}
1591		run_uid = pent->pw_uid;
1592		run_gid = pent->pw_gid;
1593	}
1594	else {
1595		run_uid = getuid();
1596		run_gid = getgid();
1597	}
1598
1599	if (followfork)
1600		test_ptrace_setoptions_followfork();
1601	test_ptrace_setoptions_for_all();
1602	test_ptrace_seize();
1603
1604	/* Check if they want to redirect the output. */
1605	if (outfname) {
1606		/* See if they want to pipe the output. */
1607		if (outfname[0] == '|' || outfname[0] == '!') {
1608			/*
1609			 * We can't do the <outfname>.PID funny business
1610			 * when using popen, so prohibit it.
1611			 */
1612			if (followfork > 1)
1613				error_msg_and_die("Piping the output and -ff are mutually exclusive");
1614			outf = strace_popen(outfname + 1);
1615		}
1616		else if (followfork <= 1)
1617			outf = strace_fopen(outfname);
1618	} else {
1619		/* -ff without -o FILE is the same as single -f */
1620		if (followfork > 1)
1621			followfork = 1;
1622	}
1623
1624	if (!outfname || outfname[0] == '|' || outfname[0] == '!') {
1625		char *buf = malloc(BUFSIZ);
1626		if (!buf)
1627			die_out_of_memory();
1628		setvbuf(outf, buf, _IOLBF, BUFSIZ);
1629	}
1630	if (outfname && argv[0]) {
1631		if (!opt_intr)
1632			opt_intr = INTR_NEVER;
1633		qflag = 1;
1634	}
1635	if (!opt_intr)
1636		opt_intr = INTR_WHILE_WAIT;
1637
1638	/* argv[0]	-pPID	-oFILE	Default interactive setting
1639	 * yes		0	0	INTR_WHILE_WAIT
1640	 * no		1	0	INTR_WHILE_WAIT
1641	 * yes		0	1	INTR_NEVER
1642	 * no		1	1	INTR_WHILE_WAIT
1643	 */
1644
1645	/* STARTUP_CHILD must be called before the signal handlers get
1646	   installed below as they are inherited into the spawned process.
1647	   Also we do not need to be protected by them as during interruption
1648	   in the STARTUP_CHILD mode we kill the spawned process anyway.  */
1649	if (argv[0]) {
1650		skip_startup_execve = 1;
1651		startup_child(argv);
1652	}
1653
1654	sigemptyset(&empty_set);
1655	sigemptyset(&blocked_set);
1656	sa.sa_handler = SIG_IGN;
1657	sigemptyset(&sa.sa_mask);
1658	sa.sa_flags = 0;
1659	sigaction(SIGTTOU, &sa, NULL); /* SIG_IGN */
1660	sigaction(SIGTTIN, &sa, NULL); /* SIG_IGN */
1661	if (opt_intr != INTR_ANYWHERE) {
1662		if (opt_intr == INTR_BLOCK_TSTP_TOO)
1663			sigaction(SIGTSTP, &sa, NULL); /* SIG_IGN */
1664		/*
1665		 * In interactive mode (if no -o OUTFILE, or -p PID is used),
1666		 * fatal signals are blocked while syscall stop is processed,
1667		 * and acted on in between, when waiting for new syscall stops.
1668		 * In non-interactive mode, signals are ignored.
1669		 */
1670		if (opt_intr == INTR_WHILE_WAIT) {
1671			sigaddset(&blocked_set, SIGHUP);
1672			sigaddset(&blocked_set, SIGINT);
1673			sigaddset(&blocked_set, SIGQUIT);
1674			sigaddset(&blocked_set, SIGPIPE);
1675			sigaddset(&blocked_set, SIGTERM);
1676			sa.sa_handler = interrupt;
1677		}
1678		/* SIG_IGN, or set handler for these */
1679		sigaction(SIGHUP, &sa, NULL);
1680		sigaction(SIGINT, &sa, NULL);
1681		sigaction(SIGQUIT, &sa, NULL);
1682		sigaction(SIGPIPE, &sa, NULL);
1683		sigaction(SIGTERM, &sa, NULL);
1684	}
1685	if (nprocs != 0 || daemonized_tracer)
1686		startup_attach();
1687
1688	/* Do we want pids printed in our -o OUTFILE?
1689	 * -ff: no (every pid has its own file); or
1690	 * -f: yes (there can be more pids in the future); or
1691	 * -p PID1,PID2: yes (there are already more than one pid)
1692	 */
1693	print_pid_pfx = (outfname && followfork < 2 && (followfork == 1 || nprocs > 1));
1694}
1695
1696static struct tcb *
1697pid2tcb(int pid)
1698{
1699	int i;
1700
1701	if (pid <= 0)
1702		return NULL;
1703
1704	for (i = 0; i < tcbtabsize; i++) {
1705		struct tcb *tcp = tcbtab[i];
1706		if (tcp->pid == pid && (tcp->flags & TCB_INUSE))
1707			return tcp;
1708	}
1709
1710	return NULL;
1711}
1712
1713static void
1714cleanup(void)
1715{
1716	int i;
1717	struct tcb *tcp;
1718	int fatal_sig;
1719
1720	/* 'interrupted' is a volatile object, fetch it only once */
1721	fatal_sig = interrupted;
1722	if (!fatal_sig)
1723		fatal_sig = SIGTERM;
1724
1725	for (i = 0; i < tcbtabsize; i++) {
1726		tcp = tcbtab[i];
1727		if (!(tcp->flags & TCB_INUSE))
1728			continue;
1729		if (debug_flag)
1730			fprintf(stderr,
1731				"cleanup: looking at pid %u\n", tcp->pid);
1732		if (tcp->flags & TCB_STRACE_CHILD) {
1733			kill(tcp->pid, SIGCONT);
1734			kill(tcp->pid, fatal_sig);
1735		}
1736		detach(tcp);
1737	}
1738	if (cflag)
1739		call_summary(outf);
1740}
1741
1742static void
1743interrupt(int sig)
1744{
1745	interrupted = sig;
1746}
1747
1748static int
1749trace(void)
1750{
1751	struct rusage ru;
1752	struct rusage *rup = cflag ? &ru : NULL;
1753# ifdef __WALL
1754	static int wait4_options = __WALL;
1755# endif
1756
1757	while (nprocs != 0) {
1758		int pid;
1759		int wait_errno;
1760		int status, sig;
1761		int stopped;
1762		struct tcb *tcp;
1763		unsigned event;
1764
1765		if (interrupted)
1766			return 0;
1767		if (interactive)
1768			sigprocmask(SIG_SETMASK, &empty_set, NULL);
1769# ifdef __WALL
1770		pid = wait4(-1, &status, wait4_options, rup);
1771		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
1772			/* this kernel does not support __WALL */
1773			wait4_options &= ~__WALL;
1774			pid = wait4(-1, &status, wait4_options, rup);
1775		}
1776		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
1777			/* most likely a "cloned" process */
1778			pid = wait4(-1, &status, __WCLONE, rup);
1779			if (pid < 0) {
1780				perror_msg("wait4(__WCLONE) failed");
1781			}
1782		}
1783# else
1784		pid = wait4(-1, &status, 0, rup);
1785# endif /* __WALL */
1786		wait_errno = errno;
1787		if (interactive)
1788			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
1789
1790		if (pid < 0) {
1791			switch (wait_errno) {
1792			case EINTR:
1793				continue;
1794			case ECHILD:
1795				/*
1796				 * We would like to verify this case
1797				 * but sometimes a race in Solbourne's
1798				 * version of SunOS sometimes reports
1799				 * ECHILD before sending us SIGCHILD.
1800				 */
1801				return 0;
1802			default:
1803				errno = wait_errno;
1804				perror_msg("wait");
1805				return -1;
1806			}
1807		}
1808		if (pid == popen_pid) {
1809			if (WIFEXITED(status) || WIFSIGNALED(status))
1810				popen_pid = 0;
1811			continue;
1812		}
1813
1814		event = ((unsigned)status >> 16);
1815		if (debug_flag) {
1816			char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
1817			char evbuf[sizeof(",PTRACE_EVENT_?? (%u)") + sizeof(int)*3 /*paranoia:*/ + 16];
1818			strcpy(buf, "???");
1819			if (WIFSIGNALED(status))
1820#ifdef WCOREDUMP
1821				sprintf(buf, "WIFSIGNALED,%ssig=%s",
1822						WCOREDUMP(status) ? "core," : "",
1823						signame(WTERMSIG(status)));
1824#else
1825				sprintf(buf, "WIFSIGNALED,sig=%s",
1826						signame(WTERMSIG(status)));
1827#endif
1828			if (WIFEXITED(status))
1829				sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
1830			if (WIFSTOPPED(status))
1831				sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
1832#ifdef WIFCONTINUED
1833			if (WIFCONTINUED(status))
1834				strcpy(buf, "WIFCONTINUED");
1835#endif
1836			evbuf[0] = '\0';
1837			if (event != 0) {
1838				static const char *const event_names[] = {
1839					[PTRACE_EVENT_CLONE] = "CLONE",
1840					[PTRACE_EVENT_FORK]  = "FORK",
1841					[PTRACE_EVENT_VFORK] = "VFORK",
1842					[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
1843					[PTRACE_EVENT_EXEC]  = "EXEC",
1844					[PTRACE_EVENT_EXIT]  = "EXIT",
1845				};
1846				const char *e;
1847				if (event < ARRAY_SIZE(event_names))
1848					e = event_names[event];
1849				else {
1850					sprintf(buf, "?? (%u)", event);
1851					e = buf;
1852				}
1853				sprintf(evbuf, ",PTRACE_EVENT_%s", e);
1854			}
1855			fprintf(stderr, " [wait(0x%04x) = %u] %s%s\n", status, pid, buf, evbuf);
1856		}
1857
1858		/* Look up 'pid' in our table. */
1859		tcp = pid2tcb(pid);
1860
1861		/* Under Linux, execve changes pid to thread leader's pid,
1862		 * and we see this changed pid on EVENT_EXEC and later,
1863		 * execve sysexit. Leader "disappears" without exit
1864		 * notification. Let user know that, drop leader's tcb,
1865		 * and fix up pid in execve thread's tcb.
1866		 * Effectively, execve thread's tcb replaces leader's tcb.
1867		 *
1868		 * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
1869		 * on exit syscall) in multithreaded programs exactly
1870		 * in order to handle this case.
1871		 *
1872		 * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
1873		 * On 2.6 and earlier, it can return garbage.
1874		 */
1875		if (event == PTRACE_EVENT_EXEC && os_release >= KERNEL_VERSION(3,0,0)) {
1876			long old_pid = 0;
1877			if (ptrace(PTRACE_GETEVENTMSG, pid, NULL, (long) &old_pid) >= 0
1878			 && old_pid > 0
1879			 && old_pid != pid
1880			) {
1881				struct tcb *execve_thread = pid2tcb(old_pid);
1882				if (tcp) {
1883					outf = tcp->outf;
1884					curcol = tcp->curcol;
1885					if (execve_thread) {
1886						if (execve_thread->curcol != 0) {
1887							/*
1888							 * One case we are here is -ff:
1889							 * try "strace -oLOG -ff test/threaded_execve"
1890							 */
1891							fprintf(execve_thread->outf, " <pid changed to %d ...>\n", pid);
1892							execve_thread->curcol = 0;
1893						}
1894						/* swap output FILEs (needed for -ff) */
1895						tcp->outf = execve_thread->outf;
1896						tcp->curcol = execve_thread->curcol;
1897						execve_thread->outf = outf;
1898						execve_thread->curcol = curcol;
1899					}
1900					droptcb(tcp);
1901				}
1902				tcp = execve_thread;
1903				if (tcp) {
1904					tcp->pid = pid;
1905					tcp->flags |= TCB_REPRINT;
1906					if (!cflag) {
1907						printleader(tcp);
1908						tprintf("+++ superseded by execve in pid %lu +++\n", old_pid);
1909						line_ended();
1910					}
1911				}
1912			}
1913		}
1914
1915		if (event == PTRACE_EVENT_EXEC && detach_on_execve) {
1916			if (!skip_startup_execve)
1917				detach(tcp);
1918			/* This was initial execve for "strace PROG". Skip. */
1919			skip_startup_execve = 0;
1920		}
1921
1922		if (tcp == NULL) {
1923			if (followfork) {
1924				/* This is needed to go with the CLONE_PTRACE
1925				   changes in process.c/util.c: we might see
1926				   the child's initial trap before we see the
1927				   parent return from the clone syscall.
1928				   Leave the child suspended until the parent
1929				   returns from its system call.  Only then
1930				   will we have the association of parent and
1931				   child so that we know how to do clearbpt
1932				   in the child.  */
1933				tcp = alloctcb(pid);
1934				tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
1935				if (!qflag)
1936					fprintf(stderr, "Process %d attached\n",
1937						pid);
1938			}
1939			else
1940				/* This can happen if a clone call used
1941				   CLONE_PTRACE itself.  */
1942			{
1943				if (WIFSTOPPED(status))
1944					ptrace(PTRACE_CONT, pid, (char *) 0, 0);
1945				error_msg_and_die("Unknown pid: %u", pid);
1946			}
1947		}
1948
1949		/* Set current output file */
1950		outf = tcp->outf;
1951		curcol = tcp->curcol;
1952
1953		if (cflag) {
1954			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
1955			tcp->stime = ru.ru_stime;
1956		}
1957
1958		if (WIFSIGNALED(status)) {
1959			if (pid == strace_child)
1960				exit_code = 0x100 | WTERMSIG(status);
1961			if (cflag != CFLAG_ONLY_STATS
1962			    && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL)) {
1963				printleader(tcp);
1964#ifdef WCOREDUMP
1965				tprintf("+++ killed by %s %s+++\n",
1966					signame(WTERMSIG(status)),
1967					WCOREDUMP(status) ? "(core dumped) " : "");
1968#else
1969				tprintf("+++ killed by %s +++\n",
1970					signame(WTERMSIG(status)));
1971#endif
1972				line_ended();
1973			}
1974			droptcb(tcp);
1975			continue;
1976		}
1977		if (WIFEXITED(status)) {
1978			if (pid == strace_child)
1979				exit_code = WEXITSTATUS(status);
1980			if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
1981				printleader(tcp);
1982				tprintf("+++ exited with %d +++\n", WEXITSTATUS(status));
1983				line_ended();
1984			}
1985			droptcb(tcp);
1986			continue;
1987		}
1988		if (!WIFSTOPPED(status)) {
1989			fprintf(stderr, "PANIC: pid %u not stopped\n", pid);
1990			droptcb(tcp);
1991			continue;
1992		}
1993
1994		/* Is this the very first time we see this tracee stopped? */
1995		if (tcp->flags & TCB_STARTUP) {
1996			if (debug_flag)
1997				fprintf(stderr, "pid %d has TCB_STARTUP, initializing it\n", tcp->pid);
1998			tcp->flags &= ~TCB_STARTUP;
1999			if (tcp->flags & TCB_BPTSET) {
2000				/*
2001				 * One example is a breakpoint inherited from
2002				 * parent through fork().
2003				 */
2004				if (clearbpt(tcp) < 0) {
2005					/* Pretty fatal */
2006					droptcb(tcp);
2007					cleanup();
2008					return -1;
2009				}
2010			}
2011			if (ptrace_setoptions) {
2012				if (debug_flag)
2013					fprintf(stderr, "setting opts %x on pid %d\n", ptrace_setoptions, tcp->pid);
2014				if (ptrace(PTRACE_SETOPTIONS, tcp->pid, NULL, ptrace_setoptions) < 0) {
2015					if (errno != ESRCH) {
2016						/* Should never happen, really */
2017						perror_msg_and_die("PTRACE_SETOPTIONS");
2018					}
2019				}
2020			}
2021		}
2022
2023		sig = WSTOPSIG(status);
2024
2025		if (event != 0) {
2026			/* Ptrace event */
2027#ifdef USE_SEIZE
2028			if (event == PTRACE_EVENT_STOP || event == PTRACE_EVENT_STOP1) {
2029				/*
2030				 * PTRACE_INTERRUPT-stop or group-stop.
2031				 * PTRACE_INTERRUPT-stop has sig == SIGTRAP here.
2032				 */
2033				if (sig == SIGSTOP
2034				 || sig == SIGTSTP
2035				 || sig == SIGTTIN
2036				 || sig == SIGTTOU
2037				) {
2038					stopped = 1;
2039					goto show_stopsig;
2040				}
2041			}
2042#endif
2043			goto restart_tracee_with_sig_0;
2044		}
2045
2046		/* Is this post-attach SIGSTOP?
2047		 * Interestingly, the process may stop
2048		 * with STOPSIG equal to some other signal
2049		 * than SIGSTOP if we happend to attach
2050		 * just before the process takes a signal.
2051		 */
2052		if (sig == SIGSTOP && (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
2053			if (debug_flag)
2054				fprintf(stderr, "ignored SIGSTOP on pid %d\n", tcp->pid);
2055			tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
2056			goto restart_tracee_with_sig_0;
2057		}
2058
2059		if (sig != syscall_trap_sig) {
2060			siginfo_t si;
2061
2062			/* Nonzero (true) if tracee is stopped by signal
2063			 * (as opposed to "tracee received signal").
2064			 */
2065			stopped = (ptrace(PTRACE_GETSIGINFO, pid, 0, (long) &si) < 0);
2066#ifdef USE_SEIZE
2067 show_stopsig:
2068#endif
2069			if (cflag != CFLAG_ONLY_STATS
2070			    && (qual_flags[sig] & QUAL_SIGNAL)) {
2071#if defined(PT_CR_IPSR) && defined(PT_CR_IIP)
2072				long pc = 0;
2073				long psr = 0;
2074
2075				upeek(tcp, PT_CR_IPSR, &psr);
2076				upeek(tcp, PT_CR_IIP, &pc);
2077
2078# define PSR_RI	41
2079				pc += (psr >> PSR_RI) & 0x3;
2080# define PC_FORMAT_STR	" @ %lx"
2081# define PC_FORMAT_ARG	, pc
2082#else
2083# define PC_FORMAT_STR	""
2084# define PC_FORMAT_ARG	/* nothing */
2085#endif
2086				printleader(tcp);
2087				if (!stopped) {
2088					tprintf("--- %s ", signame(sig));
2089					printsiginfo(&si, verbose(tcp));
2090					tprintf(PC_FORMAT_STR " ---\n"
2091						PC_FORMAT_ARG);
2092				} else
2093					tprintf("--- stopped by %s" PC_FORMAT_STR " ---\n",
2094						signame(sig)
2095						PC_FORMAT_ARG);
2096				line_ended();
2097			}
2098
2099			if (!stopped)
2100				/* It's signal-delivery-stop. Inject the signal */
2101				goto restart_tracee;
2102
2103			/* It's group-stop */
2104#ifdef USE_SEIZE
2105			if (use_seize) {
2106				/*
2107				 * This ends ptrace-stop, but does *not* end group-stop.
2108				 * This makes stopping signals work properly on straced process
2109				 * (that is, process really stops. It used to continue to run).
2110				 */
2111				if (ptrace_restart(PTRACE_LISTEN, tcp, 0) < 0) {
2112					cleanup();
2113					return -1;
2114				}
2115				tcp->curcol = curcol;
2116				continue;
2117			}
2118			/* We don't have PTRACE_LISTEN support... */
2119#endif
2120			goto restart_tracee;
2121		}
2122
2123		/* We handled quick cases, we are permitted to interrupt now. */
2124		if (interrupted)
2125			return 0;
2126
2127		/* This should be syscall entry or exit.
2128		 * (Or it still can be that pesky post-execve SIGTRAP!)
2129		 * Handle it.
2130		 */
2131		if (trace_syscall(tcp) < 0 && !tcp->ptrace_errno) {
2132			/* ptrace() failed in trace_syscall() with ESRCH.
2133			 * Likely a result of process disappearing mid-flight.
2134			 * Observed case: exit_group() terminating
2135			 * all processes in thread group.
2136			 * We assume that ptrace error was caused by process death.
2137			 * We used to detach(tcp) here, but since we no longer
2138			 * implement "detach before death" policy/hack,
2139			 * we can let this process to report its death to us
2140			 * normally, via WIFEXITED or WIFSIGNALED wait status.
2141			 */
2142			tcp->curcol = curcol;
2143			continue;
2144		}
2145 restart_tracee_with_sig_0:
2146		sig = 0;
2147 restart_tracee:
2148		/* Remember current print column before continuing. */
2149		tcp->curcol = curcol;
2150		if (ptrace_restart(PTRACE_SYSCALL, tcp, sig) < 0) {
2151			cleanup();
2152			return -1;
2153		}
2154	}
2155	return 0;
2156}
2157
2158int
2159main(int argc, char *argv[])
2160{
2161	init(argc, argv);
2162
2163	/* Run main tracing loop */
2164	if (trace() < 0)
2165		return 1;
2166
2167	cleanup();
2168	fflush(NULL);
2169	if (exit_code > 0xff) {
2170		/* Avoid potential core file clobbering.  */
2171		struct rlimit rlim = {0, 0};
2172		setrlimit(RLIMIT_CORE, &rlim);
2173
2174		/* Child was killed by a signal, mimic that.  */
2175		exit_code &= 0xff;
2176		signal(exit_code, SIG_DFL);
2177		raise(exit_code);
2178		/* Paranoia - what if this signal is not fatal?
2179		   Exit with 128 + signo then.  */
2180		exit_code += 128;
2181	}
2182
2183	return exit_code;
2184}
2185