strace.c revision e83e157021177930b64ec4aa4983bbe13b39e91b
1/*
2 * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3 * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4 * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5 * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 *	$Id$
31 */
32
33#include "defs.h"
34
35#include <sys/types.h>
36#include <stdarg.h>
37#include <signal.h>
38#include <errno.h>
39#include <sys/param.h>
40#include <fcntl.h>
41#include <sys/resource.h>
42#include <sys/wait.h>
43#include <sys/stat.h>
44#include <pwd.h>
45#include <grp.h>
46#include <string.h>
47#include <dirent.h>
48#include <sys/utsname.h>
49
50# include <asm/unistd.h>
51# if defined __NR_tkill
52#  define my_tkill(tid, sig) syscall(__NR_tkill, (tid), (sig))
53# else
   /* kill() may arbitrarily choose the target task of the process group,
      while we later wait on that specific TID.  PID process waits become
      TID task-specific waits for a process under ptrace(2).  */
57#  warning "Neither tkill(2) nor tgkill(2) available, risk of strace hangs!"
58#  define my_tkill(tid, sig) kill((tid), (sig))
59# endif
60
61#if defined(IA64)
62# include <asm/ptrace_offsets.h>
63#endif
64
65extern char **environ;
66extern int optind;
67extern char *optarg;
68
69int debug = 0, followfork = 0;
70unsigned int ptrace_setoptions = 0;
71/* Which WSTOPSIG(status) value marks syscall traps? */
72static unsigned int syscall_trap_sig = SIGTRAP;
73int dtime = 0, xflag = 0, qflag = 0;
74cflag_t cflag = CFLAG_NONE;
75static int iflag = 0, pflag_seen = 0, rflag = 0, tflag = 0;
76
/*
 * Interruptibility modes selectable with "-I n".
 * The numeric values are the ones the user types on the command line.
 */
enum {
	/* -I was not given */
	INTR_NOT_SET        = 0,
	/* don't block/ignore any signals */
	INTR_ANYWHERE       = 1,
	/* block fatal signals while decoding syscall. default */
	INTR_WHILE_WAIT     = 2,
	/* block fatal signals. default if '-o FILE PROG' */
	INTR_NEVER          = 3,
	/* block fatal signals and SIGTSTP (^Z) */
	INTR_BLOCK_TSTP_TOO = 4,
	/* one past the last valid mode */
	NUM_INTR_OPTS
};

/* The mode currently in effect. */
static int opt_intr;

/* The signal mask is only manipulated while this mode is active: */
#define interactive (opt_intr == INTR_WHILE_WAIT)
89
/*
 * daemonized_tracer implements the -D option.
 *
 * With -D strace forks twice and the roles are reversed compared to
 * the normal case: the *grandparent* process exec's and becomes the
 * traced process.  The child exits (so that the traced process does
 * not end up with children it doesn't expect to have), and the
 * grandchild attaches to the grandparent, much like strace -p PID.
 *
 * This gives more transparent interaction when the traced process and
 * its parent communicate via signals, wait() etc.  Without -D the
 * strace process gets lodged in between them and disrupts the
 * parent<->child link.
 */
static bool daemonized_tracer = 0;

/*
 * post_attach_sigstop: flag OR-ed into tcp->flags right after attaching.
 * use_seize: true when post_attach_sigstop has been cleared to 0.
 */
#ifndef USE_SEIZE
# define post_attach_sigstop TCB_IGNORE_ONE_SIGSTOP
# define use_seize 0
#else
static int post_attach_sigstop = TCB_IGNORE_ONE_SIGSTOP;
# define use_seize (post_attach_sigstop == 0)
#endif
111
112/* Sometimes we want to print only succeeding syscalls. */
113int not_failing_only = 0;
114
115/* Show path associated with fd arguments */
116int show_fd_path = 0;
117
118/* are we filtering traces based on paths? */
119int tracing_paths = 0;
120
121static int exit_code = 0;
122static int strace_child = 0;
123static int strace_tracer_pid = 0;
124
125static char *username = NULL;
126static uid_t run_uid;
127static gid_t run_gid;
128
129int max_strlen = DEFAULT_STRLEN;
130static int acolumn = DEFAULT_ACOLUMN;
131static char *acolumn_spaces;
132static char *outfname = NULL;
133static FILE *outf;
134struct tcb *printing_tcp = NULL;
135static int curcol;
136static struct tcb **tcbtab;
137static unsigned int nprocs, tcbtabsize;
138static const char *progname;
139
140static char *os_release; /* from uname() */
141
142static int detach(struct tcb *tcp);
143static int trace(void);
144static void cleanup(void);
145static void interrupt(int sig);
146static sigset_t empty_set, blocked_set;
147
148#ifdef HAVE_SIG_ATOMIC_T
149static volatile sig_atomic_t interrupted;
150#else
151static volatile int interrupted;
152#endif
153
154static void
155usage(FILE *ofp, int exitval)
156{
157	fprintf(ofp, "\
158usage: strace [-CdDffhiqrtttTvVxxy] [-I n] [-a column] [-e expr]... [-o file]\n\
159              [-p pid]... [-s strsize] [-u username] [-E var=val]...\n\
160              [-P path] [PROG [ARGS]]\n\
161   or: strace -c [-D] [-I n] [-e expr]... [-O overhead] [-S sortby] [-E var=val]...\n\
162              [PROG [ARGS]]\n\
163-c -- count time, calls, and errors for each syscall and report summary\n\
164-C -- like -c but also print regular output while processes are running\n\
165-D -- run tracer process as a detached grandchild, not as parent\n\
166-f -- follow forks, -ff -- with output into separate files\n\
167-F -- attempt to follow vforks\n\
168-i -- print instruction pointer at time of syscall\n\
169-I interruptible\n\
170   1: no signals are blocked\n\
171   2: fatal signals are blocked while decoding syscall (default)\n\
172   3: fatal signals are always blocked (default if '-o FILE PROG')\n\
173   4: fatal signals and SIGTSTP (^Z) are always blocked\n\
174      (useful to make 'strace -o FILE PROG' not stop on ^Z)\n\
175-q -- suppress messages about attaching, detaching, etc.\n\
176-r -- print relative timestamp, -t -- absolute timestamp, -tt -- with usecs\n\
177-T -- print time spent in each syscall\n\
178-v -- verbose mode: print unabbreviated argv, stat, termios, etc. args\n\
179-x -- print non-ascii strings in hex, -xx -- print all strings in hex\n\
180-y -- print paths associated with file descriptor arguments\n\
181-h -- print help message\n\
182-V -- print version\n\
183-a column -- alignment COLUMN for printing syscall results (default %d)\n\
184-e expr -- a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
185   options: trace, abbrev, verbose, raw, signal, read, or write\n\
186-o file -- send trace output to FILE instead of stderr\n\
187-O overhead -- set overhead for tracing syscalls to OVERHEAD usecs\n\
188-p pid -- trace process with process id PID, may be repeated\n\
189-s strsize -- limit length of print strings to STRSIZE chars (default %d)\n\
190-S sortby -- sort syscall counts by: time, calls, name, nothing (default %s)\n\
191-u username -- run command as username handling setuid and/or setgid\n\
192-E var=val -- put var=val in the environment for command\n\
193-E var -- remove var from the environment for command\n\
194-P path -- trace accesses to path\n\
195" /* this is broken, so don't document it
196-z -- print only succeeding syscalls\n\
197  */
198, DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
199	exit(exitval);
200}
201
202static void die(void) __attribute__ ((noreturn));
203static void die(void)
204{
205	if (strace_tracer_pid == getpid()) {
206		cflag = 0;
207		cleanup();
208	}
209	exit(1);
210}
211
212static void verror_msg(int err_no, const char *fmt, va_list p)
213{
214	char *msg;
215
216	fflush(NULL);
217
218	/* We want to print entire message with single fprintf to ensure
219	 * message integrity if stderr is shared with other programs.
220	 * Thus we use vasprintf + single fprintf.
221	 */
222	msg = NULL;
223	if (vasprintf(&msg, fmt, p) >= 0) {
224		if (err_no)
225			fprintf(stderr, "%s: %s: %s\n", progname, msg, strerror(err_no));
226		else
227			fprintf(stderr, "%s: %s\n", progname, msg);
228		free(msg);
229	} else {
230		/* malloc in vasprintf failed, try it without malloc */
231		fprintf(stderr, "%s: ", progname);
232		vfprintf(stderr, fmt, p);
233		if (err_no)
234			fprintf(stderr, ": %s\n", strerror(err_no));
235		else
236			putc('\n', stderr);
237	}
238	/* We don't switch stderr to buffered, thus fprintf(stderr)
239	 * always flushes its output and this is not necessary: */
240	/* fflush(stderr); */
241}
242
/* Report a printf-style formatted message on stderr, without errno text. */
void error_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(0, fmt, args);
	va_end(args);
}
250
/*
 * Report a printf-style formatted message on stderr (no errno text),
 * then terminate via die().  Does not return.
 */
void error_msg_and_die(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(0, fmt, args);
	die();
}
258
/*
 * Report a printf-style formatted message on stderr, followed by the
 * description of the current errno value.
 */
void perror_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(errno, fmt, args);
	va_end(args);
}
266
/*
 * Report a printf-style formatted message on stderr, followed by the
 * description of the current errno value, then terminate via die().
 * Does not return.
 */
void perror_msg_and_die(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(errno, fmt, args);
	die();
}
274
/*
 * Report "Out of memory" and terminate.
 * If we get here a second time (i.e. while already reporting),
 * exit immediately without trying to print anything again.
 */
void die_out_of_memory(void)
{
	static bool already_reporting = 0;

	if (!already_reporting) {
		already_reporting = 1;
		error_msg_and_die("Out of memory");
	}
	exit(1);
}
283
/*
 * Glue for systems without a MMU that cannot provide fork():
 * there, fork() is replaced by vfork() and strace_vforked is 1.
 */
#ifndef HAVE_FORK
# define strace_vforked 1
# define fork()         vfork()
#else
# define strace_vforked 0
#endif
291
#ifdef USE_SEIZE
/*
 * Attach to pid.
 * When use_seize is set: PTRACE_SEIZE followed by PTRACE_INTERRUPT,
 * stopping at the first request that fails.
 * Otherwise: plain PTRACE_ATTACH.
 * Returns the result of the last ptrace() call made.
 */
static int
ptrace_attach_or_seize(int pid)
{
	int rc;

	if (use_seize) {
		rc = ptrace(PTRACE_SEIZE, pid, 0, PTRACE_SEIZE_DEVEL);
		if (rc == 0)
			rc = ptrace(PTRACE_INTERRUPT, pid, 0, 0);
		return rc;
	}
	return ptrace(PTRACE_ATTACH, pid, 0, 0);
}
#else
# define ptrace_attach_or_seize(pid) ptrace(PTRACE_ATTACH, (pid), 0, 0)
#endif
308
/* Make sure FD_CLOEXEC is set on descriptor fd. */
static void
set_cloexec_flag(int fd)
{
	int fdflags = fcntl(fd, F_GETFD);

	if (fdflags < 0) {
		/* Can happen only if fd is bad.
		 * Should never happen: if it does, we have a bug
		 * in the caller. Therefore we just abort
		 * instead of propagating the error.
		 */
		perror_msg_and_die("fcntl(%d, F_GETFD)", fd);
	}

	/* Nothing to do if the flag is already there. */
	if ((fdflags | FD_CLOEXEC) != fdflags)
		fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC); /* never fails */
}
330
/*
 * Exchange the real and effective user ids.
 * When strace is a setuid executable, this has to be done before and
 * after filesystem and process management operations.
 * A no-op when both ids are equal; dies if setreuid() fails.
 */
static void
swap_uid(void)
{
	int effective = geteuid();
	int real = getuid();

	if (effective == real)
		return;
	if (setreuid(effective, real) < 0) {
		perror_msg_and_die("setreuid");
	}
}
344
/* Use the large-file variant of fopen() where it is available. */
#if _LFS64_LARGEFILE
# define fopen_for_output fopen64
#else
# define fopen_for_output fopen
#endif

/*
 * Open path for writing, with the uids swapped during the open,
 * and mark the resulting descriptor close-on-exec.
 * Dies if the file cannot be opened.
 */
static FILE *
strace_fopen(const char *path)
{
	FILE *stream;

	swap_uid();
	stream = fopen_for_output(path, "w");
	if (stream == NULL)
		perror_msg_and_die("Can't fopen '%s'", path);
	swap_uid();

	set_cloexec_flag(fileno(stream));
	return stream;
}
364
/* Pid of the shell started by strace_popen(). */
static int popen_pid = 0;

#ifndef _PATH_BSHELL
# define _PATH_BSHELL "/bin/sh"
#endif

/*
 * Start "sh -c command" with its stdin connected to a pipe and return
 * a stream for writing into that pipe.
 *
 * We cannot use standard popen(3) here because we have to distinguish
 * popen child process from other processes we trace, and standard popen(3)
 * does not export its child's pid.  The pid is stored in popen_pid.
 */
static FILE *
strace_popen(const char *command)
{
	FILE *stream;
	int pipefd[2];	/* [0]: read end, [1]: write end */

	swap_uid();
	if (pipe(pipefd) < 0)
		perror_msg_and_die("pipe");

	set_cloexec_flag(pipefd[1]); /* never fails */

	popen_pid = vfork();
	if (popen_pid == -1)
		perror_msg_and_die("vfork");

	if (popen_pid == 0) {
		/* child: the read end of the pipe becomes stdin */
		close(pipefd[1]);
		if (pipefd[0] != 0) {
			if (dup2(pipefd[0], 0) != 0)
				perror_msg_and_die("dup2");
			close(pipefd[0]);
		}
		execl(_PATH_BSHELL, "sh", "-c", command, NULL);
		perror_msg_and_die("Can't execute '%s'", _PATH_BSHELL);
	}

	/* parent: keep only the write end */
	close(pipefd[0]);
	swap_uid();
	stream = fdopen(pipefd[1], "w");
	if (stream == NULL)
		die_out_of_memory();
	return stream;
}
412
413static void
414newoutf(struct tcb *tcp)
415{
416	if (outfname && followfork > 1) {
417		char name[520 + sizeof(int) * 3];
418		sprintf(name, "%.512s.%u", outfname, tcp->pid);
419		tcp->outf = strace_fopen(name);
420	}
421}
422
423static void
424startup_attach(void)
425{
426	int tcbi;
427	struct tcb *tcp;
428
429	/*
430	 * Block user interruptions as we would leave the traced
431	 * process stopped (process state T) if we would terminate in
432	 * between PTRACE_ATTACH and wait4() on SIGSTOP.
433	 * We rely on cleanup() from this point on.
434	 */
435	if (interactive)
436		sigprocmask(SIG_BLOCK, &blocked_set, NULL);
437
438	if (daemonized_tracer) {
439		pid_t pid = fork();
440		if (pid < 0) {
441			perror_msg_and_die("fork");
442		}
443		if (pid) { /* parent */
444			/*
445			 * Wait for grandchild to attach to straced process
446			 * (grandparent). Grandchild SIGKILLs us after it attached.
447			 * Grandparent's wait() is unblocked by our death,
448			 * it proceeds to exec the straced program.
449			 */
450			pause();
451			_exit(0); /* paranoia */
452		}
453		/* grandchild */
454		/* We will be the tracer process. Remember our new pid: */
455		strace_tracer_pid = getpid();
456	}
457
458	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
459		tcp = tcbtab[tcbi];
460
461		/* Is this a process we should attach to, but not yet attached? */
462		if ((tcp->flags & (TCB_ATTACHED | TCB_STARTUP)) != TCB_ATTACHED)
463			continue; /* no */
464
465		/* Reinitialize the output since it may have changed */
466		tcp->outf = outf;
467		newoutf(tcp);
468
469		if (followfork && !daemonized_tracer) {
470			char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
471			DIR *dir;
472
473			sprintf(procdir, "/proc/%d/task", tcp->pid);
474			dir = opendir(procdir);
475			if (dir != NULL) {
476				unsigned int ntid = 0, nerr = 0;
477				struct dirent *de;
478
479				while ((de = readdir(dir)) != NULL) {
480					struct tcb *cur_tcp;
481					int tid;
482
483					if (de->d_fileno == 0)
484						continue;
485					tid = atoi(de->d_name);
486					if (tid <= 0)
487						continue;
488					++ntid;
489					if (ptrace_attach_or_seize(tid) < 0) {
490						++nerr;
491						if (debug)
492							fprintf(stderr, "attach to pid %d failed\n", tid);
493						continue;
494					}
495					if (debug)
496						fprintf(stderr, "attach to pid %d succeeded\n", tid);
497					cur_tcp = tcp;
498					if (tid != tcp->pid)
499						cur_tcp = alloctcb(tid);
500					cur_tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
501				}
502				closedir(dir);
503				if (interactive) {
504					sigprocmask(SIG_SETMASK, &empty_set, NULL);
505					if (interrupted)
506						goto ret;
507					sigprocmask(SIG_BLOCK, &blocked_set, NULL);
508				}
509				ntid -= nerr;
510				if (ntid == 0) {
511					perror("attach: ptrace(PTRACE_ATTACH, ...)");
512					droptcb(tcp);
513					continue;
514				}
515				if (!qflag) {
516					fprintf(stderr, ntid > 1
517? "Process %u attached with %u threads - interrupt to quit\n"
518: "Process %u attached - interrupt to quit\n",
519						tcp->pid, ntid);
520				}
521				if (!(tcp->flags & TCB_STARTUP)) {
522					/* -p PID, we failed to attach to PID itself
523					 * but did attach to some of its sibling threads.
524					 * Drop PID's tcp.
525					 */
526					droptcb(tcp);
527				}
528				continue;
529			} /* if (opendir worked) */
530		} /* if (-f) */
531		if (ptrace_attach_or_seize(tcp->pid) < 0) {
532			perror("attach: ptrace(PTRACE_ATTACH, ...)");
533			droptcb(tcp);
534			continue;
535		}
536		tcp->flags |= TCB_STARTUP | post_attach_sigstop;
537		if (debug)
538			fprintf(stderr, "attach to pid %d (main) succeeded\n", tcp->pid);
539
540		if (daemonized_tracer) {
541			/*
542			 * It is our grandparent we trace, not a -p PID.
543			 * Don't want to just detach on exit, so...
544			 */
545			tcp->flags &= ~TCB_ATTACHED;
546			/*
547			 * Make parent go away.
548			 * Also makes grandparent's wait() unblock.
549			 */
550			kill(getppid(), SIGKILL);
551		}
552
553		if (!qflag)
554			fprintf(stderr,
555				"Process %u attached - interrupt to quit\n",
556				tcp->pid);
557	} /* for each tcbtab[] */
558
559 ret:
560	if (interactive)
561		sigprocmask(SIG_SETMASK, &empty_set, NULL);
562}
563
564static void
565startup_child(char **argv)
566{
567	struct stat statbuf;
568	const char *filename;
569	char pathname[MAXPATHLEN];
570	int pid = 0;
571	struct tcb *tcp;
572
573	filename = argv[0];
574	if (strchr(filename, '/')) {
575		if (strlen(filename) > sizeof pathname - 1) {
576			errno = ENAMETOOLONG;
577			perror_msg_and_die("exec");
578		}
579		strcpy(pathname, filename);
580	}
581#ifdef USE_DEBUGGING_EXEC
582	/*
583	 * Debuggers customarily check the current directory
584	 * first regardless of the path but doing that gives
585	 * security geeks a panic attack.
586	 */
587	else if (stat(filename, &statbuf) == 0)
588		strcpy(pathname, filename);
589#endif /* USE_DEBUGGING_EXEC */
590	else {
591		const char *path;
592		int m, n, len;
593
594		for (path = getenv("PATH"); path && *path; path += m) {
595			const char *colon = strchr(path, ':');
596			if (colon) {
597				n = colon - path;
598				m = n + 1;
599			}
600			else
601				m = n = strlen(path);
602			if (n == 0) {
603				if (!getcwd(pathname, MAXPATHLEN))
604					continue;
605				len = strlen(pathname);
606			}
607			else if (n > sizeof pathname - 1)
608				continue;
609			else {
610				strncpy(pathname, path, n);
611				len = n;
612			}
613			if (len && pathname[len - 1] != '/')
614				pathname[len++] = '/';
615			strcpy(pathname + len, filename);
616			if (stat(pathname, &statbuf) == 0 &&
617			    /* Accept only regular files
618			       with some execute bits set.
619			       XXX not perfect, might still fail */
620			    S_ISREG(statbuf.st_mode) &&
621			    (statbuf.st_mode & 0111))
622				break;
623		}
624	}
625	if (stat(pathname, &statbuf) < 0) {
626		perror_msg_and_die("Can't stat '%s'", filename);
627	}
628	strace_child = pid = fork();
629	if (pid < 0) {
630		perror_msg_and_die("fork");
631	}
632	if ((pid != 0 && daemonized_tracer) /* -D: parent to become a traced process */
633	 || (pid == 0 && !daemonized_tracer) /* not -D: child to become a traced process */
634	) {
635		pid = getpid();
636		if (outf != stderr)
637			close(fileno(outf));
638		if (!daemonized_tracer && !use_seize) {
639			if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0) {
640				perror_msg_and_die("ptrace(PTRACE_TRACEME, ...)");
641			}
642		}
643
644		if (username != NULL) {
645			uid_t run_euid = run_uid;
646			gid_t run_egid = run_gid;
647
648			if (statbuf.st_mode & S_ISUID)
649				run_euid = statbuf.st_uid;
650			if (statbuf.st_mode & S_ISGID)
651				run_egid = statbuf.st_gid;
652			/*
653			 * It is important to set groups before we
654			 * lose privileges on setuid.
655			 */
656			if (initgroups(username, run_gid) < 0) {
657				perror_msg_and_die("initgroups");
658			}
659			if (setregid(run_gid, run_egid) < 0) {
660				perror_msg_and_die("setregid");
661			}
662			if (setreuid(run_uid, run_euid) < 0) {
663				perror_msg_and_die("setreuid");
664			}
665		}
666		else if (geteuid() != 0)
667			setreuid(run_uid, run_uid);
668
669		if (!daemonized_tracer) {
670			/*
671			 * Induce a ptrace stop. Tracer (our parent)
672			 * will resume us with PTRACE_SYSCALL and display
673			 * the immediately following execve syscall.
674			 * Can't do this on NOMMU systems, we are after
675			 * vfork: parent is blocked, stopping would deadlock.
676			 */
677			if (!strace_vforked)
678				kill(pid, SIGSTOP);
679		} else {
680			struct sigaction sv_sigchld;
681			sigaction(SIGCHLD, NULL, &sv_sigchld);
682			/*
683			 * Make sure it is not SIG_IGN, otherwise wait
684			 * will not block.
685			 */
686			signal(SIGCHLD, SIG_DFL);
687			/*
688			 * Wait for grandchild to attach to us.
689			 * It kills child after that, and wait() unblocks.
690			 */
691			alarm(3);
692			wait(NULL);
693			alarm(0);
694			sigaction(SIGCHLD, &sv_sigchld, NULL);
695		}
696
697		execv(pathname, argv);
698		perror_msg_and_die("exec");
699	}
700
701	/* We are the tracer */
702
703	if (!daemonized_tracer) {
704		if (!use_seize) {
705			/* child did PTRACE_TRACEME, nothing to do in parent */
706		} else {
707			if (!strace_vforked) {
708				/* Wait until child stopped itself */
709				int status;
710				while (waitpid(pid, &status, WSTOPPED) < 0) {
711					if (errno == EINTR)
712						continue;
713					perror_msg_and_die("waitpid");
714				}
715				if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) {
716					kill(pid, SIGKILL);
717					perror_msg_and_die("Unexpected wait status %x", status);
718				}
719			}
720			/* Else: vforked case, we have no way to sync.
721			 * Just attach to it as soon as possible.
722			 * This means that we may miss a few first syscalls...
723			 */
724
725			if (ptrace_attach_or_seize(pid)) {
726				kill(pid, SIGKILL);
727				perror_msg_and_die("Can't attach to %d", pid);
728			}
729			if (!strace_vforked)
730				kill(pid, SIGCONT);
731		}
732		tcp = alloctcb(pid);
733		if (!strace_vforked)
734			tcp->flags |= TCB_STARTUP | post_attach_sigstop;
735		else
736			tcp->flags |= TCB_STARTUP;
737	}
738	else {
739		/* With -D, *we* are child here, IOW: different pid. Fetch it: */
740		strace_tracer_pid = getpid();
741		/* The tracee is our parent: */
742		pid = getppid();
743		tcp = alloctcb(pid);
744		/* We want subsequent startup_attach() to attach to it: */
745		tcp->flags |= TCB_ATTACHED;
746	}
747}
748
/*
 * Send sig to pid, ignoring the outcome, and leave errno exactly
 * as it was on entry.
 */
static void kill_save_errno(pid_t pid, int sig)
{
	const int errno_on_entry = errno;

	(void) kill(pid, sig);

	errno = errno_on_entry;
}
756
757/*
758 * Test whether the kernel support PTRACE_O_TRACECLONE et al options.
759 * First fork a new child, call ptrace with PTRACE_SETOPTIONS on it,
760 * and then see which options are supported by the kernel.
761 */
762static void
763test_ptrace_setoptions_followfork(void)
764{
765	int pid, expected_grandchild = 0, found_grandchild = 0;
766	const unsigned int test_options = PTRACE_O_TRACECLONE |
767					  PTRACE_O_TRACEFORK |
768					  PTRACE_O_TRACEVFORK;
769
770	pid = fork();
771	if (pid < 0)
772		perror_msg_and_die("fork");
773	if (pid == 0) {
774		pid = getpid();
775		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
776			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
777					   __func__);
778		kill(pid, SIGSTOP);
779		if (fork() < 0)
780			perror_msg_and_die("fork");
781		_exit(0);
782	}
783
784	while (1) {
785		int status, tracee_pid;
786
787		errno = 0;
788		tracee_pid = wait(&status);
789		if (tracee_pid <= 0) {
790			if (errno == EINTR)
791				continue;
792			else if (errno == ECHILD)
793				break;
794			kill_save_errno(pid, SIGKILL);
795			perror_msg_and_die("%s: unexpected wait result %d",
796					   __func__, tracee_pid);
797		}
798		if (WIFEXITED(status)) {
799			if (WEXITSTATUS(status)) {
800				if (tracee_pid != pid)
801					kill_save_errno(pid, SIGKILL);
802				error_msg_and_die("%s: unexpected exit status %u",
803						  __func__, WEXITSTATUS(status));
804			}
805			continue;
806		}
807		if (WIFSIGNALED(status)) {
808			if (tracee_pid != pid)
809				kill_save_errno(pid, SIGKILL);
810			error_msg_and_die("%s: unexpected signal %u",
811					  __func__, WTERMSIG(status));
812		}
813		if (!WIFSTOPPED(status)) {
814			if (tracee_pid != pid)
815				kill_save_errno(tracee_pid, SIGKILL);
816			kill(pid, SIGKILL);
817			error_msg_and_die("%s: unexpected wait status %x",
818					  __func__, status);
819		}
820		if (tracee_pid != pid) {
821			found_grandchild = tracee_pid;
822			if (ptrace(PTRACE_CONT, tracee_pid, 0, 0) < 0) {
823				kill_save_errno(tracee_pid, SIGKILL);
824				kill_save_errno(pid, SIGKILL);
825				perror_msg_and_die("PTRACE_CONT doesn't work");
826			}
827			continue;
828		}
829		switch (WSTOPSIG(status)) {
830		case SIGSTOP:
831			if (ptrace(PTRACE_SETOPTIONS, pid, 0, test_options) < 0
832			    && errno != EINVAL && errno != EIO)
833				perror_msg("PTRACE_SETOPTIONS");
834			break;
835		case SIGTRAP:
836			if (status >> 16 == PTRACE_EVENT_FORK) {
837				long msg = 0;
838
839				if (ptrace(PTRACE_GETEVENTMSG, pid,
840					   NULL, (long) &msg) == 0)
841					expected_grandchild = msg;
842			}
843			break;
844		}
845		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) {
846			kill_save_errno(pid, SIGKILL);
847			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
848		}
849	}
850	if (expected_grandchild && expected_grandchild == found_grandchild) {
851		ptrace_setoptions |= test_options;
852		if (debug)
853			fprintf(stderr, "ptrace_setoptions = %#x\n",
854				ptrace_setoptions);
855		return;
856	}
857	error_msg("Test for PTRACE_O_TRACECLONE failed, "
858		  "giving up using this feature.");
859}
860
861/*
862 * Test whether the kernel support PTRACE_O_TRACESYSGOOD.
863 * First fork a new child, call ptrace(PTRACE_SETOPTIONS) on it,
864 * and then see whether it will stop with (SIGTRAP | 0x80).
865 *
866 * Use of this option enables correct handling of user-generated SIGTRAPs,
867 * and SIGTRAPs generated by special instructions such as int3 on x86:
868 * _start:	.globl	_start
869 *		int3
870 *		movl	$42, %ebx
871 *		movl	$1, %eax
872 *		int	$0x80
873 * (compile with: "gcc -nostartfiles -nostdlib -o int3 int3.S")
874 */
875static void
876test_ptrace_setoptions_for_all(void)
877{
878	const unsigned int test_options = PTRACE_O_TRACESYSGOOD |
879					  PTRACE_O_TRACEEXEC;
880	int pid;
881	int it_worked = 0;
882
883	pid = fork();
884	if (pid < 0)
885		perror_msg_and_die("fork");
886
887	if (pid == 0) {
888		pid = getpid();
889		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
890			/* Note: exits with exitcode 1 */
891			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
892					   __func__);
893		kill(pid, SIGSTOP);
894		_exit(0); /* parent should see entry into this syscall */
895	}
896
897	while (1) {
898		int status, tracee_pid;
899
900		errno = 0;
901		tracee_pid = wait(&status);
902		if (tracee_pid <= 0) {
903			if (errno == EINTR)
904				continue;
905			kill_save_errno(pid, SIGKILL);
906			perror_msg_and_die("%s: unexpected wait result %d",
907					   __func__, tracee_pid);
908		}
909		if (WIFEXITED(status)) {
910			if (WEXITSTATUS(status) == 0)
911				break;
912			error_msg_and_die("%s: unexpected exit status %u",
913					  __func__, WEXITSTATUS(status));
914		}
915		if (WIFSIGNALED(status)) {
916			error_msg_and_die("%s: unexpected signal %u",
917					  __func__, WTERMSIG(status));
918		}
919		if (!WIFSTOPPED(status)) {
920			kill(pid, SIGKILL);
921			error_msg_and_die("%s: unexpected wait status %x",
922					  __func__, status);
923		}
924		if (WSTOPSIG(status) == SIGSTOP) {
925			/*
926			 * We don't check "options aren't accepted" error.
927			 * If it happens, we'll never get (SIGTRAP | 0x80),
928			 * and thus will decide to not use the option.
929			 * IOW: the outcome of the test will be correct.
930			 */
931			if (ptrace(PTRACE_SETOPTIONS, pid, 0L, test_options) < 0
932			    && errno != EINVAL && errno != EIO)
933				perror_msg("PTRACE_SETOPTIONS");
934		}
935		if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
936			it_worked = 1;
937		}
938		if (ptrace(PTRACE_SYSCALL, pid, 0L, 0L) < 0) {
939			kill_save_errno(pid, SIGKILL);
940			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
941		}
942	}
943
944	if (it_worked) {
945		syscall_trap_sig = (SIGTRAP | 0x80);
946		ptrace_setoptions |= test_options;
947		if (debug)
948			fprintf(stderr, "ptrace_setoptions = %#x\n",
949				ptrace_setoptions);
950		return;
951	}
952
953	error_msg("Test for PTRACE_O_TRACESYSGOOD failed, "
954		  "giving up using this feature.");
955}
956
# ifdef USE_SEIZE
/*
 * Test whether PTRACE_SEIZE works: fork a child that just pause()s,
 * try to seize it, then kill and reap it.
 * On success post_attach_sigstop is cleared, which turns use_seize on.
 */
static void
test_ptrace_seize(void)
{
	int pid;

	pid = fork();
	if (pid < 0)
		perror_msg_and_die("fork");

	if (pid == 0) {
		/* Test child: sleep until killed. */
		pause();
		_exit(0);
	}

	/* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap.  After
	 * attaching tracee continues to run unless a trap condition occurs.
	 * PTRACE_SEIZE doesn't affect signal or group stop state.
	 */
	if (ptrace(PTRACE_SEIZE, pid, 0, PTRACE_SEIZE_DEVEL) == 0) {
		post_attach_sigstop = 0; /* this sets use_seize to 1 */
	} else if (debug) {
		fprintf(stderr, "PTRACE_SEIZE doesn't work\n");
	}

	kill(pid, SIGKILL);

	/* Reap the child; the only acceptable outcome is death by signal. */
	for (;;) {
		int status;
		int tracee_pid;

		errno = 0;
		tracee_pid = waitpid(pid, &status, 0);
		if (tracee_pid <= 0) {
			if (errno == EINTR)
				continue;
			perror_msg_and_die("%s: unexpected wait result %d",
					 __func__, tracee_pid);
		}
		if (!WIFSIGNALED(status)) {
			error_msg_and_die("%s: unexpected wait status %x",
					__func__, status);
		}
		return;
	}
}
# else /* !USE_SEIZE */
#  define test_ptrace_seize() ((void)0)
# endif
1005
1006/* Noinline: don't want main to have struct utsname permanently on stack */
1007static void __attribute__ ((noinline))
1008get_os_release(void)
1009{
1010	struct utsname u;
1011	if (uname(&u) < 0)
1012		perror_msg_and_die("uname");
1013	os_release = strdup(u.release);
1014	if (!os_release)
1015		die_out_of_memory();
1016}
1017
1018int
1019main(int argc, char *argv[])
1020{
1021	struct tcb *tcp;
1022	int c, pid = 0;
1023	int optF = 0;
1024	struct sigaction sa;
1025
1026	progname = argv[0] ? argv[0] : "strace";
1027
1028	strace_tracer_pid = getpid();
1029
1030	get_os_release();
1031
1032	/* Allocate the initial tcbtab.  */
1033	tcbtabsize = argc;	/* Surely enough for all -p args.  */
1034	tcbtab = calloc(tcbtabsize, sizeof(tcbtab[0]));
1035	if (!tcbtab)
1036		die_out_of_memory();
1037	tcp = calloc(tcbtabsize, sizeof(*tcp));
1038	if (!tcp)
1039		die_out_of_memory();
1040	for (c = 0; c < tcbtabsize; c++)
1041		tcbtab[c] = tcp++;
1042
1043	outf = stderr;
1044	set_sortby(DEFAULT_SORTBY);
1045	set_personality(DEFAULT_PERSONALITY);
1046	qualify("trace=all");
1047	qualify("abbrev=all");
1048	qualify("verbose=all");
1049	qualify("signal=all");
1050	while ((c = getopt(argc, argv,
1051		"+cCdfFhiqrtTvVxyz"
1052		"D"
1053		"a:e:o:O:p:s:S:u:E:P:I:")) != EOF) {
1054		switch (c) {
1055		case 'c':
1056			if (cflag == CFLAG_BOTH) {
1057				error_msg_and_die("-c and -C are mutually exclusive options");
1058			}
1059			cflag = CFLAG_ONLY_STATS;
1060			break;
1061		case 'C':
1062			if (cflag == CFLAG_ONLY_STATS) {
1063				error_msg_and_die("-c and -C are mutually exclusive options");
1064			}
1065			cflag = CFLAG_BOTH;
1066			break;
1067		case 'd':
1068			debug++;
1069			break;
1070		case 'D':
1071			daemonized_tracer = 1;
1072			break;
1073		case 'F':
1074			optF = 1;
1075			break;
1076		case 'f':
1077			followfork++;
1078			break;
1079		case 'h':
1080			usage(stdout, 0);
1081			break;
1082		case 'i':
1083			iflag++;
1084			break;
1085		case 'q':
1086			qflag++;
1087			break;
1088		case 'r':
1089			rflag++;
1090			tflag++;
1091			break;
1092		case 't':
1093			tflag++;
1094			break;
1095		case 'T':
1096			dtime++;
1097			break;
1098		case 'x':
1099			xflag++;
1100			break;
1101		case 'y':
1102			show_fd_path = 1;
1103			break;
1104		case 'v':
1105			qualify("abbrev=none");
1106			break;
1107		case 'V':
1108			printf("%s -- version %s\n", PACKAGE_NAME, VERSION);
1109			exit(0);
1110			break;
1111		case 'z':
1112			not_failing_only = 1;
1113			break;
1114		case 'a':
1115			acolumn = atoi(optarg);
1116			if (acolumn < 0)
1117				error_msg_and_die("Bad column width '%s'", optarg);
1118			break;
1119		case 'e':
1120			qualify(optarg);
1121			break;
1122		case 'o':
1123			outfname = strdup(optarg);
1124			break;
1125		case 'O':
1126			set_overhead(atoi(optarg));
1127			break;
1128		case 'p':
1129			pid = atoi(optarg);
1130			if (pid <= 0) {
1131				error_msg("Invalid process id: '%s'", optarg);
1132				break;
1133			}
1134			if (pid == strace_tracer_pid) {
1135				error_msg("I'm sorry, I can't let you do that, Dave.");
1136				break;
1137			}
1138			tcp = alloc_tcb(pid, 0);
1139			tcp->flags |= TCB_ATTACHED;
1140			pflag_seen++;
1141			break;
1142		case 'P':
1143			tracing_paths = 1;
1144			if (pathtrace_select(optarg)) {
1145				error_msg_and_die("Failed to select path '%s'", optarg);
1146			}
1147			break;
1148		case 's':
1149			max_strlen = atoi(optarg);
1150			if (max_strlen < 0) {
1151				error_msg_and_die("Invalid -%c argument: '%s'", c, optarg);
1152			}
1153			break;
1154		case 'S':
1155			set_sortby(optarg);
1156			break;
1157		case 'u':
1158			username = strdup(optarg);
1159			break;
1160		case 'E':
1161			if (putenv(optarg) < 0)
1162				die_out_of_memory();
1163			break;
1164		case 'I':
1165			opt_intr = atoi(optarg);
1166			if (opt_intr <= 0 || opt_intr >= NUM_INTR_OPTS) {
1167				error_msg_and_die("Invalid -%c argument: '%s'", c, optarg);
1168			}
1169			break;
1170		default:
1171			usage(stderr, 1);
1172			break;
1173		}
1174	}
1175	argv += optind;
1176	/* argc -= optind; - no need, argc is not used below */
1177
1178	acolumn_spaces = malloc(acolumn + 1);
1179	if (!acolumn_spaces)
1180		die_out_of_memory();
1181	memset(acolumn_spaces, ' ', acolumn);
1182	acolumn_spaces[acolumn] = '\0';
1183
1184	/* Must have PROG [ARGS], or -p PID. Not both. */
1185	if (!argv[0] == !pflag_seen)
1186		usage(stderr, 1);
1187
1188	if (pflag_seen && daemonized_tracer) {
1189		error_msg_and_die("-D and -p are mutually exclusive options");
1190	}
1191
1192	if (!followfork)
1193		followfork = optF;
1194
1195	if (followfork > 1 && cflag) {
1196		error_msg_and_die("(-c or -C) and -ff are mutually exclusive options");
1197	}
1198
1199	/* See if they want to run as another user. */
1200	if (username != NULL) {
1201		struct passwd *pent;
1202
1203		if (getuid() != 0 || geteuid() != 0) {
1204			error_msg_and_die("You must be root to use the -u option");
1205		}
1206		pent = getpwnam(username);
1207		if (pent == NULL) {
1208			error_msg_and_die("Cannot find user '%s'", username);
1209		}
1210		run_uid = pent->pw_uid;
1211		run_gid = pent->pw_gid;
1212	}
1213	else {
1214		run_uid = getuid();
1215		run_gid = getgid();
1216	}
1217
1218	if (followfork)
1219		test_ptrace_setoptions_followfork();
1220	test_ptrace_setoptions_for_all();
1221	test_ptrace_seize();
1222
1223	/* Check if they want to redirect the output. */
1224	if (outfname) {
1225		/* See if they want to pipe the output. */
1226		if (outfname[0] == '|' || outfname[0] == '!') {
1227			/*
1228			 * We can't do the <outfname>.PID funny business
1229			 * when using popen, so prohibit it.
1230			 */
1231			if (followfork > 1)
1232				error_msg_and_die("Piping the output and -ff are mutually exclusive");
1233			outf = strace_popen(outfname + 1);
1234		}
1235		else if (followfork <= 1)
1236			outf = strace_fopen(outfname);
1237	}
1238
1239	if (!outfname || outfname[0] == '|' || outfname[0] == '!') {
1240		char *buf = malloc(BUFSIZ);
1241		if (!buf)
1242			die_out_of_memory();
1243		setvbuf(outf, buf, _IOLBF, BUFSIZ);
1244	}
1245	if (outfname && argv[0]) {
1246		if (!opt_intr)
1247			opt_intr = INTR_NEVER;
1248		qflag = 1;
1249	}
1250	if (!opt_intr)
1251		opt_intr = INTR_WHILE_WAIT;
1252
1253	/* argv[0]	-pPID	-oFILE	Default interactive setting
1254	 * yes		0	0	INTR_WHILE_WAIT
1255	 * no		1	0	INTR_WHILE_WAIT
1256	 * yes		0	1	INTR_NEVER
1257	 * no		1	1	INTR_WHILE_WAIT
1258	 */
1259
1260	/* STARTUP_CHILD must be called before the signal handlers get
1261	   installed below as they are inherited into the spawned process.
1262	   Also we do not need to be protected by them as during interruption
1263	   in the STARTUP_CHILD mode we kill the spawned process anyway.  */
1264	if (argv[0])
1265		startup_child(argv);
1266
1267	sigemptyset(&empty_set);
1268	sigemptyset(&blocked_set);
1269	sa.sa_handler = SIG_IGN;
1270	sigemptyset(&sa.sa_mask);
1271	sa.sa_flags = 0;
1272	sigaction(SIGTTOU, &sa, NULL); /* SIG_IGN */
1273	sigaction(SIGTTIN, &sa, NULL); /* SIG_IGN */
1274	if (opt_intr != INTR_ANYWHERE) {
1275		if (opt_intr == INTR_BLOCK_TSTP_TOO)
1276			sigaction(SIGTSTP, &sa, NULL); /* SIG_IGN */
1277		/*
1278		 * In interactive mode (if no -o OUTFILE, or -p PID is used),
1279		 * fatal signals are blocked while syscall stop is processed,
1280		 * and acted on in between, when waiting for new syscall stops.
1281		 * In non-interactive mode, signals are ignored.
1282		 */
1283		if (opt_intr == INTR_WHILE_WAIT) {
1284			sigaddset(&blocked_set, SIGHUP);
1285			sigaddset(&blocked_set, SIGINT);
1286			sigaddset(&blocked_set, SIGQUIT);
1287			sigaddset(&blocked_set, SIGPIPE);
1288			sigaddset(&blocked_set, SIGTERM);
1289			sa.sa_handler = interrupt;
1290		}
1291		/* SIG_IGN, or set handler for these */
1292		sigaction(SIGHUP, &sa, NULL);
1293		sigaction(SIGINT, &sa, NULL);
1294		sigaction(SIGQUIT, &sa, NULL);
1295		sigaction(SIGPIPE, &sa, NULL);
1296		sigaction(SIGTERM, &sa, NULL);
1297	}
1298	/* Make sure SIGCHLD has the default action so that waitpid
1299	   definitely works without losing track of children.  The user
1300	   should not have given us a bogus state to inherit, but he might
1301	   have.  Arguably we should detect SIG_IGN here and pass it on
1302	   to children, but probably noone really needs that.  */
1303	sa.sa_handler = SIG_DFL;
1304	sigaction(SIGCHLD, &sa, NULL);
1305
1306	if (pflag_seen || daemonized_tracer)
1307		startup_attach();
1308
1309	if (trace() < 0)
1310		exit(1);
1311
1312	cleanup();
1313	fflush(NULL);
1314	if (exit_code > 0xff) {
1315		/* Avoid potential core file clobbering.  */
1316		struct rlimit rlim = {0, 0};
1317		setrlimit(RLIMIT_CORE, &rlim);
1318
1319		/* Child was killed by a signal, mimic that.  */
1320		exit_code &= 0xff;
1321		signal(exit_code, SIG_DFL);
1322		raise(exit_code);
1323		/* Paranoia - what if this signal is not fatal?
1324		   Exit with 128 + signo then.  */
1325		exit_code += 128;
1326	}
1327	exit(exit_code);
1328}
1329
1330static void
1331expand_tcbtab(void)
1332{
1333	/* Allocate some more TCBs and expand the table.
1334	   We don't want to relocate the TCBs because our
1335	   callers have pointers and it would be a pain.
1336	   So tcbtab is a table of pointers.  Since we never
1337	   free the TCBs, we allocate a single chunk of many.  */
1338	int i = tcbtabsize;
1339	struct tcb *newtcbs = calloc(tcbtabsize, sizeof(newtcbs[0]));
1340	struct tcb **newtab = realloc(tcbtab, tcbtabsize * 2 * sizeof(tcbtab[0]));
1341	if (!newtab || !newtcbs)
1342		die_out_of_memory();
1343	tcbtabsize *= 2;
1344	tcbtab = newtab;
1345	while (i < tcbtabsize)
1346		tcbtab[i++] = newtcbs++;
1347}
1348
1349struct tcb *
1350alloc_tcb(int pid, int command_options_parsed)
1351{
1352	int i;
1353	struct tcb *tcp;
1354
1355	if (nprocs == tcbtabsize)
1356		expand_tcbtab();
1357
1358	for (i = 0; i < tcbtabsize; i++) {
1359		tcp = tcbtab[i];
1360		if ((tcp->flags & TCB_INUSE) == 0) {
1361			memset(tcp, 0, sizeof(*tcp));
1362			tcp->pid = pid;
1363			tcp->flags = TCB_INUSE;
1364			tcp->outf = outf; /* Initialise to current out file */
1365#if SUPPORTED_PERSONALITIES > 1
1366			tcp->currpers = current_personality;
1367#endif
1368			nprocs++;
1369			if (debug)
1370				fprintf(stderr, "new tcb for pid %d, active tcbs:%d\n", tcp->pid, nprocs);
1371			if (command_options_parsed)
1372				newoutf(tcp);
1373			return tcp;
1374		}
1375	}
1376	error_msg_and_die("bug in alloc_tcb");
1377}
1378
1379static struct tcb *
1380pid2tcb(int pid)
1381{
1382	int i;
1383
1384	if (pid <= 0)
1385		return NULL;
1386
1387	for (i = 0; i < tcbtabsize; i++) {
1388		struct tcb *tcp = tcbtab[i];
1389		if (tcp->pid == pid && (tcp->flags & TCB_INUSE))
1390			return tcp;
1391	}
1392
1393	return NULL;
1394}
1395
1396void
1397droptcb(struct tcb *tcp)
1398{
1399	if (tcp->pid == 0)
1400		return;
1401
1402	nprocs--;
1403	if (debug)
1404		fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
1405
1406	if (outfname && followfork > 1 && tcp->outf)
1407		fclose(tcp->outf);
1408
1409	memset(tcp, 0, sizeof(*tcp));
1410}
1411
/*
 * Detach from the traced process described by tcp and drop its tcb.
 *
 * Never call detach() twice on the same process as both unattached and
 * attached-unstopped processes give the same ESRCH.  For unattached process we
 * would SIGSTOP it and wait for its SIGSTOP notification forever.
 *
 * Returns the result of the initial PTRACE_DETACH request, or of the
 * last PTRACE_CONT issued while waiting for the SIGSTOP to show up.
 */

static int
detach(struct tcb *tcp)
{
	int error = 0;
	int status, catch_sigstop;

	if (tcp->flags & TCB_BPTSET)
		clearbpt(tcp);

	/*
	 * Linux wrongly insists the child be stopped
	 * before detaching.  Arghh.  We go through hoops
	 * to make a clean break of things.
	 */
#if defined(SPARC)
#undef PTRACE_DETACH
#define PTRACE_DETACH PTRACE_SUNDETACH
#endif
	/*
	 * We attached but possibly didn't see the expected SIGSTOP.
	 * We must catch exactly one as otherwise the detached process
	 * would be left stopped (process state T).
	 */
	catch_sigstop = (tcp->flags & TCB_IGNORE_ONE_SIGSTOP);
	error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, 0);
	if (error == 0) {
		/* On a clear day, you can see forever. */
	}
	else if (errno != ESRCH) {
		/* Shouldn't happen. */
		perror("detach: ptrace(PTRACE_DETACH, ...)");
	}
	/* ESRCH: probe with signal 0 to see whether the process still exists. */
	else if (my_tkill(tcp->pid, 0) < 0) {
		if (errno != ESRCH)
			perror("detach: checking sanity");
	}
	/* It exists; unless a SIGSTOP is already expected, send one ourselves. */
	else if (!catch_sigstop && my_tkill(tcp->pid, SIGSTOP) < 0) {
		if (errno != ESRCH)
			perror("detach: stopping child");
	}
	else
		catch_sigstop = 1;
	if (catch_sigstop) {
		/*
		 * Wait for stops until the SIGSTOP is seen (then detach for
		 * real) or the process is gone; other stops are passed on
		 * with PTRACE_CONT.
		 */
		for (;;) {
#ifdef __WALL
			if (wait4(tcp->pid, &status, __WALL, NULL) < 0) {
				if (errno == ECHILD) /* Already gone.  */
					break;
				if (errno != EINVAL) {
					perror("detach: waiting");
					break;
				}
#endif /* __WALL */
				/* No __WALL here.  */
				if (waitpid(tcp->pid, &status, 0) < 0) {
					if (errno != ECHILD) {
						perror("detach: waiting");
						break;
					}
#ifdef __WCLONE
					/* If no processes, try clones.  */
					if (wait4(tcp->pid, &status, __WCLONE,
						  NULL) < 0) {
						if (errno != ECHILD)
							perror("detach: waiting");
						break;
					}
#endif /* __WCLONE */
				}
#ifdef __WALL
			}
#endif
			if (!WIFSTOPPED(status)) {
				/* Au revoir, mon ami. */
				break;
			}
			if (WSTOPSIG(status) == SIGSTOP) {
				ptrace_restart(PTRACE_DETACH, tcp, 0);
				break;
			}
			/* Some other stop: resume, suppressing the syscall trap signal. */
			error = ptrace_restart(PTRACE_CONT, tcp,
					WSTOPSIG(status) == syscall_trap_sig ? 0
					: WSTOPSIG(status));
			if (error < 0)
				break;
		}
	}

	if (!qflag)
		fprintf(stderr, "Process %u detached\n", tcp->pid);

	droptcb(tcp);

	return error;
}
1512
1513static void
1514cleanup(void)
1515{
1516	int i;
1517	struct tcb *tcp;
1518	int fatal_sig;
1519
1520	/* 'interrupted' is a volatile object, fetch it only once */
1521	fatal_sig = interrupted;
1522	if (!fatal_sig)
1523		fatal_sig = SIGTERM;
1524
1525	for (i = 0; i < tcbtabsize; i++) {
1526		tcp = tcbtab[i];
1527		if (!(tcp->flags & TCB_INUSE))
1528			continue;
1529		if (debug)
1530			fprintf(stderr,
1531				"cleanup: looking at pid %u\n", tcp->pid);
1532		if (printing_tcp &&
1533		    (!outfname || followfork < 2 || printing_tcp == tcp)) {
1534			tprints(" <unfinished ...>\n");
1535			printing_tcp = NULL;
1536		}
1537		if (tcp->flags & TCB_ATTACHED)
1538			detach(tcp);
1539		else {
1540			kill(tcp->pid, SIGCONT);
1541			kill(tcp->pid, fatal_sig);
1542		}
1543	}
1544	if (cflag)
1545		call_summary(outf);
1546}
1547
/*
 * Signal handler: only records the number of the received signal in
 * 'interrupted'; trace() and cleanup() read it later.
 */
static void
interrupt(int sig)
{
	interrupted = sig;
}
1553
1554#ifndef HAVE_STRERROR
1555
1556#if !HAVE_DECL_SYS_ERRLIST
1557extern int sys_nerr;
1558extern char *sys_errlist[];
1559#endif /* HAVE_DECL_SYS_ERRLIST */
1560
1561const char *
1562strerror(int err_no)
1563{
1564	static char buf[64];
1565
1566	if (err_no < 1 || err_no >= sys_nerr) {
1567		sprintf(buf, "Unknown error %d", err_no);
1568		return buf;
1569	}
1570	return sys_errlist[err_no];
1571}
1572
#endif /* HAVE_STRERROR */
1574
1575#ifndef HAVE_STRSIGNAL
1576
1577#if defined HAVE_SYS_SIGLIST && !defined HAVE_DECL_SYS_SIGLIST
1578extern char *sys_siglist[];
1579#endif
1580#if defined HAVE_SYS__SIGLIST && !defined HAVE_DECL__SYS_SIGLIST
1581extern char *_sys_siglist[];
1582#endif
1583
1584const char *
1585strsignal(int sig)
1586{
1587	static char buf[64];
1588
1589	if (sig < 1 || sig >= NSIG) {
1590		sprintf(buf, "Unknown signal %d", sig);
1591		return buf;
1592	}
1593#ifdef HAVE__SYS_SIGLIST
1594	return _sys_siglist[sig];
1595#else
1596	return sys_siglist[sig];
1597#endif
1598}
1599
1600#endif /* HAVE_STRSIGNAL */
1601
/*
 * Main event loop of the tracer.
 *
 * Repeatedly waits for any child to change state and dispatches on the
 * wait status: tracee killed by a signal, tracee exited, ptrace event
 * stop, signal stop, or syscall stop (handed to trace_syscall()).
 * Runs until no traced processes remain (nprocs == 0), an interrupt
 * has been recorded in 'interrupted', or wait reports ECHILD.
 *
 * Returns 0 in those cases, and -1 when waiting fails with an
 * unexpected error or when a tracee cannot be set up or restarted
 * (cleanup() has already been called on the latter paths).
 */
static int
trace(void)
{
	struct rusage ru;
	/* Resource usage is only collected when syscall statistics are requested. */
	struct rusage *rup = cflag ? &ru : NULL;
# ifdef __WALL
	static int wait4_options = __WALL;
# endif

	while (nprocs != 0) {
		int pid;
		int wait_errno;
		int status, sig;
		int stopped;
		struct tcb *tcp;
		unsigned event;

		if (interrupted)
			return 0;
		/* Unblock everything for the duration of the wait only. */
		if (interactive)
			sigprocmask(SIG_SETMASK, &empty_set, NULL);
# ifdef __WALL
		pid = wait4(-1, &status, wait4_options, rup);
		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
			/* this kernel does not support __WALL */
			wait4_options &= ~__WALL;
			pid = wait4(-1, &status, wait4_options, rup);
		}
		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
			/* most likely a "cloned" process */
			pid = wait4(-1, &status, __WCLONE, rup);
			if (pid < 0) {
				perror_msg("wait4(__WCLONE) failed");
			}
		}
# else
		pid = wait4(-1, &status, 0, rup);
# endif /* __WALL */
		/* Save errno before sigprocmask() gets a chance to change it. */
		wait_errno = errno;
		if (interactive)
			sigprocmask(SIG_BLOCK, &blocked_set, NULL);

		if (pid < 0) {
			switch (wait_errno) {
			case EINTR:
				continue;
			case ECHILD:
				/*
				 * We would like to verify this case
				 * but sometimes a race in Solbourne's
				 * version of SunOS sometimes reports
				 * ECHILD before sending us SIGCHLD.
				 */
				return 0;
			default:
				errno = wait_errno;
				perror("strace: wait");
				return -1;
			}
		}
		/*
		 * popen_pid is presumably the child created by strace_popen()
		 * for piped output; it is not a tracee, so only note its death.
		 */
		if (pid == popen_pid) {
			if (WIFEXITED(status) || WIFSIGNALED(status))
				popen_pid = 0;
			continue;
		}

		/* A ptrace event code, if any, sits above the low 16 status bits. */
		event = ((unsigned)status >> 16);
		if (debug) {
			char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
			if (event != 0) {
				static const char *const event_names[] = {
					[PTRACE_EVENT_CLONE] = "CLONE",
					[PTRACE_EVENT_FORK]  = "FORK",
					[PTRACE_EVENT_VFORK] = "VFORK",
					[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
					[PTRACE_EVENT_EXEC]  = "EXEC",
					[PTRACE_EVENT_EXIT]  = "EXIT",
				};
				const char *e;
				if (event < ARRAY_SIZE(event_names))
					e = event_names[event];
				else {
					sprintf(buf, "?? (%u)", event);
					e = buf;
				}
				fprintf(stderr, " PTRACE_EVENT_%s", e);
			}
			strcpy(buf, "???");
			if (WIFSIGNALED(status))
#ifdef WCOREDUMP
				sprintf(buf, "WIFSIGNALED,%ssig=%s",
						WCOREDUMP(status) ? "core," : "",
						signame(WTERMSIG(status)));
#else
				sprintf(buf, "WIFSIGNALED,sig=%s",
						signame(WTERMSIG(status)));
#endif
			if (WIFEXITED(status))
				sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
			if (WIFSTOPPED(status))
				sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
#ifdef WIFCONTINUED
			if (WIFCONTINUED(status))
				strcpy(buf, "WIFCONTINUED");
#endif
			fprintf(stderr, " [wait(0x%04x) = %u] %s\n", status, pid, buf);
		}

		/* Look up 'pid' in our table. */
		tcp = pid2tcb(pid);

		/* Under Linux, execve changes pid to thread leader's pid,
		 * and we see this changed pid on EVENT_EXEC and later,
		 * execve sysexit. Leader "disappears" without exit
		 * notification. Let user know that, drop leader's tcb,
		 * and fix up pid in execve thread's tcb.
		 * Effectively, execve thread's tcb replaces leader's tcb.
		 *
		 * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
		 * on exit syscall) in multithreaded programs exactly
		 * in order to handle this case.
		 *
		 * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
		 * On 2.6 and earlier, it can return garbage.
		 */
		if (event == PTRACE_EVENT_EXEC && os_release[0] >= '3') {
			long old_pid = 0;
			if (ptrace(PTRACE_GETEVENTMSG, pid, NULL, (long) &old_pid) >= 0
			 && old_pid > 0
			 && old_pid != pid
			) {
				struct tcb *execve_thread = pid2tcb(old_pid);
				if (tcp) {
					outf = tcp->outf;
					curcol = tcp->curcol;
					if (!cflag) {
						if (printing_tcp)
							tprints(" <unfinished ...>\n");
						printleader(tcp);
						tprintf("+++ superseded by execve in pid %lu +++\n", old_pid);
						printing_tcp = NULL;
						fflush(outf);
					}
					if (execve_thread) {
						/* swap output FILEs (needed for -ff) */
						tcp->outf = execve_thread->outf;
						execve_thread->outf = outf;
					}
					droptcb(tcp);
				}
				tcp = execve_thread;
				if (tcp) {
					tcp->pid = pid;
					tcp->flags |= TCB_REPRINT;
				}
			}
		}

		if (tcp == NULL) {
			if (followfork) {
				/* This is needed to go with the CLONE_PTRACE
				   changes in process.c/util.c: we might see
				   the child's initial trap before we see the
				   parent return from the clone syscall.
				   Leave the child suspended until the parent
				   returns from its system call.  Only then
				   will we have the association of parent and
				   child so that we know how to do clearbpt
				   in the child.  */
				tcp = alloctcb(pid);
				tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
				if (!qflag)
					fprintf(stderr, "Process %d attached\n",
						pid);
			}
			else
				/* This can happen if a clone call used
				   CLONE_PTRACE itself.  */
			{
				if (WIFSTOPPED(status))
					ptrace(PTRACE_CONT, pid, (char *) 1, 0);
				error_msg_and_die("Unknown pid: %u", pid);
			}
		}
		/* set current output file */
		outf = tcp->outf;
		curcol = tcp->curcol;
		if (cflag) {
			/* Account the system time consumed since this tracee's previous stop. */
			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
			tcp->stime = ru.ru_stime;
		}

		if (WIFSIGNALED(status)) {
			/* 0x100 marks "killed by signal"; main() re-raises the signal. */
			if (pid == strace_child)
				exit_code = 0x100 | WTERMSIG(status);
			if (cflag != CFLAG_ONLY_STATS
			    && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL)) {
				printleader(tcp);
#ifdef WCOREDUMP
				tprintf("+++ killed by %s %s+++\n",
					signame(WTERMSIG(status)),
					WCOREDUMP(status) ? "(core dumped) " : "");
#else
				tprintf("+++ killed by %s +++\n",
					signame(WTERMSIG(status)));
#endif
				printing_tcp = NULL;
			}
			fflush(tcp->outf);
			droptcb(tcp);
			continue;
		}
		if (WIFEXITED(status)) {
			if (pid == strace_child)
				exit_code = WEXITSTATUS(status);
			if (tcp == printing_tcp) {
				tprints(" <unfinished ...>\n");
				printing_tcp = NULL;
			}
			if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
				printleader(tcp);
				tprintf("+++ exited with %d +++\n", WEXITSTATUS(status));
				printing_tcp = NULL;
			}
			fflush(tcp->outf);
			droptcb(tcp);
			continue;
		}
		if (!WIFSTOPPED(status)) {
			fprintf(stderr, "PANIC: pid %u not stopped\n", pid);
			droptcb(tcp);
			continue;
		}

		/* Is this the very first time we see this tracee stopped? */
		if (tcp->flags & TCB_STARTUP) {
			if (debug)
				fprintf(stderr, "pid %d has TCB_STARTUP, initializing it\n", tcp->pid);
			tcp->flags &= ~TCB_STARTUP;
			if (tcp->flags & TCB_BPTSET) {
				/*
				 * One example is a breakpoint inherited from
				 * parent through fork().
				 */
				if (clearbpt(tcp) < 0) {
					/* Pretty fatal */
					droptcb(tcp);
					cleanup();
					return -1;
				}
			}
			if (ptrace_setoptions) {
				if (debug)
					fprintf(stderr, "setting opts %x on pid %d\n", ptrace_setoptions, tcp->pid);
				if (ptrace(PTRACE_SETOPTIONS, tcp->pid, NULL, ptrace_setoptions) < 0) {
					if (errno != ESRCH) {
						/* Should never happen, really */
						perror_msg_and_die("PTRACE_SETOPTIONS");
					}
				}
			}
		}

		sig = WSTOPSIG(status);

		if (event != 0) {
			/* Ptrace event */
#ifdef USE_SEIZE
			if (event == PTRACE_EVENT_STOP || event == PTRACE_EVENT_STOP1) {
				/*
				 * PTRACE_INTERRUPT-stop or group-stop.
				 * PTRACE_INTERRUPT-stop has sig == SIGTRAP here.
				 */
				if (sig == SIGSTOP
				 || sig == SIGTSTP
				 || sig == SIGTTIN
				 || sig == SIGTTOU
				) {
					stopped = 1;
					goto show_stopsig;
				}
			}
#endif
			goto restart_tracee_with_sig_0;
		}

		/* Is this post-attach SIGSTOP?
		 * Interestingly, the process may stop
		 * with STOPSIG equal to some other signal
		 * than SIGSTOP if we happened to attach
		 * just before the process takes a signal.
		 */
		if (sig == SIGSTOP && (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
			if (debug)
				fprintf(stderr, "ignored SIGSTOP on pid %d\n", tcp->pid);
			tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
			goto restart_tracee_with_sig_0;
		}

		if (sig != syscall_trap_sig) {
			siginfo_t si;

			/* Nonzero (true) if tracee is stopped by signal
			 * (as opposed to "tracee received signal").
			 */
			stopped = (ptrace(PTRACE_GETSIGINFO, pid, 0, (long) &si) < 0);
#ifdef USE_SEIZE
 show_stopsig:
#endif
			if (cflag != CFLAG_ONLY_STATS
			    && (qual_flags[sig] & QUAL_SIGNAL)) {
#if defined(PT_CR_IPSR) && defined(PT_CR_IIP)
				long pc = 0;
				long psr = 0;

				upeek(tcp, PT_CR_IPSR, &psr);
				upeek(tcp, PT_CR_IIP, &pc);

# define PSR_RI	41
				pc += (psr >> PSR_RI) & 0x3;
# define PC_FORMAT_STR	" @ %lx"
# define PC_FORMAT_ARG	, pc
#else
# define PC_FORMAT_STR	""
# define PC_FORMAT_ARG	/* nothing */
#endif
				printleader(tcp);
				if (!stopped) {
					tprints("--- ");
					printsiginfo(&si, verbose(tcp));
					tprintf(" (%s)" PC_FORMAT_STR " ---\n",
						strsignal(sig)
						PC_FORMAT_ARG);
				} else
					tprintf("--- %s by %s" PC_FORMAT_STR " ---\n",
						strsignal(sig),
						signame(sig)
						PC_FORMAT_ARG);
				printing_tcp = NULL;
				fflush(tcp->outf);
			}

			if (!stopped)
				/* It's signal-delivery-stop. Inject the signal */
				goto restart_tracee;

			/* It's group-stop */
#ifdef USE_SEIZE
			if (use_seize) {
				/*
				 * This ends ptrace-stop, but does *not* end group-stop.
				 * This makes stopping signals work properly on straced process
				 * (that is, process really stops. It used to continue to run).
				 */
				if (ptrace_restart(PTRACE_LISTEN, tcp, 0) < 0) {
					cleanup();
					return -1;
				}
				continue;
			}
			/* We don't have PTRACE_LISTEN support... */
#endif
			goto restart_tracee;
		}

		/* We handled quick cases, we are permitted to interrupt now. */
		if (interrupted)
			return 0;

		/* This should be syscall entry or exit.
		 * (Or it still can be that pesky post-execve SIGTRAP!)
		 * Handle it.
		 */
		if (trace_syscall(tcp) < 0 && !tcp->ptrace_errno) {
			/* ptrace() failed in trace_syscall() with ESRCH.
			 * Likely a result of process disappearing mid-flight.
			 * Observed case: exit_group() terminating
			 * all processes in thread group.
			 */
			if (tcp->flags & TCB_ATTACHED) {
				if (printing_tcp) {
					/* Do we have dangling line "syscall(param, param"?
					 * Finish the line then.
					 */
					printing_tcp->flags |= TCB_REPRINT;
					tprints(" <unfinished ...>\n");
					printing_tcp = NULL;
					fflush(tcp->outf);
				}
				/* We assume that ptrace error was caused by process death.
				 * We used to detach(tcp) here, but since we no longer
				 * implement "detach before death" policy/hack,
				 * we can let this process to report its death to us
				 * normally, via WIFEXITED or WIFSIGNALED wait status.
				 */
			} else {
				/* It's our real child (and we also trace it) */
				/* my_tkill(pid, SIGKILL); - why? */
				/* droptcb(tcp); - why? */
			}
			continue;
		}
 restart_tracee_with_sig_0:
		sig = 0;
 restart_tracee:
		/* Remember current print column before continuing. */
		tcp->curcol = curcol;
		/* Resume until the next syscall stop, delivering 'sig' (0 = none). */
		if (ptrace_restart(PTRACE_SYSCALL, tcp, sig) < 0) {
			cleanup();
			return -1;
		}
	}
	return 0;
}
2016
2017void
2018tprintf(const char *fmt, ...)
2019{
2020	va_list args;
2021
2022	va_start(args, fmt);
2023	if (outf) {
2024		int n = vfprintf(outf, fmt, args);
2025		if (n < 0) {
2026			if (outf != stderr)
2027				perror(outfname == NULL
2028				       ? "<writing to pipe>" : outfname);
2029		} else
2030			curcol += n;
2031	}
2032	va_end(args);
2033}
2034
2035void
2036tprints(const char *str)
2037{
2038	if (outf) {
2039		int n = fputs(str, outf);
2040		if (n >= 0) {
2041			curcol += strlen(str);
2042			return;
2043		}
2044		if (outf != stderr)
2045			perror(outfname == NULL
2046			       ? "<writing to pipe>" : outfname);
2047	}
2048}
2049
2050void
2051printleader(struct tcb *tcp)
2052{
2053	if (printing_tcp) {
2054		if (printing_tcp->ptrace_errno) {
2055			if (printing_tcp->flags & TCB_INSYSCALL) {
2056				tprints(" <unavailable>) ");
2057				tabto();
2058			}
2059			tprints("= ? <unavailable>\n");
2060			printing_tcp->ptrace_errno = 0;
2061		} else if (!outfname || followfork < 2 || printing_tcp == tcp) {
2062			printing_tcp->flags |= TCB_REPRINT;
2063			tprints(" <unfinished ...>\n");
2064		}
2065	}
2066
2067	printing_tcp = tcp;
2068	curcol = 0;
2069	if ((followfork == 1 || pflag_seen > 1) && outfname)
2070		tprintf("%-5d ", tcp->pid);
2071	else if (nprocs > 1 && !outfname)
2072		tprintf("[pid %5u] ", tcp->pid);
2073	if (tflag) {
2074		char str[sizeof("HH:MM:SS")];
2075		struct timeval tv, dtv;
2076		static struct timeval otv;
2077
2078		gettimeofday(&tv, NULL);
2079		if (rflag) {
2080			if (otv.tv_sec == 0)
2081				otv = tv;
2082			tv_sub(&dtv, &tv, &otv);
2083			tprintf("%6ld.%06ld ",
2084				(long) dtv.tv_sec, (long) dtv.tv_usec);
2085			otv = tv;
2086		}
2087		else if (tflag > 2) {
2088			tprintf("%ld.%06ld ",
2089				(long) tv.tv_sec, (long) tv.tv_usec);
2090		}
2091		else {
2092			time_t local = tv.tv_sec;
2093			strftime(str, sizeof(str), "%T", localtime(&local));
2094			if (tflag > 1)
2095				tprintf("%s.%06ld ", str, (long) tv.tv_usec);
2096			else
2097				tprintf("%s ", str);
2098		}
2099	}
2100	if (iflag)
2101		printcall(tcp);
2102}
2103
2104void
2105tabto(void)
2106{
2107	if (curcol < acolumn)
2108		tprints(acolumn_spaces + curcol);
2109}
2110