strace.c revision 9a36ae5e886794865623b0d3d4f0d10bf541f32d
1/*
2 * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3 * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4 * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5 * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 *	$Id$
31 */
32
33#include "defs.h"
34
35#include <sys/types.h>
36#include <stdarg.h>
37#include <signal.h>
38#include <errno.h>
39#include <sys/param.h>
40#include <fcntl.h>
41#include <sys/resource.h>
42#include <sys/wait.h>
43#include <sys/stat.h>
44#include <pwd.h>
45#include <grp.h>
46#include <string.h>
47#include <limits.h>
48#include <dirent.h>
49
50#ifdef LINUX
51# include <asm/unistd.h>
52# if defined __NR_tkill
53#  define my_tkill(tid, sig) syscall(__NR_tkill, (tid), (sig))
54# else
   /* kill() may arbitrarily choose the target task within the process
      group, while we later wait on that specific TID.  PID process waits
      become TID task-specific waits for a process under ptrace(2).  */
58#  warning "Neither tkill(2) nor tgkill(2) available, risk of strace hangs!"
59#  define my_tkill(tid, sig) kill((tid), (sig))
60# endif
61#endif
62
63#if defined(IA64) && defined(LINUX)
64# include <asm/ptrace_offsets.h>
65#endif
66
67#ifdef USE_PROCFS
68#include <poll.h>
69#endif
70
71#ifdef SVR4
72#include <sys/stropts.h>
73#ifdef HAVE_MP_PROCFS
74#ifdef HAVE_SYS_UIO_H
75#include <sys/uio.h>
76#endif
77#endif
78#endif
79extern char **environ;
80extern int optind;
81extern char *optarg;
82
83
84int debug = 0, followfork = 0;
85unsigned int ptrace_setoptions = 0;
86/* Which WSTOPSIG(status) value marks syscall traps? */
87static unsigned int syscall_trap_sig = SIGTRAP;
88int dtime = 0, xflag = 0, qflag = 0;
89cflag_t cflag = CFLAG_NONE;
90static int iflag = 0, interactive = 0, pflag_seen = 0, rflag = 0, tflag = 0;
91/*
92 * daemonized_tracer supports -D option.
93 * With this option, strace forks twice.
94 * Unlike normal case, with -D *grandparent* process exec's,
95 * becoming a traced process. Child exits (this prevents traced process
96 * from having children it doesn't expect to have), and grandchild
97 * attaches to grandparent similarly to strace -p PID.
98 * This allows for more transparent interaction in cases
99 * when process and its parent are communicating via signals,
100 * wait() etc. Without -D, strace process gets lodged in between,
101 * disrupting parent<->child link.
102 */
103static bool daemonized_tracer = 0;
104
105/* Sometimes we want to print only succeeding syscalls. */
106int not_failing_only = 0;
107
108/* Show path associated with fd arguments */
109int show_fd_path = 0;
110
111/* are we filtering traces based on paths? */
112int tracing_paths = 0;
113
114static int exit_code = 0;
115static int strace_child = 0;
116static int strace_tracer_pid = 0;
117
118static char *username = NULL;
119static uid_t run_uid;
120static gid_t run_gid;
121
122int acolumn = DEFAULT_ACOLUMN;
123int max_strlen = DEFAULT_STRLEN;
124static char *outfname = NULL;
125static FILE *outf;
126static int curcol;
127static struct tcb **tcbtab;
128static unsigned int nprocs, tcbtabsize;
129static const char *progname;
130
131static int detach(struct tcb *tcp, int sig);
132static int trace(void);
133static void cleanup(void);
134static void interrupt(int sig);
135static sigset_t empty_set, blocked_set;
136
137#ifdef HAVE_SIG_ATOMIC_T
138static volatile sig_atomic_t interrupted;
139#else /* !HAVE_SIG_ATOMIC_T */
140static volatile int interrupted;
141#endif /* !HAVE_SIG_ATOMIC_T */
142
143#ifdef USE_PROCFS
144
145static struct tcb *pfd2tcb(int pfd);
146static void reaper(int sig);
147static void rebuild_pollv(void);
148static struct pollfd *pollv;
149
150#ifndef HAVE_POLLABLE_PROCFS
151
152static void proc_poll_open(void);
153static void proc_poller(int pfd);
154
/*
 * Record type used only when procfs descriptors are not pollable
 * (!HAVE_POLLABLE_PROCFS).
 * NOTE(review): presumably this is the message exchanged over
 * proc_poll_pipe between the poller helpers and the tracer - confirm
 * against proc_poller(), which is outside this view.
 */
struct proc_pollfd {
	int fd;		/* descriptor the event refers to */
	int revents;	/* events reported for fd (presumably poll(2)-style bits - confirm) */
	int pid;	/* process id associated with the record */
};
160
161static int poller_pid;
162static int proc_poll_pipe[2] = { -1, -1 };
163
164#endif /* !HAVE_POLLABLE_PROCFS */
165
166#ifdef HAVE_MP_PROCFS
167#define POLLWANT	POLLWRNORM
168#else
169#define POLLWANT	POLLPRI
170#endif
171#endif /* USE_PROCFS */
172
173static void
174usage(FILE *ofp, int exitval)
175{
176	fprintf(ofp, "\
177usage: strace [-CdDffhiqrtttTvVxxy] [-a column] [-e expr] ... [-o file]\n\
178              [-p pid] ... [-s strsize] [-u username] [-E var=val] ...\n\
179              [-P path] [command [arg ...]]\n\
180   or: strace -c [-D] [-e expr] ... [-O overhead] [-S sortby] [-E var=val] ...\n\
181              [command [arg ...]]\n\
182-c -- count time, calls, and errors for each syscall and report summary\n\
183-C -- like -c but also print regular output while processes are running\n\
184-f -- follow forks, -ff -- with output into separate files\n\
185-F -- attempt to follow vforks, -h -- print help message\n\
186-i -- print instruction pointer at time of syscall\n\
187-q -- suppress messages about attaching, detaching, etc.\n\
188-r -- print relative timestamp, -t -- absolute timestamp, -tt -- with usecs\n\
189-T -- print time spent in each syscall, -V -- print version\n\
190-v -- verbose mode: print unabbreviated argv, stat, termio[s], etc. args\n\
191-x -- print non-ascii strings in hex, -xx -- print all strings in hex\n\
192-y -- print paths associated with file descriptor arguments\n\
193-a column -- alignment COLUMN for printing syscall results (default %d)\n\
194-e expr -- a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
195   options: trace, abbrev, verbose, raw, signal, read, or write\n\
196-o file -- send trace output to FILE instead of stderr\n\
197-O overhead -- set overhead for tracing syscalls to OVERHEAD usecs\n\
198-p pid -- trace process with process id PID, may be repeated\n\
199-D -- run tracer process as a detached grandchild, not as parent\n\
200-s strsize -- limit length of print strings to STRSIZE chars (default %d)\n\
201-S sortby -- sort syscall counts by: time, calls, name, nothing (default %s)\n\
202-u username -- run command as username handling setuid and/or setgid\n\
203-E var=val -- put var=val in the environment for command\n\
204-E var -- remove var from the environment for command\n\
205-P path -- trace accesses to path\n\
206" /* this is broken, so don't document it
207-z -- print only succeeding syscalls\n\
208  */
209, DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
210	exit(exitval);
211}
212
213static void die(void) __attribute__ ((noreturn));
214static void die(void)
215{
216	if (strace_tracer_pid == getpid()) {
217		cflag = 0;
218		cleanup();
219	}
220	exit(1);
221}
222
223static void verror_msg(int err_no, const char *fmt, va_list p)
224{
225	fflush(NULL);
226	fprintf(stderr, "%s: ", progname);
227	vfprintf(stderr, fmt, p);
228	if (err_no)
229		fprintf(stderr, ": %s\n", strerror(err_no));
230	else
231		putc('\n', stderr);
232	fflush(stderr);
233}
234
/*
 * Print a printf-style diagnostic, prefixed with the program name,
 * to stderr.  No errno text is appended.
 */
void error_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(0, fmt, args);
	va_end(args);
}
242
/*
 * Print a printf-style diagnostic, prefixed with the program name,
 * to stderr (no errno text), then terminate via die().
 * Does not return.
 */
void error_msg_and_die(const char *fmt, ...)
{
	va_list p;
	va_start(p, fmt);
	verror_msg(0, fmt, p);
	/* Every va_start must be paired with va_end in the same function,
	 * even though die() never returns. */
	va_end(p);
	die();
}
250
/*
 * Print a printf-style diagnostic, prefixed with the program name,
 * followed by ": " and the strerror() text for the current errno.
 */
void perror_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(errno, fmt, args);
	va_end(args);
}
258
/*
 * Print a printf-style diagnostic, prefixed with the program name,
 * followed by the strerror() text for the current errno, then
 * terminate via die().  Does not return.
 */
void perror_msg_and_die(const char *fmt, ...)
{
	va_list p;
	va_start(p, fmt);
	verror_msg(errno, fmt, p);
	/* Every va_start must be paired with va_end in the same function,
	 * even though die() never returns. */
	va_end(p);
	die();
}
266
267#ifdef SVR4
268#ifdef MIPS
/*
 * Intentionally empty function, built only for SVR4 on MIPS.
 * startup_child() installs it as the SIGINT handler in the process that
 * is about to be traced ("Kludge for SGI, see proc_open for details").
 */
void
foobar()
{
}
273#endif /* MIPS */
274#endif /* SVR4 */
275
/* Glue for systems without an MMU that cannot provide fork() */
277#ifdef HAVE_FORK
278# define strace_vforked 0
279#else
280# define strace_vforked 1
281# define fork()         vfork()
282#endif
283
/*
 * Make sure FD_CLOEXEC is set on fd.
 * A failing F_GETFD means fd is invalid, which is a bug in the caller,
 * so the process is aborted instead of reporting the error upward.
 */
static void
set_cloexec_flag(int fd)
{
	int cur = fcntl(fd, F_GETFD);

	if (cur < 0) {
		/* Only possible with a bad descriptor: treat as fatal. */
		perror_msg_and_die("fcntl(%d, F_GETFD)", fd);
	}

	/* Already close-on-exec: nothing to change. */
	if ((cur & FD_CLOEXEC) == FD_CLOEXEC)
		return;

	fcntl(fd, F_SETFD, cur | FD_CLOEXEC); /* never fails */
}
305
/*
 * When strace is a setuid executable, the real and effective uids have
 * to be exchanged before and after filesystem and process management
 * operations.  Each call swaps them; calling it twice restores the
 * original pair.  Does nothing on SVR4 or when both ids are equal.
 * A failing setreuid() is fatal.
 */
static void
swap_uid(void)
{
#ifndef SVR4
	int uid = getuid();
	int euid = geteuid();

	if (euid == uid)
		return;

	if (setreuid(euid, uid) < 0) {
		perror_msg_and_die("setreuid");
	}
#endif
}
321
322#if _LFS64_LARGEFILE
323# define fopen_for_output fopen64
324#else
325# define fopen_for_output fopen
326#endif
327
/*
 * Open path for writing as an output file and return the stream.
 * The open is bracketed by swap_uid() calls, and the resulting
 * descriptor is marked close-on-exec.
 * Failure to open is fatal; this function never returns NULL.
 */
static FILE *
strace_fopen(const char *path)
{
	FILE *stream;

	swap_uid();
	stream = fopen_for_output(path, "w");
	if (stream == NULL)
		perror_msg_and_die("Can't fopen '%s'", path);
	swap_uid();

	set_cloexec_flag(fileno(stream));
	return stream;
}
341
342static int popen_pid = 0;
343
344#ifndef _PATH_BSHELL
345# define _PATH_BSHELL "/bin/sh"
346#endif
347
/*
 * We cannot use standard popen(3) here because we have to distinguish
 * popen child process from other processes we trace, and standard popen(3)
 * does not export its child's pid.
 *
 * Runs "sh -c command" (via _PATH_BSHELL) with its standard input
 * connected to the read end of a new pipe, records the child's pid in
 * popen_pid, and returns a write-mode stream on the pipe's write end.
 * Any failure (pipe, vfork, dup2, exec, fdopen) is fatal.
 */
static FILE *
strace_popen(const char *command)
{
	FILE *fp;
	int fds[2];

	/* The pipe and child are created between a pair of swap_uid() calls. */
	swap_uid();
	if (pipe(fds) < 0)
		perror_msg_and_die("pipe");

	/* Keep the write end from being inherited across exec. */
	set_cloexec_flag(fds[1]); /* never fails */

	popen_pid = vfork();
	if (popen_pid == -1)
		perror_msg_and_die("vfork");

	if (popen_pid == 0) {
		/* child */
		close(fds[1]);
		if (fds[0] != 0) {
			/*
			 * dup2() onto descriptor 0 returns 0 on success,
			 * so any non-zero result here means failure.
			 */
			if (dup2(fds[0], 0))
				perror_msg_and_die("dup2");
			close(fds[0]);
		}
		execl(_PATH_BSHELL, "sh", "-c", command, NULL);
		/*
		 * NOTE(review): this error path ends in exit() (through die())
		 * inside a vfork()ed child - confirm that is acceptable.
		 */
		perror_msg_and_die("Can't execute '%s'", _PATH_BSHELL);
	}

	/* parent */
	close(fds[0]);
	swap_uid();
	fp = fdopen(fds[1], "w");
	if (!fp)
		error_msg_and_die("Out of memory");
	return fp;
}
389
390static void
391newoutf(struct tcb *tcp)
392{
393	if (outfname && followfork > 1) {
394		char name[520 + sizeof(int) * 3];
395		sprintf(name, "%.512s.%u", outfname, tcp->pid);
396		tcp->outf = strace_fopen(name);
397	}
398}
399
400static void
401startup_attach(void)
402{
403	int tcbi;
404	struct tcb *tcp;
405
406	/*
407	 * Block user interruptions as we would leave the traced
408	 * process stopped (process state T) if we would terminate in
409	 * between PTRACE_ATTACH and wait4 () on SIGSTOP.
410	 * We rely on cleanup() from this point on.
411	 */
412	if (interactive)
413		sigprocmask(SIG_BLOCK, &blocked_set, NULL);
414
415	if (daemonized_tracer) {
416		pid_t pid = fork();
417		if (pid < 0) {
418			_exit(1);
419		}
420		if (pid) { /* parent */
421			/*
422			 * Wait for grandchild to attach to straced process
423			 * (grandparent). Grandchild SIGKILLs us after it attached.
424			 * Grandparent's wait() is unblocked by our death,
425			 * it proceeds to exec the straced program.
426			 */
427			pause();
428			_exit(0); /* paranoia */
429		}
430		/* grandchild */
431		/* We will be the tracer process. Remember our new pid: */
432		strace_tracer_pid = getpid();
433	}
434
435	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
436		tcp = tcbtab[tcbi];
437
438		if (!(tcp->flags & TCB_INUSE) || !(tcp->flags & TCB_ATTACHED))
439			continue;
440#ifdef LINUX
441		if (tcp->flags & TCB_ATTACH_DONE)
442			continue;
443#endif
444		/* Reinitialize the output since it may have changed. */
445		tcp->outf = outf;
446		newoutf(tcp);
447
448#ifdef USE_PROCFS
449		if (proc_open(tcp, 1) < 0) {
450			fprintf(stderr, "trouble opening proc file\n");
451			droptcb(tcp);
452			continue;
453		}
454#else /* !USE_PROCFS */
455# ifdef LINUX
456		if (followfork && !daemonized_tracer) {
457			char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
458			DIR *dir;
459
460			sprintf(procdir, "/proc/%d/task", tcp->pid);
461			dir = opendir(procdir);
462			if (dir != NULL) {
463				unsigned int ntid = 0, nerr = 0;
464				struct dirent *de;
465				int tid;
466				while ((de = readdir(dir)) != NULL) {
467					if (de->d_fileno == 0)
468						continue;
469					tid = atoi(de->d_name);
470					if (tid <= 0)
471						continue;
472					++ntid;
473					if (ptrace(PTRACE_ATTACH, tid, (char *) 1, 0) < 0) {
474						++nerr;
475						if (debug)
476							fprintf(stderr, "attach to pid %d failed\n", tid);
477					}
478					else {
479						if (debug)
480							fprintf(stderr, "attach to pid %d succeeded\n", tid);
481						if (tid != tcp->pid) {
482							struct tcb *new_tcp = alloctcb(tid);
483							new_tcp->flags |= TCB_ATTACHED|TCB_ATTACH_DONE;
484						}
485					}
486					if (interactive) {
487						sigprocmask(SIG_SETMASK, &empty_set, NULL);
488						if (interrupted)
489							goto ret;
490						sigprocmask(SIG_BLOCK, &blocked_set, NULL);
491					}
492				}
493				closedir(dir);
494				ntid -= nerr;
495				if (ntid == 0) {
496					perror("attach: ptrace(PTRACE_ATTACH, ...)");
497					droptcb(tcp);
498					continue;
499				}
500				if (!qflag) {
501					fprintf(stderr, ntid > 1
502? "Process %u attached with %u threads - interrupt to quit\n"
503: "Process %u attached - interrupt to quit\n",
504						tcp->pid, ntid);
505				}
506				continue;
507			} /* if (opendir worked) */
508		} /* if (-f) */
509# endif /* LINUX */
510		if (ptrace(PTRACE_ATTACH, tcp->pid, (char *) 1, 0) < 0) {
511			perror("attach: ptrace(PTRACE_ATTACH, ...)");
512			droptcb(tcp);
513			continue;
514		}
515		if (debug)
516			fprintf(stderr, "attach to pid %d (main) succeeded\n", tcp->pid);
517
518		if (daemonized_tracer) {
519			/*
520			 * It is our grandparent we trace, not a -p PID.
521			 * Don't want to just detach on exit, so...
522			 */
523			tcp->flags &= ~TCB_ATTACHED;
524			/*
525			 * Make parent go away.
526			 * Also makes grandparent's wait() unblock.
527			 */
528			kill(getppid(), SIGKILL);
529		}
530
531#endif /* !USE_PROCFS */
532		if (!qflag)
533			fprintf(stderr,
534				"Process %u attached - interrupt to quit\n",
535				tcp->pid);
536	} /* for each tcbtab[] */
537
538 ret:
539#ifdef LINUX
540	/* TCB_ATTACH_DONE flag is used only in this function */
541	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
542		tcp = tcbtab[tcbi];
543		tcp->flags &= ~TCB_ATTACH_DONE;
544	}
545#endif
546
547	if (interactive)
548		sigprocmask(SIG_SETMASK, &empty_set, NULL);
549}
550
551static void
552startup_child(char **argv)
553{
554	struct stat statbuf;
555	const char *filename;
556	char pathname[MAXPATHLEN];
557	int pid = 0;
558	struct tcb *tcp;
559
560	filename = argv[0];
561	if (strchr(filename, '/')) {
562		if (strlen(filename) > sizeof pathname - 1) {
563			errno = ENAMETOOLONG;
564			perror_msg_and_die("exec");
565		}
566		strcpy(pathname, filename);
567	}
568#ifdef USE_DEBUGGING_EXEC
569	/*
570	 * Debuggers customarily check the current directory
571	 * first regardless of the path but doing that gives
572	 * security geeks a panic attack.
573	 */
574	else if (stat(filename, &statbuf) == 0)
575		strcpy(pathname, filename);
576#endif /* USE_DEBUGGING_EXEC */
577	else {
578		const char *path;
579		int m, n, len;
580
581		for (path = getenv("PATH"); path && *path; path += m) {
582			if (strchr(path, ':')) {
583				n = strchr(path, ':') - path;
584				m = n + 1;
585			}
586			else
587				m = n = strlen(path);
588			if (n == 0) {
589				if (!getcwd(pathname, MAXPATHLEN))
590					continue;
591				len = strlen(pathname);
592			}
593			else if (n > sizeof pathname - 1)
594				continue;
595			else {
596				strncpy(pathname, path, n);
597				len = n;
598			}
599			if (len && pathname[len - 1] != '/')
600				pathname[len++] = '/';
601			strcpy(pathname + len, filename);
602			if (stat(pathname, &statbuf) == 0 &&
603			    /* Accept only regular files
604			       with some execute bits set.
605			       XXX not perfect, might still fail */
606			    S_ISREG(statbuf.st_mode) &&
607			    (statbuf.st_mode & 0111))
608				break;
609		}
610	}
611	if (stat(pathname, &statbuf) < 0) {
612		perror_msg_and_die("Can't stat '%s'", filename);
613	}
614	strace_child = pid = fork();
615	if (pid < 0) {
616		perror_msg_and_die("fork");
617	}
618	if ((pid != 0 && daemonized_tracer) /* -D: parent to become a traced process */
619	 || (pid == 0 && !daemonized_tracer) /* not -D: child to become a traced process */
620	) {
621		pid = getpid();
622#ifdef USE_PROCFS
623		if (outf != stderr) close(fileno(outf));
624#ifdef MIPS
625		/* Kludge for SGI, see proc_open for details. */
626		sa.sa_handler = foobar;
627		sa.sa_flags = 0;
628		sigemptyset(&sa.sa_mask);
629		sigaction(SIGINT, &sa, NULL);
630#endif /* MIPS */
631#ifndef FREEBSD
632		pause();
633#else /* FREEBSD */
634		kill(pid, SIGSTOP); /* stop HERE */
635#endif /* FREEBSD */
636#else /* !USE_PROCFS */
637		if (outf != stderr)
638			close(fileno(outf));
639
640		if (!daemonized_tracer) {
641			if (ptrace(PTRACE_TRACEME, 0, (char *) 1, 0) < 0) {
642				perror_msg_and_die("ptrace(PTRACE_TRACEME, ...)");
643			}
644			if (debug)
645				kill(pid, SIGSTOP);
646		}
647
648		if (username != NULL || geteuid() == 0) {
649			uid_t run_euid = run_uid;
650			gid_t run_egid = run_gid;
651
652			if (statbuf.st_mode & S_ISUID)
653				run_euid = statbuf.st_uid;
654			if (statbuf.st_mode & S_ISGID)
655				run_egid = statbuf.st_gid;
656
657			/*
658			 * It is important to set groups before we
659			 * lose privileges on setuid.
660			 */
661			if (username != NULL) {
662				if (initgroups(username, run_gid) < 0) {
663					perror_msg_and_die("initgroups");
664				}
665				if (setregid(run_gid, run_egid) < 0) {
666					perror_msg_and_die("setregid");
667				}
668				if (setreuid(run_uid, run_euid) < 0) {
669					perror_msg_and_die("setreuid");
670				}
671			}
672		}
673		else
674			setreuid(run_uid, run_uid);
675
676		if (!daemonized_tracer) {
677			/*
678			 * Induce an immediate stop so that the parent
679			 * will resume us with PTRACE_SYSCALL and display
680			 * this execve call normally.
681			 * Unless of course we're on a no-MMU system where
682			 * we vfork()-ed, so we cannot stop the child.
683			 */
684			if (!strace_vforked)
685				kill(getpid(), SIGSTOP);
686		} else {
687			struct sigaction sv_sigchld;
688			sigaction(SIGCHLD, NULL, &sv_sigchld);
689			/*
690			 * Make sure it is not SIG_IGN, otherwise wait
691			 * will not block.
692			 */
693			signal(SIGCHLD, SIG_DFL);
694			/*
695			 * Wait for grandchild to attach to us.
696			 * It kills child after that, and wait() unblocks.
697			 */
698			alarm(3);
699			wait(NULL);
700			alarm(0);
701			sigaction(SIGCHLD, &sv_sigchld, NULL);
702		}
703#endif /* !USE_PROCFS */
704
705		execv(pathname, argv);
706		perror_msg_and_die("exec");
707	}
708
709	/* We are the tracer.  */
710	/* With -D, we are *child* here, IOW: different pid. Fetch it. */
711	strace_tracer_pid = getpid();
712
713	tcp = alloctcb(daemonized_tracer ? getppid() : pid);
714	if (daemonized_tracer) {
715		/* We want subsequent startup_attach() to attach to it.  */
716		tcp->flags |= TCB_ATTACHED;
717	}
718#ifdef USE_PROCFS
719	if (proc_open(tcp, 0) < 0) {
720		perror_msg_and_die("trouble opening proc file");
721	}
722#endif /* USE_PROCFS */
723}
724
725#ifdef LINUX
/*
 * Send sig to pid with kill(2), ignoring the result, and leave errno
 * exactly as it was on entry.  This lets callers signal a process
 * between a failing call and the report of that call's errno.
 */
static void kill_save_errno(pid_t pid, int sig)
{
	const int errno_on_entry = errno;

	(void) kill(pid, sig);

	errno = errno_on_entry;
}
733
/*
 * Test whether the kernel supports PTRACE_O_TRACECLONE et al options.
 * First fork a new child, call ptrace with PTRACE_SETOPTIONS on it,
 * and then see which options are supported by the kernel.
 *
 * The child forks once under tracing.  The test passes when the pid
 * reported through the PTRACE_EVENT_FORK event message equals the pid of
 * another process that was seen stopping.  On success the tested
 * options are added to ptrace_setoptions; otherwise a message is
 * printed and ptrace_setoptions is left unchanged.
 */
static void
test_ptrace_setoptions_followfork(void)
{
	int pid, expected_grandchild = 0, found_grandchild = 0;
	const unsigned int test_options = PTRACE_O_TRACECLONE |
					  PTRACE_O_TRACEFORK |
					  PTRACE_O_TRACEVFORK;

	pid = fork();
	if (pid < 0)
		perror_msg_and_die("fork");
	if (pid == 0) {
		/* Test child: ask to be traced, stop, fork once, then exit. */
		pid = getpid();
		if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0)
			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
					   __func__);
		kill(pid, SIGSTOP);
		if (fork() < 0)
			perror_msg_and_die("fork");
		_exit(0);
	}

	/* Parent: handle wait statuses until no children remain (ECHILD). */
	while (1) {
		int status, tracee_pid;

		errno = 0;
		tracee_pid = wait(&status);
		if (tracee_pid <= 0) {
			if (errno == EINTR)
				continue;
			else if (errno == ECHILD)
				break;
			kill_save_errno(pid, SIGKILL);
			perror_msg_and_die("%s: unexpected wait result %d",
					   __func__, tracee_pid);
		}
		if (WIFEXITED(status)) {
			/* A zero exit status is expected; anything else is fatal. */
			if (WEXITSTATUS(status)) {
				if (tracee_pid != pid)
					kill_save_errno(pid, SIGKILL);
				error_msg_and_die("%s: unexpected exit status %u",
						  __func__, WEXITSTATUS(status));
			}
			continue;
		}
		if (WIFSIGNALED(status)) {
			if (tracee_pid != pid)
				kill_save_errno(pid, SIGKILL);
			error_msg_and_die("%s: unexpected signal %u",
					  __func__, WTERMSIG(status));
		}
		if (!WIFSTOPPED(status)) {
			if (tracee_pid != pid)
				kill_save_errno(tracee_pid, SIGKILL);
			kill(pid, SIGKILL);
			error_msg_and_die("%s: unexpected wait status %x",
					  __func__, status);
		}
		if (tracee_pid != pid) {
			/*
			 * A stop of a process other than the test child:
			 * remember its pid as the grandchild and let it run.
			 */
			found_grandchild = tracee_pid;
			if (ptrace(PTRACE_CONT, tracee_pid, 0, 0) < 0) {
				kill_save_errno(tracee_pid, SIGKILL);
				kill_save_errno(pid, SIGKILL);
				perror_msg_and_die("PTRACE_CONT doesn't work");
			}
			continue;
		}
		switch (WSTOPSIG(status)) {
		case SIGSTOP:
			/*
			 * Try to set the options on the stopped child.
			 * EINVAL and EIO are not reported here.
			 */
			if (ptrace(PTRACE_SETOPTIONS, pid, 0, test_options) < 0
			    && errno != EINVAL && errno != EIO)
				perror_msg("PTRACE_SETOPTIONS");
			break;
		case SIGTRAP:
			/* The event code is carried above the low 16 status bits. */
			if (status >> 16 == PTRACE_EVENT_FORK) {
				long msg = 0;

				/* The event message is taken as the new child's pid. */
				if (ptrace(PTRACE_GETEVENTMSG, pid,
					   NULL, (long) &msg) == 0)
					expected_grandchild = msg;
			}
			break;
		}
		/* Resume the test child until its next stop. */
		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) {
			kill_save_errno(pid, SIGKILL);
			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
		}
	}
	/* Pass only if a fork event was reported and it names the process seen. */
	if (expected_grandchild && expected_grandchild == found_grandchild) {
		ptrace_setoptions |= test_options;
		if (debug)
			fprintf(stderr, "ptrace_setoptions = %#x\n",
				ptrace_setoptions);
		return;
	}
	error_msg("Test for PTRACE_O_TRACECLONE failed, "
		  "giving up using this feature.");
}
837
/*
 * Test whether the kernel supports PTRACE_O_TRACESYSGOOD.
 * First fork a new child, call ptrace(PTRACE_SETOPTIONS) on it,
 * and then see whether it will stop with (SIGTRAP | 0x80).
 *
 * Use of this option enables correct handling of user-generated SIGTRAPs,
 * and SIGTRAPs generated by special instructions such as int3 on x86:
 * _start:	.globl	_start
 *		int3
 *		movl	$42, %ebx
 *		movl	$1, %eax
 *		int	$0x80
 * (compile with: "gcc -nostartfiles -nostdlib -o int3 int3.S")
 *
 * On success syscall_trap_sig becomes (SIGTRAP | 0x80) and the tested
 * options (PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEEXEC) are added to
 * ptrace_setoptions; otherwise a message is printed and both are left
 * unchanged.
 */
static void
test_ptrace_setoptions_for_all(void)
{
	const unsigned int test_options = PTRACE_O_TRACESYSGOOD |
					  PTRACE_O_TRACEEXEC;
	int pid;
	int it_worked = 0;

	pid = fork();
	if (pid < 0)
		perror_msg_and_die("fork");

	if (pid == 0) {
		/* Test child: ask to be traced, stop, then exit with status 0. */
		pid = getpid();
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
			/* Note: exits with exitcode 1 */
			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
					   __func__);
		kill(pid, SIGSTOP);
		_exit(0); /* parent should see entry into this syscall */
	}

	/* Parent: handle wait statuses until the child exits with status 0. */
	while (1) {
		int status, tracee_pid;

		errno = 0;
		tracee_pid = wait(&status);
		if (tracee_pid <= 0) {
			if (errno == EINTR)
				continue;
			kill_save_errno(pid, SIGKILL);
			perror_msg_and_die("%s: unexpected wait result %d",
					   __func__, tracee_pid);
		}
		if (WIFEXITED(status)) {
			if (WEXITSTATUS(status) == 0)
				break;
			error_msg_and_die("%s: unexpected exit status %u",
					  __func__, WEXITSTATUS(status));
		}
		if (WIFSIGNALED(status)) {
			error_msg_and_die("%s: unexpected signal %u",
					  __func__, WTERMSIG(status));
		}
		if (!WIFSTOPPED(status)) {
			kill(pid, SIGKILL);
			error_msg_and_die("%s: unexpected wait status %x",
					  __func__, status);
		}
		if (WSTOPSIG(status) == SIGSTOP) {
			/*
			 * We don't check "options aren't accepted" error.
			 * If it happens, we'll never get (SIGTRAP | 0x80),
			 * and thus will decide to not use the option.
			 * IOW: the outcome of the test will be correct.
			 */
			if (ptrace(PTRACE_SETOPTIONS, pid, 0L, test_options) < 0
			    && errno != EINVAL && errno != EIO)
				perror_msg("PTRACE_SETOPTIONS");
		}
		/* A stop with the 0x80 bit set shows the option took effect. */
		if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
			it_worked = 1;
		}
		/* Resume the child until its next stop. */
		if (ptrace(PTRACE_SYSCALL, pid, 0L, 0L) < 0) {
			kill_save_errno(pid, SIGKILL);
			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
		}
	}

	if (it_worked) {
		syscall_trap_sig = (SIGTRAP | 0x80);
		ptrace_setoptions |= test_options;
		if (debug)
			fprintf(stderr, "ptrace_setoptions = %#x\n",
				ptrace_setoptions);
		return;
	}

	error_msg("Test for PTRACE_O_TRACESYSGOOD failed, "
		  "giving up using this feature.");
}
933#endif
934
935int
936main(int argc, char *argv[])
937{
938	struct tcb *tcp;
939	int c, pid = 0;
940	int optF = 0;
941	struct sigaction sa;
942
943	progname = argv[0] ? argv[0] : "strace";
944
945	strace_tracer_pid = getpid();
946
947	/* Allocate the initial tcbtab.  */
948	tcbtabsize = argc;	/* Surely enough for all -p args.  */
949	tcbtab = calloc(tcbtabsize, sizeof(tcbtab[0]));
950	if (tcbtab == NULL)
951		error_msg_and_die("Out of memory");
952	tcp = calloc(tcbtabsize, sizeof(*tcp));
953	if (tcp == NULL)
954		error_msg_and_die("Out of memory");
955	for (c = 0; c < tcbtabsize; c++)
956		tcbtab[c] = tcp++;
957
958	outf = stderr;
959	interactive = 1;
960	set_sortby(DEFAULT_SORTBY);
961	set_personality(DEFAULT_PERSONALITY);
962	qualify("trace=all");
963	qualify("abbrev=all");
964	qualify("verbose=all");
965	qualify("signal=all");
966	while ((c = getopt(argc, argv,
967		"+cCdfFhiqrtTvVxyz"
968#ifndef USE_PROCFS
969		"D"
970#endif
971		"a:e:o:O:p:s:S:u:E:P:")) != EOF) {
972		switch (c) {
973		case 'c':
974			if (cflag == CFLAG_BOTH) {
975				error_msg_and_die("-c and -C are mutually exclusive options");
976			}
977			cflag = CFLAG_ONLY_STATS;
978			break;
979		case 'C':
980			if (cflag == CFLAG_ONLY_STATS) {
981				error_msg_and_die("-c and -C are mutually exclusive options");
982			}
983			cflag = CFLAG_BOTH;
984			break;
985		case 'd':
986			debug++;
987			break;
988#ifndef USE_PROCFS
989		case 'D':
990			daemonized_tracer = 1;
991			break;
992#endif
993		case 'F':
994			optF = 1;
995			break;
996		case 'f':
997			followfork++;
998			break;
999		case 'h':
1000			usage(stdout, 0);
1001			break;
1002		case 'i':
1003			iflag++;
1004			break;
1005		case 'q':
1006			qflag++;
1007			break;
1008		case 'r':
1009			rflag++;
1010			tflag++;
1011			break;
1012		case 't':
1013			tflag++;
1014			break;
1015		case 'T':
1016			dtime++;
1017			break;
1018		case 'x':
1019			xflag++;
1020			break;
1021		case 'y':
1022			show_fd_path = 1;
1023			break;
1024		case 'v':
1025			qualify("abbrev=none");
1026			break;
1027		case 'V':
1028			printf("%s -- version %s\n", PACKAGE_NAME, VERSION);
1029			exit(0);
1030			break;
1031		case 'z':
1032			not_failing_only = 1;
1033			break;
1034		case 'a':
1035			acolumn = atoi(optarg);
1036			break;
1037		case 'e':
1038			qualify(optarg);
1039			break;
1040		case 'o':
1041			outfname = strdup(optarg);
1042			break;
1043		case 'O':
1044			set_overhead(atoi(optarg));
1045			break;
1046		case 'p':
1047			pid = atoi(optarg);
1048			if (pid <= 0) {
1049				error_msg("Invalid process id: '%s'", optarg);
1050				break;
1051			}
1052			if (pid == strace_tracer_pid) {
1053				error_msg("I'm sorry, I can't let you do that, Dave.");
1054				break;
1055			}
1056			tcp = alloc_tcb(pid, 0);
1057			tcp->flags |= TCB_ATTACHED;
1058			pflag_seen++;
1059			break;
1060		case 'P':
1061			tracing_paths = 1;
1062			if (pathtrace_select(optarg)) {
1063				error_msg_and_die("Failed to select path '%s'", optarg);
1064			}
1065			break;
1066		case 's':
1067			max_strlen = atoi(optarg);
1068			if (max_strlen < 0) {
1069				error_msg_and_die("Invalid -s argument: '%s'", optarg);
1070			}
1071			break;
1072		case 'S':
1073			set_sortby(optarg);
1074			break;
1075		case 'u':
1076			username = strdup(optarg);
1077			break;
1078		case 'E':
1079			if (putenv(optarg) < 0) {
1080				error_msg_and_die("Out of memory");
1081			}
1082			break;
1083		default:
1084			usage(stderr, 1);
1085			break;
1086		}
1087	}
1088
1089	if ((optind == argc) == !pflag_seen)
1090		usage(stderr, 1);
1091
1092	if (pflag_seen && daemonized_tracer) {
1093		error_msg_and_die("-D and -p are mutually exclusive options");
1094	}
1095
1096	if (!followfork)
1097		followfork = optF;
1098
1099	if (followfork > 1 && cflag) {
1100		error_msg_and_die("(-c or -C) and -ff are mutually exclusive options");
1101	}
1102
1103	/* See if they want to run as another user. */
1104	if (username != NULL) {
1105		struct passwd *pent;
1106
1107		if (getuid() != 0 || geteuid() != 0) {
1108			error_msg_and_die("You must be root to use the -u option");
1109		}
1110		pent = getpwnam(username);
1111		if (pent == NULL) {
1112			error_msg_and_die("Cannot find user '%s'", username);
1113		}
1114		run_uid = pent->pw_uid;
1115		run_gid = pent->pw_gid;
1116	}
1117	else {
1118		run_uid = getuid();
1119		run_gid = getgid();
1120	}
1121
1122#ifdef LINUX
1123	if (followfork)
1124		test_ptrace_setoptions_followfork();
1125	test_ptrace_setoptions_for_all();
1126#endif
1127
1128	/* Check if they want to redirect the output. */
1129	if (outfname) {
1130		/* See if they want to pipe the output. */
1131		if (outfname[0] == '|' || outfname[0] == '!') {
1132			/*
1133			 * We can't do the <outfname>.PID funny business
1134			 * when using popen, so prohibit it.
1135			 */
1136			if (followfork > 1)
1137				error_msg_and_die("Piping the output and -ff are mutually exclusive");
1138			outf = strace_popen(outfname + 1);
1139		}
1140		else if (followfork <= 1)
1141			outf = strace_fopen(outfname);
1142	}
1143
1144	if (!outfname || outfname[0] == '|' || outfname[0] == '!') {
1145		static char buf[BUFSIZ];
1146		setvbuf(outf, buf, _IOLBF, BUFSIZ);
1147	}
1148	if (outfname && optind < argc) {
1149		interactive = 0;
1150		qflag = 1;
1151	}
1152
1153	/* Valid states here:
1154	   optind < argc	pflag_seen	outfname	interactive
1155	   1			0		0		1
1156	   0			1		0		1
1157	   1			0		1		0
1158	   0			1		1		1
1159	 */
1160
1161	/* STARTUP_CHILD must be called before the signal handlers get
1162	   installed below as they are inherited into the spawned process.
1163	   Also we do not need to be protected by them as during interruption
1164	   in the STARTUP_CHILD mode we kill the spawned process anyway.  */
1165	if (!pflag_seen)
1166		startup_child(&argv[optind]);
1167
1168	sigemptyset(&empty_set);
1169	sigemptyset(&blocked_set);
1170	sa.sa_handler = SIG_IGN;
1171	sigemptyset(&sa.sa_mask);
1172	sa.sa_flags = 0;
1173	sigaction(SIGTTOU, &sa, NULL);
1174	sigaction(SIGTTIN, &sa, NULL);
1175	if (interactive) {
1176		sigaddset(&blocked_set, SIGHUP);
1177		sigaddset(&blocked_set, SIGINT);
1178		sigaddset(&blocked_set, SIGQUIT);
1179		sigaddset(&blocked_set, SIGPIPE);
1180		sigaddset(&blocked_set, SIGTERM);
1181		sa.sa_handler = interrupt;
1182#ifdef SUNOS4
1183		/* POSIX signals on sunos4.1 are a little broken. */
1184		sa.sa_flags = SA_INTERRUPT;
1185#endif /* SUNOS4 */
1186	}
1187	sigaction(SIGHUP, &sa, NULL);
1188	sigaction(SIGINT, &sa, NULL);
1189	sigaction(SIGQUIT, &sa, NULL);
1190	sigaction(SIGPIPE, &sa, NULL);
1191	sigaction(SIGTERM, &sa, NULL);
1192#ifdef USE_PROCFS
1193	sa.sa_handler = reaper;
1194	sigaction(SIGCHLD, &sa, NULL);
1195#else
1196	/* Make sure SIGCHLD has the default action so that waitpid
1197	   definitely works without losing track of children.  The user
1198	   should not have given us a bogus state to inherit, but he might
1199	   have.  Arguably we should detect SIG_IGN here and pass it on
1200	   to children, but probably noone really needs that.  */
1201	sa.sa_handler = SIG_DFL;
1202	sigaction(SIGCHLD, &sa, NULL);
1203#endif /* USE_PROCFS */
1204
1205	if (pflag_seen || daemonized_tracer)
1206		startup_attach();
1207
1208	if (trace() < 0)
1209		exit(1);
1210	cleanup();
1211	fflush(NULL);
1212	if (exit_code > 0xff) {
1213		/* Child was killed by a signal, mimic that.  */
1214		exit_code &= 0xff;
1215		signal(exit_code, SIG_DFL);
1216		raise(exit_code);
1217		/* Paranoia - what if this signal is not fatal?
1218		   Exit with 128 + signo then.  */
1219		exit_code += 128;
1220	}
1221	exit(exit_code);
1222}
1223
1224static void
1225expand_tcbtab(void)
1226{
1227	/* Allocate some more TCBs and expand the table.
1228	   We don't want to relocate the TCBs because our
1229	   callers have pointers and it would be a pain.
1230	   So tcbtab is a table of pointers.  Since we never
1231	   free the TCBs, we allocate a single chunk of many.  */
1232	int i = tcbtabsize;
1233	struct tcb *newtcbs = calloc(tcbtabsize, sizeof(newtcbs[0]));
1234	struct tcb **newtab = realloc(tcbtab, tcbtabsize * 2 * sizeof(tcbtab[0]));
1235	if (newtab == NULL || newtcbs == NULL)
1236		error_msg_and_die("Out of memory");
1237	tcbtabsize *= 2;
1238	tcbtab = newtab;
1239	while (i < tcbtabsize)
1240		tcbtab[i++] = newtcbs++;
1241}
1242
1243struct tcb *
1244alloc_tcb(int pid, int command_options_parsed)
1245{
1246	int i;
1247	struct tcb *tcp;
1248
1249	if (nprocs == tcbtabsize)
1250		expand_tcbtab();
1251
1252	for (i = 0; i < tcbtabsize; i++) {
1253		tcp = tcbtab[i];
1254		if ((tcp->flags & TCB_INUSE) == 0) {
1255			memset(tcp, 0, sizeof(*tcp));
1256			tcp->pid = pid;
1257			tcp->flags = TCB_INUSE | TCB_STARTUP;
1258			tcp->outf = outf; /* Initialise to current out file */
1259#ifdef USE_PROCFS
1260			tcp->pfd = -1;
1261#endif
1262			nprocs++;
1263			if (debug)
1264				fprintf(stderr, "new tcb for pid %d, active tcbs:%d\n", tcp->pid, nprocs);
1265			if (command_options_parsed)
1266				newoutf(tcp);
1267			return tcp;
1268		}
1269	}
1270	error_msg_and_die("bug in alloc_tcb");
1271}
1272
1273#ifdef USE_PROCFS
1274int
1275proc_open(struct tcb *tcp, int attaching)
1276{
1277	char proc[32];
1278	long arg;
1279#ifdef SVR4
1280	int i;
1281	sysset_t syscalls;
1282	sigset_t signals;
1283	fltset_t faults;
1284#endif
1285#ifndef HAVE_POLLABLE_PROCFS
1286	static int last_pfd;
1287#endif
1288
1289#ifdef HAVE_MP_PROCFS
1290	/* Open the process pseudo-files in /proc. */
1291	sprintf(proc, "/proc/%d/ctl", tcp->pid);
1292	tcp->pfd = open(proc, O_WRONLY|O_EXCL);
1293	if (tcp->pfd < 0) {
1294		perror("strace: open(\"/proc/...\", ...)");
1295		return -1;
1296	}
1297	set_cloexec_flag(tcp->pfd);
1298	sprintf(proc, "/proc/%d/status", tcp->pid);
1299	tcp->pfd_stat = open(proc, O_RDONLY|O_EXCL);
1300	if (tcp->pfd_stat < 0) {
1301		perror("strace: open(\"/proc/...\", ...)");
1302		return -1;
1303	}
1304	set_cloexec_flag(tcp->pfd_stat);
1305	sprintf(proc, "/proc/%d/as", tcp->pid);
1306	tcp->pfd_as = open(proc, O_RDONLY|O_EXCL);
1307	if (tcp->pfd_as < 0) {
1308		perror("strace: open(\"/proc/...\", ...)");
1309		return -1;
1310	}
1311	set_cloexec_flag(tcp->pfd_as);
1312#else
1313	/* Open the process pseudo-file in /proc. */
1314# ifndef FREEBSD
1315	sprintf(proc, "/proc/%d", tcp->pid);
1316	tcp->pfd = open(proc, O_RDWR|O_EXCL);
1317# else
1318	sprintf(proc, "/proc/%d/mem", tcp->pid);
1319	tcp->pfd = open(proc, O_RDWR);
1320# endif
1321	if (tcp->pfd < 0) {
1322		perror("strace: open(\"/proc/...\", ...)");
1323		return -1;
1324	}
1325	set_cloexec_flag(tcp->pfd);
1326#endif
1327#ifdef FREEBSD
1328	sprintf(proc, "/proc/%d/regs", tcp->pid);
1329	tcp->pfd_reg = open(proc, O_RDONLY);
1330	if (tcp->pfd_reg < 0) {
1331		perror("strace: open(\"/proc/.../regs\", ...)");
1332		return -1;
1333	}
1334	if (cflag) {
1335		sprintf(proc, "/proc/%d/status", tcp->pid);
1336		tcp->pfd_status = open(proc, O_RDONLY);
1337		if (tcp->pfd_status < 0) {
1338			perror("strace: open(\"/proc/.../status\", ...)");
1339			return -1;
1340		}
1341	} else
1342		tcp->pfd_status = -1;
1343#endif /* FREEBSD */
1344	rebuild_pollv();
1345	if (!attaching) {
1346		/*
1347		 * Wait for the child to pause.  Because of a race
1348		 * condition we have to poll for the event.
1349		 */
1350		for (;;) {
1351			if (IOCTL_STATUS(tcp) < 0) {
1352				perror("strace: PIOCSTATUS");
1353				return -1;
1354			}
1355			if (tcp->status.PR_FLAGS & PR_ASLEEP)
1356				break;
1357		}
1358	}
1359#ifndef FREEBSD
1360	/* Stop the process so that we own the stop. */
1361	if (IOCTL(tcp->pfd, PIOCSTOP, (char *)NULL) < 0) {
1362		perror("strace: PIOCSTOP");
1363		return -1;
1364	}
1365#endif
1366#ifdef PIOCSET
1367	/* Set Run-on-Last-Close. */
1368	arg = PR_RLC;
1369	if (IOCTL(tcp->pfd, PIOCSET, &arg) < 0) {
1370		perror("PIOCSET PR_RLC");
1371		return -1;
1372	}
1373	/* Set or Reset Inherit-on-Fork. */
1374	arg = PR_FORK;
1375	if (IOCTL(tcp->pfd, followfork ? PIOCSET : PIOCRESET, &arg) < 0) {
1376		perror("PIOC{SET,RESET} PR_FORK");
1377		return -1;
1378	}
1379#else  /* !PIOCSET */
1380#ifndef FREEBSD
1381	if (ioctl(tcp->pfd, PIOCSRLC) < 0) {
1382		perror("PIOCSRLC");
1383		return -1;
1384	}
1385	if (ioctl(tcp->pfd, followfork ? PIOCSFORK : PIOCRFORK) < 0) {
1386		perror("PIOC{S,R}FORK");
1387		return -1;
1388	}
1389#else /* FREEBSD */
1390	/* just unset the PF_LINGER flag for the Run-on-Last-Close. */
1391	if (ioctl(tcp->pfd, PIOCGFL, &arg) < 0) {
1392	        perror("PIOCGFL");
1393		return -1;
1394	}
1395	arg &= ~PF_LINGER;
1396	if (ioctl(tcp->pfd, PIOCSFL, arg) < 0) {
1397		perror("PIOCSFL");
1398		return -1;
1399	}
1400#endif /* FREEBSD */
1401#endif /* !PIOCSET */
1402#ifndef FREEBSD
1403	/* Enable all syscall entries we care about. */
1404	premptyset(&syscalls);
1405	for (i = 1; i < MAX_QUALS; ++i) {
1406		if (i > (sizeof syscalls) * CHAR_BIT) break;
1407		if (qual_flags[i] & QUAL_TRACE) praddset(&syscalls, i);
1408	}
1409	praddset(&syscalls, SYS_execve);
1410	if (followfork) {
1411		praddset(&syscalls, SYS_fork);
1412#ifdef SYS_forkall
1413		praddset(&syscalls, SYS_forkall);
1414#endif
1415#ifdef SYS_fork1
1416		praddset(&syscalls, SYS_fork1);
1417#endif
1418#ifdef SYS_rfork1
1419		praddset(&syscalls, SYS_rfork1);
1420#endif
1421#ifdef SYS_rforkall
1422		praddset(&syscalls, SYS_rforkall);
1423#endif
1424	}
1425	if (IOCTL(tcp->pfd, PIOCSENTRY, &syscalls) < 0) {
1426		perror("PIOCSENTRY");
1427		return -1;
1428	}
1429	/* Enable the syscall exits. */
1430	if (IOCTL(tcp->pfd, PIOCSEXIT, &syscalls) < 0) {
1431		perror("PIOSEXIT");
1432		return -1;
1433	}
1434	/* Enable signals we care about. */
1435	premptyset(&signals);
1436	for (i = 1; i < MAX_QUALS; ++i) {
1437		if (i > (sizeof signals) * CHAR_BIT) break;
1438		if (qual_flags[i] & QUAL_SIGNAL) praddset(&signals, i);
1439	}
1440	if (IOCTL(tcp->pfd, PIOCSTRACE, &signals) < 0) {
1441		perror("PIOCSTRACE");
1442		return -1;
1443	}
1444	/* Enable faults we care about */
1445	premptyset(&faults);
1446	for (i = 1; i < MAX_QUALS; ++i) {
1447		if (i > (sizeof faults) * CHAR_BIT) break;
1448		if (qual_flags[i] & QUAL_FAULT) praddset(&faults, i);
1449	}
1450	if (IOCTL(tcp->pfd, PIOCSFAULT, &faults) < 0) {
1451		perror("PIOCSFAULT");
1452		return -1;
1453	}
1454#else /* FREEBSD */
1455	/* set events flags. */
1456	arg = S_SIG | S_SCE | S_SCX;
1457	if (ioctl(tcp->pfd, PIOCBIS, arg) < 0) {
1458		perror("PIOCBIS");
1459		return -1;
1460	}
1461#endif /* FREEBSD */
1462	if (!attaching) {
1463#ifdef MIPS
1464		/*
1465		 * The SGI PRSABORT doesn't work for pause() so
1466		 * we send it a caught signal to wake it up.
1467		 */
1468		kill(tcp->pid, SIGINT);
1469#else /* !MIPS */
1470#ifdef PRSABORT
1471		/* The child is in a pause(), abort it. */
1472		arg = PRSABORT;
1473		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0) {
1474			perror("PIOCRUN");
1475			return -1;
1476		}
1477#endif
1478#endif /* !MIPS*/
1479#ifdef FREEBSD
1480		/* wake up the child if it received the SIGSTOP */
1481		kill(tcp->pid, SIGCONT);
1482#endif
1483		for (;;) {
1484			/* Wait for the child to do something. */
1485			if (IOCTL_WSTOP(tcp) < 0) {
1486				perror("PIOCWSTOP");
1487				return -1;
1488			}
1489			if (tcp->status.PR_WHY == PR_SYSENTRY) {
1490				tcp->flags &= ~TCB_INSYSCALL;
1491				get_scno_on_sysenter(tcp);
1492				if (known_scno(tcp) == SYS_execve)
1493					break;
1494			}
1495			/* Set it running: maybe execve will be next. */
1496#ifndef FREEBSD
1497			arg = 0;
1498			if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0)
1499#else
1500			if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0)
1501#endif
1502			{
1503				perror("PIOCRUN");
1504				return -1;
1505			}
1506#ifdef FREEBSD
1507			/* handle the case where we "opened" the child before
1508			   it did the kill -STOP */
1509			if (tcp->status.PR_WHY == PR_SIGNALLED &&
1510			    tcp->status.PR_WHAT == SIGSTOP)
1511			        kill(tcp->pid, SIGCONT);
1512#endif
1513		}
1514	}
1515#ifdef FREEBSD
1516	else {
1517		if (attaching < 2) {
1518			/* We are attaching to an already running process.
1519			 * Try to figure out the state of the process in syscalls,
1520			 * to handle the first event well.
1521			 * This is done by having a look at the "wchan" property of the
1522			 * process, which tells where it is stopped (if it is). */
1523			FILE * status;
1524			char wchan[20]; /* should be enough */
1525
1526			sprintf(proc, "/proc/%d/status", tcp->pid);
1527			status = fopen(proc, "r");
1528			if (status &&
1529			    (fscanf(status, "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d"
1530				    "%*d,%*d %*d,%*d %19s", wchan) == 1) &&
1531			    strcmp(wchan, "nochan") && strcmp(wchan, "spread") &&
1532			    strcmp(wchan, "stopevent")) {
1533				/* The process is asleep in the middle of a syscall.
1534				   Fake the syscall entry event */
1535				tcp->flags &= ~(TCB_INSYSCALL|TCB_STARTUP);
1536				tcp->status.PR_WHY = PR_SYSENTRY;
1537				trace_syscall(tcp);
1538			}
1539			if (status)
1540				fclose(status);
1541		} /* otherwise it's a fork being followed */
1542	}
1543#endif /* FREEBSD */
1544#ifndef HAVE_POLLABLE_PROCFS
1545	if (proc_poll_pipe[0] != -1)
1546		proc_poller(tcp->pfd);
1547	else if (nprocs > 1) {
1548		proc_poll_open();
1549		proc_poller(last_pfd);
1550		proc_poller(tcp->pfd);
1551	}
1552	last_pfd = tcp->pfd;
1553#endif /* !HAVE_POLLABLE_PROCFS */
1554	return 0;
1555}
1556
1557#endif /* USE_PROCFS */
1558
1559struct tcb *
1560pid2tcb(int pid)
1561{
1562	int i;
1563
1564	if (pid <= 0)
1565		return NULL;
1566
1567	for (i = 0; i < tcbtabsize; i++) {
1568		struct tcb *tcp = tcbtab[i];
1569		if (tcp->pid == pid && (tcp->flags & TCB_INUSE))
1570			return tcp;
1571	}
1572
1573	return NULL;
1574}
1575
1576#ifdef USE_PROCFS
1577
1578static struct tcb *
1579first_used_tcb(void)
1580{
1581	int i;
1582	struct tcb *tcp;
1583	for (i = 0; i < tcbtabsize; i++) {
1584		tcp = tcbtab[i];
1585		if (tcp->flags & TCB_INUSE)
1586			return tcp;
1587	}
1588	return NULL;
1589}
1590
1591static struct tcb *
1592pfd2tcb(int pfd)
1593{
1594	int i;
1595
1596	for (i = 0; i < tcbtabsize; i++) {
1597		struct tcb *tcp = tcbtab[i];
1598		if (tcp->pfd != pfd)
1599			continue;
1600		if (tcp->flags & TCB_INUSE)
1601			return tcp;
1602	}
1603	return NULL;
1604}
1605
1606#endif /* USE_PROCFS */
1607
1608void
1609droptcb(struct tcb *tcp)
1610{
1611	if (tcp->pid == 0)
1612		return;
1613
1614	nprocs--;
1615	if (debug)
1616		fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
1617
1618#ifdef USE_PROCFS
1619	if (tcp->pfd != -1) {
1620		close(tcp->pfd);
1621		tcp->pfd = -1;
1622# ifdef FREEBSD
1623		if (tcp->pfd_reg != -1) {
1624		        close(tcp->pfd_reg);
1625		        tcp->pfd_reg = -1;
1626		}
1627		if (tcp->pfd_status != -1) {
1628			close(tcp->pfd_status);
1629			tcp->pfd_status = -1;
1630		}
1631# endif
1632		tcp->flags = 0; /* rebuild_pollv needs it */
1633		rebuild_pollv();
1634	}
1635#endif
1636
1637	if (outfname && followfork > 1 && tcp->outf)
1638		fclose(tcp->outf);
1639
1640	memset(tcp, 0, sizeof(*tcp));
1641}
1642
1643/* detach traced process; continue with sig
1644   Never call DETACH twice on the same process as both unattached and
1645   attached-unstopped processes give the same ESRCH.  For unattached process we
1646   would SIGSTOP it and wait for its SIGSTOP notification forever.  */
1647
1648static int
1649detach(struct tcb *tcp, int sig)
1650{
1651	int error = 0;
1652#ifdef LINUX
1653	int status, catch_sigstop;
1654#endif
1655
1656	if (tcp->flags & TCB_BPTSET)
1657		clearbpt(tcp);
1658
1659#ifdef LINUX
1660	/*
1661	 * Linux wrongly insists the child be stopped
1662	 * before detaching.  Arghh.  We go through hoops
1663	 * to make a clean break of things.
1664	 */
1665#if defined(SPARC)
1666#undef PTRACE_DETACH
1667#define PTRACE_DETACH PTRACE_SUNDETACH
1668#endif
1669	/*
1670	 * On TCB_STARTUP we did PTRACE_ATTACH but still did not get the
1671	 * expected SIGSTOP.  We must catch exactly one as otherwise the
1672	 * detached process would be left stopped (process state T).
1673	 */
1674	catch_sigstop = (tcp->flags & TCB_STARTUP);
1675	error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, sig);
1676	if (error == 0) {
1677		/* On a clear day, you can see forever. */
1678	}
1679	else if (errno != ESRCH) {
1680		/* Shouldn't happen. */
1681		perror("detach: ptrace(PTRACE_DETACH, ...)");
1682	}
1683	else if (my_tkill(tcp->pid, 0) < 0) {
1684		if (errno != ESRCH)
1685			perror("detach: checking sanity");
1686	}
1687	else if (!catch_sigstop && my_tkill(tcp->pid, SIGSTOP) < 0) {
1688		if (errno != ESRCH)
1689			perror("detach: stopping child");
1690	}
1691	else
1692		catch_sigstop = 1;
1693	if (catch_sigstop) {
1694		for (;;) {
1695#ifdef __WALL
1696			if (wait4(tcp->pid, &status, __WALL, NULL) < 0) {
1697				if (errno == ECHILD) /* Already gone.  */
1698					break;
1699				if (errno != EINVAL) {
1700					perror("detach: waiting");
1701					break;
1702				}
1703#endif /* __WALL */
1704				/* No __WALL here.  */
1705				if (waitpid(tcp->pid, &status, 0) < 0) {
1706					if (errno != ECHILD) {
1707						perror("detach: waiting");
1708						break;
1709					}
1710#ifdef __WCLONE
1711					/* If no processes, try clones.  */
1712					if (wait4(tcp->pid, &status, __WCLONE,
1713						  NULL) < 0) {
1714						if (errno != ECHILD)
1715							perror("detach: waiting");
1716						break;
1717					}
1718#endif /* __WCLONE */
1719				}
1720#ifdef __WALL
1721			}
1722#endif
1723			if (!WIFSTOPPED(status)) {
1724				/* Au revoir, mon ami. */
1725				break;
1726			}
1727			if (WSTOPSIG(status) == SIGSTOP) {
1728				ptrace_restart(PTRACE_DETACH, tcp, sig);
1729				break;
1730			}
1731			error = ptrace_restart(PTRACE_CONT, tcp,
1732					WSTOPSIG(status) == syscall_trap_sig ? 0
1733					: WSTOPSIG(status));
1734			if (error < 0)
1735				break;
1736		}
1737	}
1738#endif /* LINUX */
1739
1740#if defined(SUNOS4)
1741	/* PTRACE_DETACH won't respect `sig' argument, so we post it here. */
1742	if (sig && kill(tcp->pid, sig) < 0)
1743		perror("detach: kill");
1744	sig = 0;
1745	error = ptrace_restart(PTRACE_DETACH, tcp, sig);
1746#endif /* SUNOS4 */
1747
1748	if (!qflag)
1749		fprintf(stderr, "Process %u detached\n", tcp->pid);
1750
1751	droptcb(tcp);
1752
1753	return error;
1754}
1755
1756#ifdef USE_PROCFS
1757
/*
 * SIGCHLD handler: reap every child that has already terminated so
 * that no zombies are left behind.  Never blocks (WNOHANG).
 *
 * waitpid() sets errno when it finally fails (typically ECHILD).
 * Because this runs asynchronously, errno is saved on entry and
 * restored on exit so the interrupted code never sees it change.
 */
static void reaper(int sig)
{
	int saved_errno = errno;
	int status;

	(void) sig;
	while (waitpid(-1, &status, WNOHANG) > 0)
		continue;

	errno = saved_errno;
}
1766
1767#endif /* USE_PROCFS */
1768
1769static void
1770cleanup(void)
1771{
1772	int i;
1773	struct tcb *tcp;
1774
1775	for (i = 0; i < tcbtabsize; i++) {
1776		tcp = tcbtab[i];
1777		if (!(tcp->flags & TCB_INUSE))
1778			continue;
1779		if (debug)
1780			fprintf(stderr,
1781				"cleanup: looking at pid %u\n", tcp->pid);
1782		if (tcp_last &&
1783		    (!outfname || followfork < 2 || tcp_last == tcp)) {
1784			tprintf(" <unfinished ...>");
1785			printtrailer();
1786		}
1787		if (tcp->flags & TCB_ATTACHED)
1788			detach(tcp, 0);
1789		else {
1790			kill(tcp->pid, SIGCONT);
1791			kill(tcp->pid, SIGTERM);
1792		}
1793	}
1794	if (cflag)
1795		call_summary(outf);
1796}
1797
1798static void
1799interrupt(int sig)
1800{
1801	interrupted = 1;
1802}
1803
1804#ifndef HAVE_STRERROR
1805
1806#if !HAVE_DECL_SYS_ERRLIST
1807extern int sys_nerr;
1808extern char *sys_errlist[];
1809#endif /* HAVE_DECL_SYS_ERRLIST */
1810
1811const char *
1812strerror(int err_no)
1813{
1814	static char buf[64];
1815
1816	if (err_no < 1 || err_no >= sys_nerr) {
1817		sprintf(buf, "Unknown error %d", err_no);
1818		return buf;
1819	}
1820	return sys_errlist[err_no];
1821}
1822
#endif /* HAVE_STRERROR */
1824
1825#ifndef HAVE_STRSIGNAL
1826
1827#if defined HAVE_SYS_SIGLIST && !defined HAVE_DECL_SYS_SIGLIST
1828extern char *sys_siglist[];
1829#endif
1830#if defined HAVE_SYS__SIGLIST && !defined HAVE_DECL__SYS_SIGLIST
1831extern char *_sys_siglist[];
1832#endif
1833
1834const char *
1835strsignal(int sig)
1836{
1837	static char buf[64];
1838
1839	if (sig < 1 || sig >= NSIG) {
1840		sprintf(buf, "Unknown signal %d", sig);
1841		return buf;
1842	}
1843#ifdef HAVE__SYS_SIGLIST
1844	return _sys_siglist[sig];
1845#else
1846	return sys_siglist[sig];
1847#endif
1848}
1849
1850#endif /* HAVE_STRSIGNAL */
1851
1852#ifdef USE_PROCFS
1853
1854static void
1855rebuild_pollv(void)
1856{
1857	int i, j;
1858
1859	free(pollv);
1860	pollv = malloc(nprocs * sizeof(pollv[0]));
1861	if (pollv == NULL) {
1862		error_msg_and_die("Out of memory");
1863	}
1864
1865	for (i = j = 0; i < tcbtabsize; i++) {
1866		struct tcb *tcp = tcbtab[i];
1867		if (!(tcp->flags & TCB_INUSE))
1868			continue;
1869		pollv[j].fd = tcp->pfd;
1870		pollv[j].events = POLLWANT;
1871		j++;
1872	}
1873	if (j != nprocs) {
1874		error_msg_and_die("proc miscount");
1875	}
1876}
1877
1878#ifndef HAVE_POLLABLE_PROCFS
1879
1880static void
1881proc_poll_open(void)
1882{
1883	int i;
1884
1885	if (pipe(proc_poll_pipe) < 0) {
1886		perror_msg_and_die("pipe");
1887	}
1888	for (i = 0; i < 2; i++) {
1889		set_cloexec_flag(proc_poll_pipe[i]);
1890	}
1891}
1892
1893static int
1894proc_poll(struct pollfd *pollv, int nfds, int timeout)
1895{
1896	int i;
1897	int n;
1898	struct proc_pollfd pollinfo;
1899
1900	n = read(proc_poll_pipe[0], &pollinfo, sizeof(pollinfo));
1901	if (n < 0)
1902		return n;
1903	if (n != sizeof(struct proc_pollfd)) {
1904		error_msg_and_die("panic: short read: %d", n);
1905	}
1906	for (i = 0; i < nprocs; i++) {
1907		if (pollv[i].fd == pollinfo.fd)
1908			pollv[i].revents = pollinfo.revents;
1909		else
1910			pollv[i].revents = 0;
1911	}
1912	poller_pid = pollinfo.pid;
1913	return 1;
1914}
1915
/*
 * Deliberately empty signal handler.  proc_poller() installs it for
 * SIGUSR1 so that delivery of the signal ends its sigsuspend().
 */
static void
wakeup_handler(int sig)
{
	(void) sig;
}
1920
1921static void
1922proc_poller(int pfd)
1923{
1924	struct proc_pollfd pollinfo;
1925	struct sigaction sa;
1926	sigset_t blocked_set, empty_set;
1927	int i;
1928	int n;
1929	struct rlimit rl;
1930#ifdef FREEBSD
1931	struct procfs_status pfs;
1932#endif /* FREEBSD */
1933
1934	switch (fork()) {
1935	case -1:
1936		perror_msg_and_die("fork");
1937	case 0:
1938		break;
1939	default:
1940		return;
1941	}
1942
1943	sa.sa_handler = interactive ? SIG_DFL : SIG_IGN;
1944	sa.sa_flags = 0;
1945	sigemptyset(&sa.sa_mask);
1946	sigaction(SIGHUP, &sa, NULL);
1947	sigaction(SIGINT, &sa, NULL);
1948	sigaction(SIGQUIT, &sa, NULL);
1949	sigaction(SIGPIPE, &sa, NULL);
1950	sigaction(SIGTERM, &sa, NULL);
1951	sa.sa_handler = wakeup_handler;
1952	sigaction(SIGUSR1, &sa, NULL);
1953	sigemptyset(&blocked_set);
1954	sigaddset(&blocked_set, SIGUSR1);
1955	sigprocmask(SIG_BLOCK, &blocked_set, NULL);
1956	sigemptyset(&empty_set);
1957
1958	if (getrlimit(RLIMIT_NOFILE, &rl) < 0) {
1959		perror_msg_and_die("getrlimit(RLIMIT_NOFILE, ...)");
1960	}
1961	n = rl.rlim_cur;
1962	for (i = 0; i < n; i++) {
1963		if (i != pfd && i != proc_poll_pipe[1])
1964			close(i);
1965	}
1966
1967	pollinfo.fd = pfd;
1968	pollinfo.pid = getpid();
1969	for (;;) {
1970#ifndef FREEBSD
1971		if (ioctl(pfd, PIOCWSTOP, NULL) < 0)
1972#else
1973		if (ioctl(pfd, PIOCWSTOP, &pfs) < 0)
1974#endif
1975		{
1976			switch (errno) {
1977			case EINTR:
1978				continue;
1979			case EBADF:
1980				pollinfo.revents = POLLERR;
1981				break;
1982			case ENOENT:
1983				pollinfo.revents = POLLHUP;
1984				break;
1985			default:
1986				perror("proc_poller: PIOCWSTOP");
1987			}
1988			write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
1989			_exit(0);
1990		}
1991		pollinfo.revents = POLLWANT;
1992		write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
1993		sigsuspend(&empty_set);
1994	}
1995}
1996
1997#endif /* !HAVE_POLLABLE_PROCFS */
1998
1999static int
2000choose_pfd()
2001{
2002	int i, j;
2003	struct tcb *tcp;
2004
2005	static int last;
2006
2007	if (followfork < 2 &&
2008	    last < nprocs && (pollv[last].revents & POLLWANT)) {
2009		/*
2010		 * The previous process is ready to run again.  We'll
2011		 * let it do so if it is currently in a syscall.  This
2012		 * heuristic improves the readability of the trace.
2013		 */
2014		tcp = pfd2tcb(pollv[last].fd);
2015		if (tcp && exiting(tcp))
2016			return pollv[last].fd;
2017	}
2018
2019	for (i = 0; i < nprocs; i++) {
2020		/* Let competing children run round robin. */
2021		j = (i + last + 1) % nprocs;
2022		if (pollv[j].revents & (POLLHUP | POLLERR)) {
2023			tcp = pfd2tcb(pollv[j].fd);
2024			if (!tcp) {
2025				error_msg_and_die("lost proc");
2026			}
2027			droptcb(tcp);
2028			return -1;
2029		}
2030		if (pollv[j].revents & POLLWANT) {
2031			last = j;
2032			return pollv[j].fd;
2033		}
2034	}
2035	error_msg_and_die("nothing ready");
2036}
2037
2038static int
2039trace(void)
2040{
2041#ifdef POLL_HACK
2042	struct tcb *in_syscall = NULL;
2043#endif
2044	struct tcb *tcp;
2045	int pfd;
2046	int what;
2047	int ioctl_result = 0, ioctl_errno = 0;
2048	long arg;
2049
2050	for (;;) {
2051		if (interactive)
2052			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2053
2054		if (nprocs == 0)
2055			break;
2056
2057		switch (nprocs) {
2058		case 1:
2059#ifndef HAVE_POLLABLE_PROCFS
2060			if (proc_poll_pipe[0] == -1) {
2061#endif
2062				tcp = first_used_tcb();
2063				if (!tcp)
2064					continue;
2065				pfd = tcp->pfd;
2066				if (pfd == -1)
2067					continue;
2068				break;
2069#ifndef HAVE_POLLABLE_PROCFS
2070			}
2071			/* fall through ... */
2072#endif /* !HAVE_POLLABLE_PROCFS */
2073		default:
2074#ifdef HAVE_POLLABLE_PROCFS
2075#ifdef POLL_HACK
2076		        /* On some systems (e.g. UnixWare) we get too much ugly
2077			   "unfinished..." stuff when multiple proceses are in
2078			   syscalls.  Here's a nasty hack */
2079
2080			if (in_syscall) {
2081				struct pollfd pv;
2082				tcp = in_syscall;
2083				in_syscall = NULL;
2084				pv.fd = tcp->pfd;
2085				pv.events = POLLWANT;
2086				what = poll(&pv, 1, 1);
2087				if (what < 0) {
2088					if (interrupted)
2089						return 0;
2090					continue;
2091				}
2092				else if (what == 1 && pv.revents & POLLWANT) {
2093					goto FOUND;
2094				}
2095			}
2096#endif
2097
2098			if (poll(pollv, nprocs, INFTIM) < 0) {
2099				if (interrupted)
2100					return 0;
2101				continue;
2102			}
2103#else /* !HAVE_POLLABLE_PROCFS */
2104			if (proc_poll(pollv, nprocs, INFTIM) < 0) {
2105				if (interrupted)
2106					return 0;
2107				continue;
2108			}
2109#endif /* !HAVE_POLLABLE_PROCFS */
2110			pfd = choose_pfd();
2111			if (pfd == -1)
2112				continue;
2113			break;
2114		}
2115
2116		/* Look up `pfd' in our table. */
2117		tcp = pfd2tcb(pfd);
2118		if (tcp == NULL) {
2119			error_msg_and_die("unknown pfd: %u", pfd);
2120		}
2121#ifdef POLL_HACK
2122	FOUND:
2123#endif
2124		/* Get the status of the process. */
2125		if (!interrupted) {
2126#ifndef FREEBSD
2127			ioctl_result = IOCTL_WSTOP(tcp);
2128#else /* FREEBSD */
2129			/* Thanks to some scheduling mystery, the first poller
2130			   sometimes waits for the already processed end of fork
2131			   event. Doing a non blocking poll here solves the problem. */
2132			if (proc_poll_pipe[0] != -1)
2133				ioctl_result = IOCTL_STATUS(tcp);
2134			else
2135				ioctl_result = IOCTL_WSTOP(tcp);
2136#endif /* FREEBSD */
2137			ioctl_errno = errno;
2138#ifndef HAVE_POLLABLE_PROCFS
2139			if (proc_poll_pipe[0] != -1) {
2140				if (ioctl_result < 0)
2141					kill(poller_pid, SIGKILL);
2142				else
2143					kill(poller_pid, SIGUSR1);
2144			}
2145#endif /* !HAVE_POLLABLE_PROCFS */
2146		}
2147		if (interrupted)
2148			return 0;
2149
2150		if (interactive)
2151			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2152
2153		if (ioctl_result < 0) {
2154			/* Find out what happened if it failed. */
2155			switch (ioctl_errno) {
2156			case EINTR:
2157			case EBADF:
2158				continue;
2159#ifdef FREEBSD
2160			case ENOTTY:
2161#endif
2162			case ENOENT:
2163				droptcb(tcp);
2164				continue;
2165			default:
2166				perror_msg_and_die("PIOCWSTOP");
2167			}
2168		}
2169
2170#ifdef FREEBSD
2171		if ((tcp->flags & TCB_STARTUP) && (tcp->status.PR_WHY == PR_SYSEXIT)) {
2172			/* discard first event for a syscall we never entered */
2173			IOCTL(tcp->pfd, PIOCRUN, 0);
2174			continue;
2175		}
2176#endif
2177
2178		/* clear the just started flag */
2179		tcp->flags &= ~TCB_STARTUP;
2180
2181		/* set current output file */
2182		outf = tcp->outf;
2183		curcol = tcp->curcol;
2184
2185		if (cflag) {
2186			struct timeval stime;
2187#ifdef FREEBSD
2188			char buf[1024];
2189			int len;
2190
2191			len = pread(tcp->pfd_status, buf, sizeof(buf) - 1, 0);
2192			if (len > 0) {
2193				buf[len] = '\0';
2194				sscanf(buf,
2195				       "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d %*d,%*d %ld,%ld",
2196				       &stime.tv_sec, &stime.tv_usec);
2197			} else
2198				stime.tv_sec = stime.tv_usec = 0;
2199#else /* !FREEBSD */
2200			stime.tv_sec = tcp->status.pr_stime.tv_sec;
2201			stime.tv_usec = tcp->status.pr_stime.tv_nsec/1000;
2202#endif /* !FREEBSD */
2203			tv_sub(&tcp->dtime, &stime, &tcp->stime);
2204			tcp->stime = stime;
2205		}
2206		what = tcp->status.PR_WHAT;
2207		switch (tcp->status.PR_WHY) {
2208#ifndef FREEBSD
2209		case PR_REQUESTED:
2210			if (tcp->status.PR_FLAGS & PR_ASLEEP) {
2211				tcp->status.PR_WHY = PR_SYSENTRY;
2212				if (trace_syscall(tcp) < 0) {
2213					error_msg_and_die("syscall trouble");
2214				}
2215			}
2216			break;
2217#endif /* !FREEBSD */
2218		case PR_SYSENTRY:
2219#ifdef POLL_HACK
2220		        in_syscall = tcp;
2221#endif
2222		case PR_SYSEXIT:
2223			if (trace_syscall(tcp) < 0) {
2224				error_msg_and_die("syscall trouble");
2225			}
2226			break;
2227		case PR_SIGNALLED:
2228			if (cflag != CFLAG_ONLY_STATS
2229			    && (qual_flags[what] & QUAL_SIGNAL)) {
2230				printleader(tcp);
2231				tprintf("--- %s (%s) ---",
2232					signame(what), strsignal(what));
2233				printtrailer();
2234#ifdef PR_INFO
2235				if (tcp->status.PR_INFO.si_signo == what) {
2236					printleader(tcp);
2237					tprintf("    siginfo=");
2238					printsiginfo(&tcp->status.PR_INFO, 1);
2239					printtrailer();
2240				}
2241#endif
2242			}
2243			break;
2244		case PR_FAULTED:
2245			if (cflag != CFLAGS_ONLY_STATS
2246			    && (qual_flags[what] & QUAL_FAULT)) {
2247				printleader(tcp);
2248				tprintf("=== FAULT %d ===", what);
2249				printtrailer();
2250			}
2251			break;
2252#ifdef FREEBSD
2253		case 0: /* handle case we polled for nothing */
2254			continue;
2255#endif
2256		default:
2257			error_msg_and_die("odd stop %d", tcp->status.PR_WHY);
2258			break;
2259		}
2260		/* Remember current print column before continuing. */
2261		tcp->curcol = curcol;
2262		arg = 0;
2263#ifndef FREEBSD
2264		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0)
2265#else
2266		if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0)
2267#endif
2268		{
2269			perror_msg_and_die("PIOCRUN");
2270		}
2271	}
2272	return 0;
2273}
2274
2275#else /* !USE_PROCFS */
2276
2277static int
2278trace()
2279{
2280	int pid;
2281	int wait_errno;
2282	int status;
2283	struct tcb *tcp;
2284#ifdef LINUX
2285	struct rusage ru;
2286	struct rusage *rup = cflag ? &ru : NULL;
2287# ifdef __WALL
2288	static int wait4_options = __WALL;
2289# endif
2290#endif /* LINUX */
2291
2292	while (nprocs != 0) {
2293		if (interrupted)
2294			return 0;
2295		if (interactive)
2296			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2297#ifdef LINUX
2298# ifdef __WALL
2299		pid = wait4(-1, &status, wait4_options, rup);
2300		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
2301			/* this kernel does not support __WALL */
2302			wait4_options &= ~__WALL;
2303			pid = wait4(-1, &status, wait4_options, rup);
2304		}
2305		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
2306			/* most likely a "cloned" process */
2307			pid = wait4(-1, &status, __WCLONE, rup);
2308			if (pid < 0) {
2309				perror_msg("wait4(__WCLONE) failed");
2310			}
2311		}
2312# else
2313		pid = wait4(-1, &status, 0, rup);
2314# endif /* __WALL */
2315#endif /* LINUX */
2316#ifdef SUNOS4
2317		pid = wait(&status);
2318#endif
2319		wait_errno = errno;
2320		if (interactive)
2321			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2322
2323		if (pid < 0) {
2324			switch (wait_errno) {
2325			case EINTR:
2326				continue;
2327			case ECHILD:
2328				/*
2329				 * We would like to verify this case
2330				 * but sometimes a race in Solbourne's
2331				 * version of SunOS sometimes reports
2332				 * ECHILD before sending us SIGCHILD.
2333				 */
2334				return 0;
2335			default:
2336				errno = wait_errno;
2337				perror("strace: wait");
2338				return -1;
2339			}
2340		}
2341		if (pid == popen_pid) {
2342			if (WIFEXITED(status) || WIFSIGNALED(status))
2343				popen_pid = 0;
2344			continue;
2345		}
2346		if (debug) {
2347			char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
2348#ifdef LINUX
2349			unsigned ev = (unsigned)status >> 16;
2350			if (ev) {
2351				static const char *const event_names[] = {
2352					[PTRACE_EVENT_CLONE] = "CLONE",
2353					[PTRACE_EVENT_FORK]  = "FORK",
2354					[PTRACE_EVENT_VFORK] = "VFORK",
2355					[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
2356					[PTRACE_EVENT_EXEC]  = "EXEC",
2357					[PTRACE_EVENT_EXIT]  = "EXIT",
2358				};
2359				const char *e;
2360				if (ev < ARRAY_SIZE(event_names))
2361					e = event_names[ev];
2362				else {
2363					sprintf(buf, "?? (%u)", ev);
2364					e = buf;
2365				}
2366				fprintf(stderr, " PTRACE_EVENT_%s", e);
2367			}
2368#endif
2369			strcpy(buf, "???");
2370			if (WIFSIGNALED(status))
2371#ifdef WCOREDUMP
2372				sprintf(buf, "WIFSIGNALED,%ssig=%s",
2373						WCOREDUMP(status) ? "core," : "",
2374						signame(WTERMSIG(status)));
2375#else
2376				sprintf(buf, "WIFSIGNALED,sig=%s",
2377						signame(WTERMSIG(status)));
2378#endif
2379			if (WIFEXITED(status))
2380				sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
2381			if (WIFSTOPPED(status))
2382				sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
2383#ifdef WIFCONTINUED
2384			if (WIFCONTINUED(status))
2385				strcpy(buf, "WIFCONTINUED");
2386#endif
2387			fprintf(stderr, " [wait(0x%04x) = %u] %s\n", status, pid, buf);
2388		}
2389
2390		/* Look up `pid' in our table. */
2391		tcp = pid2tcb(pid);
2392		if (tcp == NULL) {
2393#ifdef LINUX
2394			if (followfork) {
2395				/* This is needed to go with the CLONE_PTRACE
2396				   changes in process.c/util.c: we might see
2397				   the child's initial trap before we see the
2398				   parent return from the clone syscall.
2399				   Leave the child suspended until the parent
2400				   returns from its system call.  Only then
2401				   will we have the association of parent and
2402				   child so that we know how to do clearbpt
2403				   in the child.  */
2404				tcp = alloctcb(pid);
2405				tcp->flags |= TCB_ATTACHED;
2406				if (!qflag)
2407					fprintf(stderr, "Process %d attached\n",
2408						pid);
2409			}
2410			else
2411				/* This can happen if a clone call used
2412				   CLONE_PTRACE itself.  */
2413#endif
2414			{
2415				if (WIFSTOPPED(status))
2416					ptrace(PTRACE_CONT, pid, (char *) 1, 0);
2417				error_msg_and_die("Unknown pid: %u", pid);
2418			}
2419		}
2420		/* set current output file */
2421		outf = tcp->outf;
2422		curcol = tcp->curcol;
2423#ifdef LINUX
2424		if (cflag) {
2425			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
2426			tcp->stime = ru.ru_stime;
2427		}
2428#endif
2429
2430		if (WIFSIGNALED(status)) {
2431			if (pid == strace_child)
2432				exit_code = 0x100 | WTERMSIG(status);
2433			if (cflag != CFLAG_ONLY_STATS
2434			    && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL)) {
2435				printleader(tcp);
2436#ifdef WCOREDUMP
2437				tprintf("+++ killed by %s %s+++",
2438					signame(WTERMSIG(status)),
2439					WCOREDUMP(status) ? "(core dumped) " : "");
2440#else
2441				tprintf("+++ killed by %s +++",
2442					signame(WTERMSIG(status)));
2443#endif
2444				printtrailer();
2445			}
2446			droptcb(tcp);
2447			continue;
2448		}
2449		if (WIFEXITED(status)) {
2450			if (pid == strace_child)
2451				exit_code = WEXITSTATUS(status);
2452			if (tcp == tcp_last) {
2453				if ((tcp->flags & (TCB_INSYSCALL|TCB_REPRINT)) == TCB_INSYSCALL)
2454					tprintf(" <unfinished ... exit status %d>\n",
2455						WEXITSTATUS(status));
2456				tcp_last = NULL;
2457			}
2458			if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
2459				printleader(tcp);
2460				tprintf("+++ exited with %d +++", WEXITSTATUS(status));
2461				printtrailer();
2462			}
2463			droptcb(tcp);
2464			continue;
2465		}
2466		if (!WIFSTOPPED(status)) {
2467			fprintf(stderr, "PANIC: pid %u not stopped\n", pid);
2468			droptcb(tcp);
2469			continue;
2470		}
2471
2472		if (status >> 16) {
2473			/* Ptrace event (we ignore all of them for now) */
2474			goto tracing;
2475		}
2476
2477		/*
2478		 * Interestingly, the process may stop
2479		 * with STOPSIG equal to some other signal
2480		 * than SIGSTOP if we happened to attach
2481		 * just before the process takes a signal.
2482		 * A no-MMU vforked child won't send up a signal,
2483		 * so skip the first (lost) execve notification.
2484		 */
2485		if ((tcp->flags & TCB_STARTUP) &&
2486		    (WSTOPSIG(status) == SIGSTOP || strace_vforked)) {
2487			/*
2488			 * This flag is there to keep us in sync.
2489			 * Next time this process stops it should
2490			 * really be entering a system call.
2491			 */
2492			tcp->flags &= ~TCB_STARTUP;
2493			if (tcp->flags & TCB_BPTSET) {
2494				/*
2495				 * One example is a breakpoint inherited from
2496				 * parent through fork().
2497				 */
2498				if (clearbpt(tcp) < 0) /* Pretty fatal */ {
2499					droptcb(tcp);
2500					cleanup();
2501					return -1;
2502				}
2503			}
2504#ifdef LINUX
2505			if (ptrace_setoptions) {
2506				if (debug)
2507					fprintf(stderr, "setting opts %x on pid %d\n", ptrace_setoptions, tcp->pid);
2508				if (ptrace(PTRACE_SETOPTIONS, tcp->pid, NULL, ptrace_setoptions) < 0) {
2509					if (errno != ESRCH) {
2510						/* Should never happen, really */
2511						perror_msg_and_die("PTRACE_SETOPTIONS");
2512					}
2513				}
2514			}
2515#endif
2516			goto tracing;
2517		}
2518
2519		if (WSTOPSIG(status) != syscall_trap_sig) {
2520			if (WSTOPSIG(status) == SIGSTOP &&
2521					(tcp->flags & TCB_SIGTRAPPED)) {
2522				/*
2523				 * Trapped attempt to block SIGTRAP
2524				 * Hope we are back in control now.
2525				 */
2526				tcp->flags &= ~(TCB_INSYSCALL | TCB_SIGTRAPPED);
2527				if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
2528					cleanup();
2529					return -1;
2530				}
2531				continue;
2532			}
2533			if (cflag != CFLAG_ONLY_STATS
2534			    && (qual_flags[WSTOPSIG(status)] & QUAL_SIGNAL)) {
2535				siginfo_t si;
2536#if defined(PT_CR_IPSR) && defined(PT_CR_IIP)
2537				long pc = 0;
2538				long psr = 0;
2539
2540				upeek(tcp, PT_CR_IPSR, &psr);
2541				upeek(tcp, PT_CR_IIP, &pc);
2542
2543# define PSR_RI	41
2544				pc += (psr >> PSR_RI) & 0x3;
2545# define PC_FORMAT_STR	" @ %lx"
2546# define PC_FORMAT_ARG	, pc
2547#else
2548# define PC_FORMAT_STR	""
2549# define PC_FORMAT_ARG	/* nothing */
2550#endif
2551				printleader(tcp);
2552				if (ptrace(PTRACE_GETSIGINFO, pid, 0, &si) == 0) {
2553					tprintf("--- ");
2554					printsiginfo(&si, verbose(tcp));
2555					tprintf(" (%s)" PC_FORMAT_STR " ---",
2556						strsignal(WSTOPSIG(status))
2557						PC_FORMAT_ARG);
2558				} else
2559					tprintf("--- %s by %s" PC_FORMAT_STR " ---",
2560						strsignal(WSTOPSIG(status)),
2561						signame(WSTOPSIG(status))
2562						PC_FORMAT_ARG);
2563				printtrailer();
2564			}
2565			if (ptrace_restart(PTRACE_SYSCALL, tcp, WSTOPSIG(status)) < 0) {
2566				cleanup();
2567				return -1;
2568			}
2569			continue;
2570		}
2571
2572		/* We handled quick cases, we are permitted to interrupt now. */
2573		if (interrupted)
2574			return 0;
2575
2576		/* This should be syscall entry or exit.
2577		 * (Or it still can be that pesky post-execve SIGTRAP!)
2578		 * Handle it.
2579		 */
2580		if (trace_syscall(tcp) < 0 && !tcp->ptrace_errno) {
2581			/* ptrace() failed in trace_syscall() with ESRCH.
2582			 * Likely a result of process disappearing mid-flight.
2583			 * Observed case: exit_group() terminating
2584			 * all processes in thread group. In this case, threads
2585			 * "disappear" in an unpredictable moment without any
2586			 * notification to strace via wait().
2587			 */
2588			if (tcp->flags & TCB_ATTACHED) {
2589				if (tcp_last) {
2590					/* Do we have dangling line "syscall(param, param"?
2591					 * Finish the line then.
2592					 */
2593					tcp_last->flags |= TCB_REPRINT;
2594					tprintf(" <unfinished ...>");
2595					printtrailer();
2596				}
2597				detach(tcp, 0);
2598			} else {
2599				ptrace(PTRACE_KILL,
2600					tcp->pid, (char *) 1, SIGTERM);
2601				droptcb(tcp);
2602			}
2603			continue;
2604		}
2605	tracing:
2606		/* Remember current print column before continuing. */
2607		tcp->curcol = curcol;
2608		if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
2609			cleanup();
2610			return -1;
2611		}
2612	}
2613	return 0;
2614}
2615
2616#endif /* !USE_PROCFS */
2617
2618void
2619tprintf(const char *fmt, ...)
2620{
2621	va_list args;
2622
2623	va_start(args, fmt);
2624	if (outf) {
2625		int n = vfprintf(outf, fmt, args);
2626		if (n < 0) {
2627			if (outf != stderr)
2628				perror(outfname == NULL
2629				       ? "<writing to pipe>" : outfname);
2630		} else
2631			curcol += n;
2632	}
2633	va_end(args);
2634	return;
2635}
2636
2637void
2638printleader(struct tcb *tcp)
2639{
2640	if (tcp_last) {
2641		if (tcp_last->ptrace_errno) {
2642			if (tcp_last->flags & TCB_INSYSCALL) {
2643				tprintf(" <unavailable>) ");
2644				tabto(acolumn);
2645			}
2646			tprintf("= ? <unavailable>\n");
2647			tcp_last->ptrace_errno = 0;
2648		} else if (!outfname || followfork < 2 || tcp_last == tcp) {
2649			tcp_last->flags |= TCB_REPRINT;
2650			tprintf(" <unfinished ...>\n");
2651		}
2652	}
2653	curcol = 0;
2654	if ((followfork == 1 || pflag_seen > 1) && outfname)
2655		tprintf("%-5d ", tcp->pid);
2656	else if (nprocs > 1 && !outfname)
2657		tprintf("[pid %5u] ", tcp->pid);
2658	if (tflag) {
2659		char str[sizeof("HH:MM:SS")];
2660		struct timeval tv, dtv;
2661		static struct timeval otv;
2662
2663		gettimeofday(&tv, NULL);
2664		if (rflag) {
2665			if (otv.tv_sec == 0)
2666				otv = tv;
2667			tv_sub(&dtv, &tv, &otv);
2668			tprintf("%6ld.%06ld ",
2669				(long) dtv.tv_sec, (long) dtv.tv_usec);
2670			otv = tv;
2671		}
2672		else if (tflag > 2) {
2673			tprintf("%ld.%06ld ",
2674				(long) tv.tv_sec, (long) tv.tv_usec);
2675		}
2676		else {
2677			time_t local = tv.tv_sec;
2678			strftime(str, sizeof(str), "%T", localtime(&local));
2679			if (tflag > 1)
2680				tprintf("%s.%06ld ", str, (long) tv.tv_usec);
2681			else
2682				tprintf("%s ", str);
2683		}
2684	}
2685	if (iflag)
2686		printcall(tcp);
2687}
2688
2689void
2690tabto(int col)
2691{
2692	if (curcol < col)
2693		tprintf("%*s", col - curcol, "");
2694}
2695
2696void
2697printtrailer(void)
2698{
2699	tprintf("\n");
2700	tcp_last = NULL;
2701}
2702
2703#ifdef HAVE_MP_PROCFS
2704
/*
 * Send command `cmd' to `fd' as a single writev() call.
 *
 * The first vector element is always the command word itself.  When `arg'
 * is non-NULL, a second element of `size' bytes starting at `arg' follows.
 *
 * Returns the result of writev(): the number of bytes written, or -1 on
 * error.
 */
int
mp_ioctl(int fd, int cmd, void *arg, int size)
{
	struct iovec vec[2];
	int count = 0;

	vec[count].iov_base = &cmd;
	vec[count].iov_len = sizeof(cmd);
	count++;

	if (arg != NULL) {
		vec[count].iov_base = arg;
		vec[count].iov_len = size;
		count++;
	}

	return writev(fd, vec, count);
}
2721
2722#endif
2723